# musoW Twitter Pipeline v.3

## Imports

In [1]:
# Relative base path: prepended to the data folders and passed to the pipeline calls below.
path = '../'
import pandas as pd
# Project-local pipeline helpers from the musow_pipeline package.
from musow_pipeline.logreg_prediction import PredictPipeline
from musow_pipeline.twitter_pipeline import TwitterPipeline
from musow_pipeline.text_prediction import TextPrediction
# conf provides the token (read as conf.TOKEN in the Query Twitter cell).
from musow_pipeline import conf

## Variables

In [2]:
# Locations of the two pickled training sets, resolved against the base path.
archive_desc_pickle = path+'LOGREG_RELEVANCE/TRAINING_SETS/archive_desc_training_v4.pkl'
twitter_pickle = path+'LOGREG_RELEVANCE/TRAINING_SETS/twitter_training_v2_alt.pkl'

# Descriptions training set first, then the twitter training set.
archive_desc_training = pd.read_pickle(archive_desc_pickle)
twitter_training = pd.read_pickle(twitter_pickle)

## Training twitter and descriptions classifiers

This is a one-time operation: the trained models are pickled, and later loaded from the LOGREG_RELEVANCE/MODELS folder to predict new results.

In [None]:
# One-off training run on the twitter training set.
# NOTE(review): presumably 'tweet' is the text column and 'Target' the label column;
# the meaning of the positional 10 and 1000 is not visible here - confirm in PredictPipeline.train.
twitter_training_model = PredictPipeline.train(
    twitter_training,
    'tweet',
    'Target',
    10,
    1000,
    'twitter_pipeline_june_2022',
    path,
)

# One-off training run on the resource descriptions training set (same positional settings).
resource_training_model = PredictPipeline.train(
    archive_desc_training,
    'Description',
    'Target',
    10,
    1000,
    'resources_pipeline_june_2022',
    path,
)

## Query Twitter

Calls the Twitter API with a list of keywords and returns the results as a raw CSV and a clean pickle in the TWITTER_SEARCHES/RAW_SEARCHES folder.

In [3]:
# Token is read from the project's conf module.
token = conf.TOKEN

# Keywords to search for.
keywords = [
    'music archive',
    'music collection',
    'audio file',
    'music library',
    'sheet music',
    'sound archive',
    'sound recording',
]

# Search window, used by the custom-timeframe search only.
start = ['2022-06-03T00:00:00.000Z']
end = ['2022-06-03T23:59:59.000Z']

# Pick one search option.
# Option A - last week (disabled):
#tweets = TwitterPipeline.search_weekly(token, keywords, 50, 50)
# Option B - custom timeframe (active).
# NOTE(review): the meaning of the two trailing 500 arguments is not visible here - confirm in TwitterPipeline.
tweets = TwitterPipeline.search_custom(token, keywords, start, end, 500, 500)

-------------------
Token:  None
Endpoint Response Code: 200
-------------------
Start Date:  2022-06-03T00:00:00.000Z
# of Tweets added from this response:  5
Total # of Tweets added for '"music archive" -is:retweet': 5
-------------------
Total number of results: 5
-------------------
Token:  None
Endpoint Response Code: 200
-------------------
Start Date:  2022-06-03T00:00:00.000Z
# of Tweets added from this response:  208
Total # of Tweets added for '"music collection" -is:retweet': 208
-------------------
Total number of results: 208
-------------------
Token:  None
Endpoint Response Code: 200
-------------------
Start Date:  2022-06-03T00:00:00.000Z
# of Tweets added from this response:  65
Total # of Tweets added for '"audio file" -is:retweet': 65
-------------------
Total number of results: 65
-------------------
Token:  None
Endpoint Response Code: 200
-------------------
Start Date:  2022-06-03T00:00:00.000Z
# of Tweets added from this response:  121
Total # of Tweets added f

## Classify tweets

In [4]:
# Collect the search results into a single dataframe.
# The pickle name is built from the last 16 characters of the first entry in `tweets`.
raw_searches_dir = path+'TWITTER_SEARCHES/RAW_SEARCHES/'
search_pickle = f'{tweets[0][-16:]}.pkl'
tweets_to_classify = TwitterPipeline.classify_tweets(raw_searches_dir, search_pickle)
tweets_to_classify

Total tweets to classify: 165


Unnamed: 0,user,tweet id,created_at,lang,like_count,quote_count,reply_count,retweet_count,tweet,URL,Search KW
0,JuneIND,1532872470042140672,2022-06-03 23:50:50+00:00,en,0,0,0,0,Name your price and add First Annual Meeting o...,https://juneind.bandcamp.com/album/first-annua...,"""music library"" -is:retweet"
1,LibrarySheet,1532854561400147969,2022-06-03 22:39:41+00:00,en,1,0,0,0,Red River Valley (Country Music) . The Sheet ...,https://sheetmusiclibrary.website/2022/05/24/r...,"""music library"" -is:retweet"
2,TBDigitalShop,1532799373331120131,2022-06-03 19:00:23+00:00,en,0,0,0,0,Digital Compilations are a great way to increa...,https://tlbx.digital/CompilationAlbums,"""music library"" -is:retweet"
3,swalkinganimals,1532789927062159364,2022-06-03 18:22:51+00:00,en,13,1,0,6,Freedom Song - out 10th June Our first single...,https://lnk.to/FreedomSong_10-06-22,"""music library"" -is:retweet"
4,LibrarySheet,1532778733039497219,2022-06-03 17:38:22+00:00,en,3,0,0,1,"Rachmaninoff Symphony No.2,Op.27 (Arranged For...",https://sheetmusiclibrary.website/2022/04/17/s...,"""music library"" -is:retweet"
...,...,...,...,...,...,...,...,...,...,...,...
160,tannerlbraden,1532533155768328192,2022-06-03 01:22:32+00:00,en,1,0,1,0,for digital sheet music library https://t.co/...,"https://forscore.co/, https://insighttimer.com...","""sheet music"" -is:retweet"
161,BitchinKitsch,1532528717183983626,2022-06-03 01:04:53+00:00,en,0,0,0,0,An Evening With John Denver Song Book Songbook...,https://etsy.me/3GMdIAx,"""sheet music"" -is:retweet"
162,GavinLibotte,1532517803940405248,2022-06-03 00:21:31+00:00,en,2,0,0,0,Na new arrangement - the Muppet Movie Rainbow ...,"https://www.musicnotes.com/l/gD9MJ, https://yo...","""sheet music"" -is:retweet"
163,WeddingsBest,1532516213301051429,2022-06-03 00:15:12+00:00,en,0,0,0,0,Checking out the latest Fashion Sales - Women ...,"http://tinyurl.com/c7mvrhn6, http://tinyurl.co...","""sheet music"" -is:retweet"


In [5]:
# Classify the tweets with the 'twitter_pipeline_june_2022' model and collect the links from the results.
# NOTE(review): the final argument 1 presumably selects the positive class - confirm in TwitterPipeline.predict_twitter.
predicted_tweets, twitter_link_list = TwitterPipeline.predict_twitter(
    path,
    'twitter_pipeline_june_2022',
    tweets_to_classify,
    'tweet',
    1,
)
predicted_tweets

Total tweets predicted: 72


Unnamed: 0,tweet,Prediction,Score,Probability,Input Length,URL,Search KW,tweet date,user,tweet id
0,"Thanks, Janet! Great review. I found the same ...",1,7.139409,0.999207,268,https://afb.org/aw/23/5/17937,"""sheet music"" -is:retweet",2022-06-03,laurel_jean,1532687984222429186
1,Pieces by Anne Crosby Gaudet + Sheet Music (Me...,1,6.809992,0.998899,99,https://www.themichiganharpist.com/blog/2022/6...,"""sheet music"" -is:retweet",2022-06-03,MichiganHarpist,1532745300225425408
2,piano beginner sheet music free https://t.co/X...,1,6.628887,0.998680,55,http://dlvr.it/SRYk5m,"""sheet music"" -is:retweet",2022-06-03,HifiLebensart,1532721526675365889
3,Sheet Music - Largo from Concerto in D Major h...,1,6.418846,0.998372,92,https://bit.ly/38r63Ln,"""sheet music"" -is:retweet",2022-06-03,pianolessonsmn,1532729579051397121
4,New app deals![iOS] [Visual Piano] [Visual Pia...,1,5.900533,0.997269,255,https://ift.tt/L9ndHiT,"""sheet music"" -is:retweet",2022-06-03,TechlogicDs,1532673750499475456
...,...,...,...,...,...,...,...,...,...,...
67,"Music from Encanto, WandaVision, Soul, and mor...",1,1.111508,0.752410,163,https://okt.to/cINzd8,"""sheet music"" -is:retweet",2022-06-03,jwpepper,1532756260914864130
68,Fresh new swing charts for collegiate and adva...,1,0.756510,0.680596,108,https://okt.to/8a0TIH,"""sheet music"" -is:retweet",2022-06-03,jwpepper,1532694846305181699
69,This hamster is the perfect toy for a kid lear...,1,0.553520,0.634952,304,https://kidschoice.online/talking-hamster-plus...,"""sound recording"" -is:retweet",2022-06-03,kidschoicestore,1532586274283978752
70,We're excited to announce the Kohi Creator too...,1,0.413366,0.601895,298,https://creator.kohi.art/moal,"""music collection"" -is:retweet",2022-06-03,kohiart,1532775308415356928


## Scrape URLs

In [6]:
# Scrape the URL list; the returned dataframe feeds the resource classification step.
# The output name reuses the last 16 characters of the first entry in `tweets`, plus '_scrapes'.
scrape_name = f'{tweets[0][-16:]}_scrapes'
scraped_links = TextPrediction.scrape_links(twitter_link_list, predicted_tweets, scrape_name)
scraped_links

Your max_length is set to 120, but you input_length is only 114. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=57)


1 https://afb.org/aw/23/5/17937
2 https://www.themichiganharpist.com/blog/2022/6/3/pieces-by-anne-crosby-gaudet-sheet-music-medley-the-michigan-harpist


Your max_length is set to 120, but you input_length is only 71. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=35)


3 https://bit.ly/38r63Ln
4 https://ift.tt/L9ndHiT
5 https://go.unc.edu/Sheet
6 https://t.uga.edu/87V


Token indices sequence length is longer than the specified maximum sequence length for this model (1419 > 1024). Running this sequence through the model will result in indexing errors


7 http://hermitage-crabapple.amebaownd.com/pages/811095/page_201701220951
8 https://bit.ly/3j6luKN
9 http://michaelreichenbach.musicaneo.com/
10 https://www.lulu.com/shop/vlada-veselinovi%C4%87/school-of-balkan-folk-music-for-accordion/paperback/product-24354783.html


Your max_length is set to 120, but you input_length is only 93. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=46)


11 https://need-mart.com/2022/06/03/blank-sheet-music-for-piano-large-staves-perfect-for-younger-learners-8-5-x-11-inches-100-pages/
12 https://buff.ly/3hMH73f
13 https://store.piascore.com/scores/150248
14 https://warpedboard.itch.io/the-devil-went-down-to-giphy
15 https://relate13.com/best-apps-for-apple-carplay/
16 http://Tomplay.com
17 https://www.livingetc.com/reviews/monoprice-monolith-600046-turntable-review
18 https://www.thedailymash.co.uk/news/lifestyle/man-whos-given-up-weed-needs-entirely-new-music-collection-20200717198603?utm_medium=Social&utm_source=Twitter#Echobox=1654098231
19 https://hermitage-crabapple.amebaownd.com/posts/categories/6017921
20 https://www.sd.net/blogs/archive/6232022-sd-game-fish-and-parks/
21 https://classicalguitarshed.com/link/9i
22 https://bit.ly/3t2RPXV
23 https://www.giftyourcoupon.online/davy-jones-theme-sheet-music-organ-64-software-patch-utorrent-full-version-activator/


Your max_length is set to 120, but you input_length is only 86. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=43)


24 https://tinyurl.com/y3y4d85n
25 https://maoridictionary.co.nz/word/3596
26 http://tinyurl.com/yy688jeg
27 https://livemusicarchive.app/music/artists/PhilLeshandFriends/recordings/phil2007-06-03.bk4022.acm671.burke.flac16
28 https://en.wikipedia.org/w/index.php?title=Steely_Dan_discography&action=edit&section=2
29 https://www.pioslabs.com/chordinates/index.html?&h=harmony-majorsixth&s=112
30 http://hcacatm.theshop.jp/categories/849125
31 http://hermitage-crabapple.stores.jp/?category_id=5564fd0aef33775cc4002cc3
32 https://www.history.com/news/josephine-baker-world-war-ii-spy
33 https://www.pioslabs.com/chordinates/index.html?f=triangle&m=chromatic&x=x-scale-chromatic-positive&y=y-scale-chromatic-origin&t=timbre-guitar-acoustic&h=harmony-chromaticmajor&s=151&params=-1,16,8,12
34 https://www.mlb.com/news/the-original-blue-jays-theme-song
35 https://hermitage-crabapple.amebaownd.com/pages/5181126/page_202108140852
36 https://ift.tt/E0OaKmC


Your max_length is set to 120, but you input_length is only 28. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)


37 https://bit.ly/3x2eKVN
38 https://www.steinwaykc.com/spirio#5c3e32dd-4e00-491c-90c1-7d6efeec3a5a
39 https://eshap.substack.com/p/streaming-wars-ii
40 https://bit.ly/36UVDQ9
41 https://bit.ly/3KV4oej
42 https://drive.google.com/file/d/1Ejq95WVIZj3wpZyFY472_sADVTFr6LM_/view?usp=drivesdk
43 http://bit.ly/3mhKIXO
44 https://www.texasstandard.org/stories/uvalde-shooting-victims-tribute/
45 https://blog.musiio.com/2022/06/03/melodie-ai-search-human-tags/
46 https://okt.to/cINzd8
47 https://okt.to/8a0TIH
48 https://kidschoice.online/talking-hamster-plush-toy/
49 https://creator.kohi.art/moal
50 https://kohi.art/creator
Total links scraped: 32


Unnamed: 0,tweet,Prediction,Score,Probability,Input Length,URL,Search KW,tweet date,user,tweet id,Title,Description
0,"Thanks, Janet! Great review. I found the same ...",1,7.139409,0.999207,268,https://afb.org/aw/23/5/17937,"""sheet music"" -is:retweet",2022-06-03,laurel_jean,1532687984222429186,A Review of Two Sheet Music Apps: SM Music Rea...,SM Music Reader allows a user to read the mus...
1,Pieces by Anne Crosby Gaudet + Sheet Music (Me...,1,6.809992,0.998899,99,https://www.themichiganharpist.com/blog/2022/6...,"""sheet music"" -is:retweet",2022-06-03,MichiganHarpist,1532745300225425408,"""The Michigan Harpist"" Chanah Ambuter - Pieces...",Pieces by Anne Crosby Gaudet + Sheet Music (Me...
2,Sheet Music - Largo from Concerto in D Major h...,1,6.418846,0.998372,92,https://bit.ly/38r63Ln,"""sheet music"" -is:retweet",2022-06-03,pianolessonsmn,1532729579051397121,Laura's Music Studio: Sheet Music - Largo from...,Sheet Music - Largo from Concerto in D Major ...
3,The Nineteenth Century American Sheet Music Co...,1,5.769634,0.996889,218,https://go.unc.edu/Sheet,"""sheet music"" -is:retweet",2022-06-03,UNCLibrary,1532739257084829699,Playmakers Repertory Company Playbills,Please enter the term(s) you wish to search for:
4,Calling Music Lovers! Consider loaning items...,1,5.732888,0.996773,211,https://t.uga.edu/87V,"""music collection"" -is:retweet",2022-06-03,hargrettlibrary,1532723793138950145,UGA Libraries Calls for Community Curators | ...,Georgia on My Mind: Finding Belonging in Musi...
5,A visionary production music library that comb...,1,4.7422,0.991356,282,https://bit.ly/3j6luKN,"""music library"" -is:retweet",2022-06-03,SoundFellasLabs,1532657596976484360,Celestial Beats - Futuristic and Energetic | P...,A visionary production music library that com...
6,"♫♪♫ ♡ Sheet music for mandolin, guitar, zithe...",1,4.62718,0.990312,200,http://michaelreichenbach.musicaneo.com/,"""sheet music"" -is:retweet",2022-06-03,mandoisland,1532743044591767554,Michael Reichenbach,Michael Reichenbach has published several boo...
7,"BOOK ""School of Balkan Folk Music for Accordio...",1,4.514466,0.989169,274,https://www.lulu.com/shop/vlada-veselinovi%C4%...,"""sheet music"" -is:retweet",2022-06-03,VeselinovicV,1532637330292498433,School of Balkan Folk Music for Accordion,School of Balkan Folk Music for Accordion inc...
8,"Blank Sheet Music for Piano: large staves, per...",1,4.494461,0.988953,123,https://need-mart.com/2022/06/03/blank-sheet-m...,"""sheet music"" -is:retweet",2022-06-03,itsrekib,1532761604529479682,"Blank Sheet Music For Piano: Large Staves, Per...",Publisher : Independently published (October ...
9,Cotton Quilt Fabric Tea Time Sheet Music Europ...,1,4.382936,0.987665,111,https://buff.ly/3hMH73f,"""sheet music"" -is:retweet",2022-06-03,acquiltfabric,1532781168789229573,Cotton Quilt Fabric Tea Time Sheet Music Europ...,Cotton Quilt Fabric Tea Time Sheet Music Euro...


## Classify web resources

In [7]:
# Classify the scraped resources on their 'Description' text with the 'resources_pipeline_june_2022' model.
# NOTE(review): the role of the positional 1 and of the trailing name argument is not visible here -
# confirm in TextPrediction.resource_predictions.
predicted_resources = TextPrediction.resource_predictions(
    path,
    'resources_pipeline_june_2022',
    scraped_links,
    'Description',
    1,
    f'{tweets[0][-16:]}',
)
predicted_resources

Unnamed: 0,tweet,Prediction,Score,Probability,Input Length,URL,Search KW,tweet date,user,tweet id,Title,Description,Match
0,"BOOK ""School of Balkan Folk Music for Accordio...",1,4.461233,0.988584,289,https://www.lulu.com/shop/vlada-veselinovi%C4%...,"""sheet music"" -is:retweet",2022-06-03,VeselinovicV,1532637330292498433,School of Balkan Folk Music for Accordion,School of Balkan Folk Music for Accordion inc...,
1,Pieces by Anne Crosby Gaudet + Sheet Music (Me...,1,3.702412,0.97593,113,https://www.themichiganharpist.com/blog/2022/6...,"""sheet music"" -is:retweet",2022-06-03,MichiganHarpist,1532745300225425408,"""The Michigan Harpist"" Chanah Ambuter - Pieces...",Pieces by Anne Crosby Gaudet + Sheet Music (Me...,
2,Find fun and fabulous shopping links to fashio...,1,3.33606,0.965645,217,https://tinyurl.com/y3y4d85n,"""sheet music"" -is:retweet",2022-06-03,ShopLinksOnline,1532790813742211074,SHOP | Senior Care 911,Featured below are fun and fabulous shopping ...,
3,Cotton Quilt Fabric Tea Time Sheet Music Europ...,1,2.943789,0.949969,278,https://buff.ly/3hMH73f,"""sheet music"" -is:retweet",2022-06-03,acquiltfabric,1532781168789229573,Cotton Quilt Fabric Tea Time Sheet Music Europ...,Cotton Quilt Fabric Tea Time Sheet Music Euro...,
4,Sheet Music - Largo from Concerto in D Major h...,1,2.528452,0.926112,132,https://bit.ly/38r63Ln,"""sheet music"" -is:retweet",2022-06-03,pianolessonsmn,1532729579051397121,Laura's Music Studio: Sheet Music - Largo from...,Sheet Music - Largo from Concerto in D Major ...,
5,"Thanks, Janet! Great review. I found the same ...",1,1.690844,0.844335,1352,https://afb.org/aw/23/5/17937,"""sheet music"" -is:retweet",2022-06-03,laurel_jean,1532687984222429186,A Review of Two Sheet Music Apps: SM Music Rea...,SM Music Reader allows a user to read the mus...,
6,"♫♪♫ ♡ Sheet music for mandolin, guitar, zithe...",1,1.224655,0.772882,286,http://michaelreichenbach.musicaneo.com/,"""sheet music"" -is:retweet",2022-06-03,mandoisland,1532743044591767554,Michael Reichenbach,Michael Reichenbach has published several boo...,training set match
7,Independent music library serves Fox Sports a...,1,1.11442,0.752952,526,https://blog.musiio.com/2022/06/03/melodie-ai-...,"""music library"" -is:retweet",2022-06-03,Musiio,1532648357155737602,How Melodie’s AI-meets-human approach powers s...,Melodie is a Sydney-based production music co...,
8,A visionary production music library that comb...,1,0.86651,0.704019,298,https://bit.ly/3j6luKN,"""music library"" -is:retweet",2022-06-03,SoundFellasLabs,1532657596976484360,Celestial Beats - Futuristic and Energetic | P...,A visionary production music library that com...,
9,The SPIRIO music library is updated monthly an...,1,0.376284,0.592977,465,https://www.steinwaykc.com/spirio#5c3e32dd-4e0...,"""music library"" -is:retweet",2022-06-03,SteinwayKC,1532754079578988547,Spirio Player Piano | Steinway & Sons - Steinw...,The Steinway & Sons Spirio is the world’s fin...,
