# WeRateDogs Twitter Feed

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import requests
import tweepy
import twitter_keys as tk
import json

## Collect the data

- The file `twitter-archive-enhanced.csv` is already provided and stored in the `data` directory. We load it into a pandas DataFrame.

In [2]:
# Read the provided tweet archive into a DataFrame and preview its first five rows.
tweet_df = pd.read_csv(filepath_or_buffer="data/twitter-archive-enhanced.csv")
tweet_df.head(5)

Unnamed: 0,tweet_id,in_reply_to_status_id,in_reply_to_user_id,timestamp,source,text,retweeted_status_id,retweeted_status_user_id,retweeted_status_timestamp,expanded_urls,rating_numerator,rating_denominator,name,doggo,floofer,pupper,puppo
0,892420643555336193,,,2017-08-01 16:23:56 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Phineas. He's a mystical boy. Only eve...,,,,https://twitter.com/dog_rates/status/892420643...,13,10,Phineas,,,,
1,892177421306343426,,,2017-08-01 00:17:27 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Tilly. She's just checking pup on you....,,,,https://twitter.com/dog_rates/status/892177421...,13,10,Tilly,,,,
2,891815181378084864,,,2017-07-31 00:18:03 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Archie. He is a rare Norwegian Pouncin...,,,,https://twitter.com/dog_rates/status/891815181...,12,10,Archie,,,,
3,891689557279858688,,,2017-07-30 15:58:51 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Darla. She commenced a snooze mid meal...,,,,https://twitter.com/dog_rates/status/891689557...,13,10,Darla,,,,
4,891327558926688256,,,2017-07-29 16:00:24 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Franklin. He would like you to stop ca...,,,,https://twitter.com/dog_rates/status/891327558...,12,10,Franklin,,,,


- Next, we download the file with the image predictions and load it into a DataFrame as well.

In [3]:
# Download the image-predictions TSV file.
url = "https://d17h27t6h515a5.cloudfront.net/topher/2017/August/599fd2ad_image-predictions/image-predictions.tsv"
# A timeout keeps this cell from hanging indefinitely on a stalled connection.
r = requests.get(url, timeout=30)
# Stop here on an HTTP error status so an error page is never saved to disk as the TSV.
r.raise_for_status()

In [4]:
# Save the downloaded response body under ./data, named after the last segment of its URL.
folder_path = "./data"
# exist_ok=True creates the directory only when it is missing, which avoids the
# race between a separate existence check and the creation call.
os.makedirs(folder_path, exist_ok=True)
with open(os.path.join(folder_path, r.url.split("/")[-1]), mode="wb") as file:
    file.write(r.content)

In [5]:
# Parse the tab-separated image predictions into a DataFrame and preview its first five rows.
image_df = pd.read_csv(filepath_or_buffer="data/image-predictions.tsv", delimiter="\t")
image_df.head(5)

Unnamed: 0,tweet_id,jpg_url,img_num,p1,p1_conf,p1_dog,p2,p2_conf,p2_dog,p3,p3_conf,p3_dog
0,666020888022790149,https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg,1,Welsh_springer_spaniel,0.465074,True,collie,0.156665,True,Shetland_sheepdog,0.061428,True
1,666029285002620928,https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg,1,redbone,0.506826,True,miniature_pinscher,0.074192,True,Rhodesian_ridgeback,0.07201,True
2,666033412701032449,https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg,1,German_shepherd,0.596461,True,malinois,0.138584,True,bloodhound,0.116197,True
3,666044226329800704,https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg,1,Rhodesian_ridgeback,0.408143,True,redbone,0.360687,True,miniature_pinscher,0.222752,True
4,666049248165822465,https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg,1,miniature_pinscher,0.560311,True,Rottweiler,0.243682,True,Doberman,0.154629,True


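- Now, we use the Twitter API (through tweepy) to download the complete data for every tweet in the archive and store it, one JSON object per line, in `data/tweet_json.txt`.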

In [6]:
# Authenticate with the credentials stored in the local twitter_keys module.
auth = tweepy.OAuthHandler(tk.consumer_key, tk.consumer_secret)
auth.set_access_token(tk.access_token, tk.access_secret)

# Build the API client: when the rate limit is reached it sleeps (printing a notice)
# instead of failing, and a failed request is retried up to 2 times with a delay of 2.
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
    retry_count=2,
    retry_delay=2,
)

In [8]:
def fetch_tweets(tweet_ids, outfile):
    """Download tweets and append them to ``outfile`` as JSON, one tweet per line.

    Parameters
    ----------
    tweet_ids : iterable
        Tweet ids to request through the notebook-level ``api`` client.
    outfile : writable text file
        Receives each fetched tweet's ``_json`` payload followed by a newline.

    Returns
    -------
    dict
        Maps every tweet id whose request raised ``tweepy.TweepError`` to that error.
    """
    failed = {}
    # ``tweet_id`` rather than ``id`` so the builtin ``id`` is not shadowed.
    for tweet_id in tweet_ids:
        try:
            tweet = api.get_status(tweet_id, tweet_mode='extended')
            json.dump(tweet._json, outfile)
            outfile.write('\n')
            print(tweet_id, ': SUCCESS')
        except tweepy.TweepError as error:
            print(tweet_id, ': FAIL')
            failed[tweet_id] = error
    return failed


with open('./data/tweet_json.txt', 'w') as outfile:
    # First pass over every tweet id in the archive.
    fails = fetch_tweets(tweet_df.tweet_id, outfile)
    # Second pass over the failed ids only: this recovers tweets that failed because
    # of connection issues. Ids that fail again are kept in ``fails``.
    fails_new = fetch_tweets(list(fails), outfile)
    fails = fails_new
print('FAILS:')
print(fails)

892420643555336193 : SUCCESS
892177421306343426 : SUCCESS
891815181378084864 : SUCCESS
891689557279858688 : SUCCESS
891327558926688256 : SUCCESS
891087950875897856 : SUCCESS
890971913173991426 : SUCCESS
890729181411237888 : SUCCESS
890609185150312448 : SUCCESS
890240255349198849 : FAIL
890006608113172480 : SUCCESS
889880896479866881 : SUCCESS
889665388333682689 : SUCCESS
889638837579907072 : SUCCESS
889531135344209921 : SUCCESS
889278841981685760 : SUCCESS
888917238123831296 : SUCCESS
888804989199671297 : SUCCESS
888554962724278272 : SUCCESS
888202515573088257 : FAIL
888078434458587136 : SUCCESS
887705289381826560 : SUCCESS
887517139158093824 : SUCCESS
887473957103951883 : SUCCESS
887343217045368832 : SUCCESS
887101392804085760 : SUCCESS
886983233522544640 : SUCCESS
886736880519319552 : SUCCESS
886680336477933568 : SUCCESS
886366144734445568 : SUCCESS
886267009285017600 : SUCCESS
886258384151887873 : SUCCESS
886054160059072513 : SUCCESS
885984800019947520 : SUCCESS
885528943205470208 :

838921590096166913 : SUCCESS
838916489579200512 : SUCCESS
838831947270979586 : SUCCESS
838561493054533637 : SUCCESS
838476387338051585 : SUCCESS
838201503651401729 : SUCCESS
838150277551247360 : SUCCESS
838085839343206401 : SUCCESS
838083903487373313 : SUCCESS
837820167694528512 : SUCCESS
837482249356513284 : SUCCESS
837471256429613056 : SUCCESS
837366284874571778 : FAIL
837110210464448512 : SUCCESS
837012587749474308 : FAIL
836989968035819520 : SUCCESS
836753516572119041 : SUCCESS
836677758902222849 : SUCCESS
836648853927522308 : SUCCESS
836397794269200385 : SUCCESS
836380477523124226 : SUCCESS
836260088725786625 : SUCCESS
836001077879255040 : SUCCESS
835685285446955009 : SUCCESS
835574547218894849 : SUCCESS
835536468978302976 : SUCCESS
835309094223372289 : SUCCESS
835297930240217089 : SUCCESS
835264098648616962 : SUCCESS
835246439529840640 : SUCCESS
835172783151792128 : SUCCESS
835152434251116546 : SUCCESS
834931633769889797 : SUCCESS
834786237630337024 : SUCCESS
834574053763584002 :

802185808107208704 : SUCCESS
801958328846974976 : SUCCESS
801854953262350336 : SUCCESS
801538201127157760 : SUCCESS
801285448605831168 : SUCCESS
801167903437357056 : SUCCESS
801127390143516673 : SUCCESS
801115127852503040 : SUCCESS
800859414831898624 : SUCCESS
800855607700029440 : SUCCESS
800751577355128832 : SUCCESS
800513324630806528 : SUCCESS
800459316964663297 : SUCCESS
800443802682937345 : SUCCESS
800388270626521089 : SUCCESS
800188575492947969 : SUCCESS
800141422401830912 : SUCCESS
800018252395122689 : SUCCESS
799774291445383169 : SUCCESS
799757965289017345 : SUCCESS
799422933579902976 : SUCCESS
799308762079035393 : SUCCESS
799297110730567681 : SUCCESS
799063482566066176 : SUCCESS
798933969379225600 : SUCCESS
798925684722855936 : SUCCESS
798705661114773508 : SUCCESS
798701998996647937 : SUCCESS
798697898615730177 : SUCCESS
798694562394996736 : SUCCESS
798686750113755136 : SUCCESS
798682547630837760 : SUCCESS
798673117451325440 : SUCCESS
798665375516884993 : SUCCESS
79864404277075

765395769549590528 : SUCCESS
765371061932261376 : SUCCESS
765222098633691136 : SUCCESS
764857477905154048 : SUCCESS
764259802650378240 : SUCCESS
763956972077010945 : SUCCESS
763837565564780549 : SUCCESS
763183847194451968 : SUCCESS
763167063695355904 : SUCCESS
763103485927849985 : SUCCESS
762699858130116608 : SUCCESS
762471784394268675 : SUCCESS
762464539388485633 : SUCCESS


Rate limit reached. Sleeping for: 492


762316489655476224 : SUCCESS
762035686371364864 : SUCCESS
761976711479193600 : SUCCESS
761750502866649088 : SUCCESS
761745352076779520 : SUCCESS
761672994376806400 : SUCCESS
761599872357261312 : SUCCESS
761371037149827077 : SUCCESS
761334018830917632 : SUCCESS
761292947749015552 : SUCCESS
761227390836215808 : SUCCESS
761004547850530816 : SUCCESS
760893934457552897 : SUCCESS
760656994973933572 : SUCCESS
760641137271070720 : SUCCESS
760539183865880579 : SUCCESS
760521673607086080 : SUCCESS
760290219849637889 : SUCCESS
760252756032651264 : SUCCESS
760190180481531904 : SUCCESS
760153949710192640 : SUCCESS
759943073749200896 : SUCCESS
759923798737051648 : SUCCESS
759846353224826880 : SUCCESS
759793422261743616 : SUCCESS
759566828574212096 : FAIL
759557299618865152 : SUCCESS
759447681597108224 : SUCCESS
759446261539934208 : SUCCESS
759197388317847553 : SUCCESS
759159934323924993 : SUCCESS
759099523532779520 : SUCCESS
759047813560868866 : SUCCESS
758854675097526272 : SUCCESS
75882865992270233

726828223124897792 : SUCCESS
726224900189511680 : SUCCESS
725842289046749185 : SUCCESS
725786712245440512 : SUCCESS
725729321944506368 : SUCCESS
725458796924002305 : SUCCESS
724983749226668032 : SUCCESS
724771698126512129 : SUCCESS
724405726123311104 : SUCCESS
724049859469295616 : SUCCESS
724046343203856385 : SUCCESS
724004602748780546 : SUCCESS
723912936180330496 : SUCCESS
723688335806480385 : SUCCESS
723673163800948736 : SUCCESS
723179728551723008 : SUCCESS
722974582966214656 : SUCCESS
722613351520608256 : SUCCESS
721503162398597120 : SUCCESS
721001180231503872 : SUCCESS
720785406564900865 : SUCCESS
720775346191278080 : SUCCESS
720415127506415616 : SUCCESS
720389942216527872 : SUCCESS
720340705894408192 : SUCCESS
720059472081784833 : SUCCESS
720043174954147842 : SUCCESS
719991154352222208 : SUCCESS
719704490224398336 : SUCCESS
719551379208073216 : SUCCESS
719367763014393856 : SUCCESS
719339463458033665 : SUCCESS
719332531645071360 : SUCCESS
718971898235854848 : SUCCESS
71893924195119

697482927769255936 : SUCCESS
697463031882764288 : SUCCESS
697270446429966336 : SUCCESS
697259378236399616 : SUCCESS
697255105972801536 : SUCCESS
697242256848379904 : SUCCESS
696900204696625153 : SUCCESS
696894894812565505 : SUCCESS
696886256886657024 : SUCCESS
696877980375769088 : SUCCESS
696754882863349760 : SUCCESS
696744641916489729 : SUCCESS
696713835009417216 : SUCCESS
696518437233913856 : SUCCESS
696490539101908992 : SUCCESS
696488710901260288 : SUCCESS
696405997980676096 : SUCCESS
696100768806522880 : SUCCESS
695816827381944320 : SUCCESS
695794761660297217 : SUCCESS
695767669421768709 : SUCCESS
695629776980148225 : SUCCESS
695446424020918272 : SUCCESS
695409464418041856 : SUCCESS
695314793360662529 : SUCCESS
695095422348574720 : SUCCESS
695074328191332352 : SUCCESS
695064344191721472 : SUCCESS
695051054296211456 : SUCCESS
694925794720792577 : SUCCESS
694905863685980160 : SUCCESS
694669722378485760 : SUCCESS
694356675654983680 : SUCCESS
694352839993344000 : SUCCESS
69434202872600

680221482581123072 : SUCCESS
680206703334408192 : SUCCESS
680191257256136705 : SUCCESS
680176173301628928 : SUCCESS
680161097740095489 : SUCCESS
680145970311643136 : SUCCESS
680130881361686529 : SUCCESS
680115823365742593 : SUCCESS
680100725817409536 : SUCCESS
680085611152338944 : SUCCESS
680070545539371008 : SUCCESS
680055455951884288 : FAIL
679877062409191424 : SUCCESS
679872969355714560 : SUCCESS
679862121895714818 : SUCCESS
679854723806179328 : SUCCESS
679844490799091713 : SUCCESS
679828447187857408 : SUCCESS
679777920601223168 : SUCCESS
679736210798047232 : SUCCESS
679729593985699840 : SUCCESS
679722016581222400 : SUCCESS
679530280114372609 : SUCCESS
679527802031484928 : SUCCESS
679511351870550016 : SUCCESS
679503373272485890 : SUCCESS
679475951516934144 : SUCCESS
679462823135686656 : SUCCESS
679405845277462528 : SUCCESS
679158373988876288 : SUCCESS
679148763231985668 : SUCCESS
679132435750195208 : SUCCESS
679111216690831360 : SUCCESS
679062614270468097 : SUCCESS
67904748518943948

Rate limit reached. Sleeping for: 604


678410210315247616 : SUCCESS
678399652199309312 : SUCCESS
678396796259975168 : SUCCESS
678389028614488064 : SUCCESS
678380236862578688 : SUCCESS
678341075375947776 : SUCCESS
678334497360859136 : SUCCESS
678278586130948096 : SUCCESS
678255464182861824 : SUCCESS
678023323247357953 : SUCCESS
678021115718029313 : SUCCESS
677961670166224897 : SUCCESS
677918531514703872 : SUCCESS
677895101218201600 : SUCCESS
677716515794329600 : SUCCESS
677700003327029250 : SUCCESS
677698403548192770 : SUCCESS
677687604918272002 : SUCCESS
677673981332312066 : SUCCESS
677662372920729601 : SUCCESS
677644091929329666 : SUCCESS
677573743309385728 : SUCCESS
677565715327688705 : SUCCESS
677557565589463040 : SUCCESS
677547928504967168 : SUCCESS
677530072887205888 : SUCCESS
677335745548390400 : SUCCESS
677334615166730240 : SUCCESS
677331501395156992 : SUCCESS
677328882937298944 : SUCCESS
677314812125323265 : SUCCESS
677301033169788928 : SUCCESS
677269281705472000 : SUCCESS
677228873407442944 : SUCCESS
67718730018761

671511350426865664 : SUCCESS
671504605491109889 : SUCCESS
671497587707535361 : SUCCESS
671488513339211776 : SUCCESS
671486386088865792 : SUCCESS
671485057807351808 : SUCCESS
671390180817915904 : SUCCESS
671362598324076544 : SUCCESS
671357843010908160 : SUCCESS
671355857343524864 : SUCCESS
671347597085433856 : SUCCESS
671186162933985280 : SUCCESS
671182547775299584 : SUCCESS
671166507850801152 : SUCCESS
671163268581498880 : SUCCESS
671159727754231808 : SUCCESS
671154572044468225 : SUCCESS
671151324042559489 : SUCCESS
671147085991960577 : SUCCESS
671141549288370177 : SUCCESS
671138694582165504 : SUCCESS
671134062904504320 : SUCCESS
671122204919246848 : SUCCESS
671115716440031232 : SUCCESS
671109016219725825 : SUCCESS
670995969505435648 : SUCCESS
670842764863651840 : SUCCESS
670840546554966016 : SUCCESS
670838202509447168 : SUCCESS
670833812859932673 : SUCCESS
670832455012716544 : SUCCESS
670826280409919488 : SUCCESS
670823764196741120 : SUCCESS
670822709593571328 : SUCCESS
67081549739135

666353288456101888 : SUCCESS
666345417576210432 : SUCCESS
666337882303524864 : SUCCESS
666293911632134144 : SUCCESS
666287406224695296 : SUCCESS
666273097616637952 : SUCCESS
666268910803644416 : SUCCESS
666104133288665088 : SUCCESS
666102155909144576 : SUCCESS
666099513787052032 : SUCCESS
666094000022159362 : SUCCESS
666082916733198337 : FAIL
666073100786774016 : SUCCESS
666071193221509120 : SUCCESS
666063827256086533 : SUCCESS
666058600524156928 : SUCCESS
666057090499244032 : SUCCESS
666055525042405380 : SUCCESS
666051853826850816 : SUCCESS
666050758794694657 : SUCCESS
666049248165822465 : SUCCESS
666044226329800704 : SUCCESS
666033412701032449 : SUCCESS
666029285002620928 : SUCCESS
666020888022790149 : SUCCESS
890240255349198849 : SUCCESS
888202515573088257 : FAIL
873697596434513921 : FAIL
872668790621863937 : FAIL
872261713294495745 : FAIL
869988702071779329 : FAIL
866816280283807744 : FAIL
862096992088072192 : SUCCESS
861769973181624320 : FAIL
856602993587888130 : FAIL
851953902622