In [9]:
#Import all packages needed
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import time
import re
import json
import os
import requests
import string
import tweepy
from tweepy import OAuthHandler
from timeit import default_timer as timer
from IPython.display import Image
from IPython.core.display import HTML 
%matplotlib inline

<a id='gather'></a>
## Gather the Data

>1. **Twitter archive file:** download this file manually by clicking the following link: [twitter_archive_enhanced.csv](https://d17h27t6h515a5.cloudfront.net/topher/2017/August/59a4e958_twitter-archive-enhanced/twitter-archive-enhanced.csv)

>2. **The tweet image predictions**, i.e., what breed of dog (or other object, animal, etc.) is present in each tweet according to a neural network. This file (image_predictions.tsv) is hosted on Udacity's servers and should be downloaded programmatically using the Requests library and the following URL: https://d17h27t6h515a5.cloudfront.net/topher/2017/August/599fd2ad_image-predictions/image-predictions.tsv

>3. **Twitter API & JSON:** Each tweet's retweet count and favorite ("like") count at minimum, and any additional data you find interesting. Using the tweet IDs in the WeRateDogs Twitter archive, query the Twitter API for each tweet's JSON data using Python's Tweepy library and store each tweet's entire set of JSON data in a file called tweet_json.txt.
>Each tweet's JSON data should be written to its own line. Then read this .txt file line by line into a pandas DataFrame with (at minimum) tweet ID, retweet count, and favorite count. 

<a id='archive-g'></a>
### Twitter Archive

In [2]:
# Load the manually downloaded WeRateDogs archive into a DataFrame
archive_csv = 'twitter-archive-enhanced.csv'
twitter_archive = pd.read_csv(archive_csv)

<a id='predictions-g'></a>
### Image Predictions

In [3]:
# Ensure the download folder exists. exist_ok=True makes the call a no-op when
# the directory is already there, so the cell is safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
folder_name = 'image_predictions'
os.makedirs(folder_name, exist_ok=True)

In [4]:
# Download the image-prediction TSV hosted on Udacity's servers.
url = 'https://d17h27t6h515a5.cloudfront.net/topher/2017/August/599fd2ad_image-predictions/image-predictions.tsv'
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops a 4xx/5xx error page from being saved as the dataset.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [5]:
# Save the downloaded bytes inside folder_name, named after the URL's last path segment
tsv_name = url.rsplit('/', 1)[-1]
tsv_path = os.path.join(folder_name, tsv_name)
with open(tsv_path, mode='wb') as out_file:
    out_file.write(response.content)

In [6]:
# Load the downloaded tab-separated predictions file into a DataFrame
predictions_path = 'image_predictions/image-predictions.tsv'
predictions = pd.read_csv(predictions_path, delimiter='\t')

<a id='api-g'></a>
### Twitter API Data

In [7]:
# Twitter API credentials are read from environment variables so real secrets
# never have to be typed into (or committed with) the notebook. The 'HIDDEN'
# placeholder is kept as the fallback when a variable is not set.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', 'HIDDEN')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', 'HIDDEN')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', 'HIDDEN')
access_secret = os.environ.get('TWITTER_ACCESS_SECRET', 'HIDDEN')

# OAuth 1a handler carrying both the application and the user credentials
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

# JSONParser makes API calls return parsed JSON that json.dumps can serialise
# directly; the rate-limit flags make the client sleep (and print a notice)
# instead of raising when the limit is hit.
# NOTE(review): wait_on_rate_limit_notify appears to be a Tweepy 3.x-only
# argument — confirm against the installed Tweepy version.
api = tweepy.API(auth_handler = auth,
                 parser = tweepy.parsers.JSONParser(),
                 wait_on_rate_limit = True,
                 wait_on_rate_limit_notify = True)

In [16]:
# Query the Twitter API for every tweet ID in the archive and store each
# tweet's JSON on its own line of tweet_json.txt. IDs whose lookup raises are
# collected in `errors` so they can be retried afterwards.
errors = []

# Mode 'w' (rather than 'a') starts from an empty file, so re-running this cell
# does not append a second copy of every tweet to an existing tweet_json.txt.
with open('tweet_json.txt', 'w') as file:
    for tweet_id in twitter_archive['tweet_id']:
        try:
            start_time = time.time()
            tweet = api.get_status(tweet_id, tweet_mode='extended')
            # writes one tweet per line
            file.write(json.dumps(tweet) + '\n')
            end_time = time.time()
            # The elapsed time also includes any rate-limit sleep inside get_status
            print('ID {} . Time in seconds: {}'.format(tweet_id, end_time-start_time))
        except Exception as e:
            # Best effort: remember the failing ID and carry on with the next one
            errors.append(tweet_id)
            print("Error for ID: " + str(tweet_id) + str(e))
    print('Finished')

ID 892420643555336193 . Time in seconds: 0.3432650566101074
ID 892177421306343426 . Time in seconds: 0.36792588233947754
ID 891815181378084864 . Time in seconds: 0.3719909191131592
ID 891689557279858688 . Time in seconds: 0.3418569564819336
ID 891327558926688256 . Time in seconds: 0.3563830852508545
ID 891087950875897856 . Time in seconds: 0.3615748882293701
ID 890971913173991426 . Time in seconds: 0.35051870346069336
ID 890729181411237888 . Time in seconds: 0.3573131561279297
ID 890609185150312448 . Time in seconds: 0.36971497535705566
ID 890240255349198849 . Time in seconds: 0.4136519432067871
ID 890006608113172480 . Time in seconds: 0.6161222457885742
ID 889880896479866881 . Time in seconds: 0.3861541748046875
ID 889665388333682689 . Time in seconds: 0.35082483291625977
ID 889638837579907072 . Time in seconds: 0.3706979751586914
ID 889531135344209921 . Time in seconds: 0.35528016090393066
ID 889278841981685760 . Time in seconds: 0.366441011428833
ID 888917238123831296 . Time in seco

ID 866720684873056260 . Time in seconds: 0.4062938690185547
ID 866686824827068416 . Time in seconds: 0.3937349319458008
ID 866450705531457537 . Time in seconds: 0.35666584968566895
ID 866334964761202691 . Time in seconds: 0.35637998580932617
ID 866094527597207552 . Time in seconds: 0.4498758316040039
ID 865718153858494464 . Time in seconds: 0.3542971611022949
ID 865359393868664832 . Time in seconds: 0.35964012145996094
ID 865006731092295680 . Time in seconds: 0.34232306480407715
ID 864873206498414592 . Time in seconds: 0.37114787101745605
ID 864279568663928832 . Time in seconds: 0.35623788833618164
ID 864197398364647424 . Time in seconds: 0.35840606689453125
ID 863907417377173506 . Time in seconds: 0.355867862701416
ID 863553081350529029 . Time in seconds: 0.3613910675048828
ID 863471782782697472 . Time in seconds: 0.34983396530151367
ID 863432100342583297 . Time in seconds: 0.3557000160217285
ID 863427515083354112 . Time in seconds: 0.36278390884399414
ID 863079547188785154 . Time in 

ID 841833993020538882 . Time in seconds: 0.35916996002197266
ID 841680585030541313 . Time in seconds: 0.3834860324859619
ID 841439858740625411 . Time in seconds: 0.35065388679504395
ID 841320156043304961 . Time in seconds: 0.42807817459106445
ID 841314665196081154 . Time in seconds: 0.3504199981689453
ID 841077006473256960 . Time in seconds: 0.34380292892456055
ID 840761248237133825 . Time in seconds: 0.4533498287200928
ID 840728873075638272 . Time in seconds: 0.3812248706817627
ID 840698636975636481 . Time in seconds: 0.3545267581939697
ID 840696689258311684 . Time in seconds: 0.4004991054534912
ID 840632337062862849 . Time in seconds: 0.34806180000305176
ID 840370681858686976 . Time in seconds: 0.44313693046569824
ID 840268004936019968 . Time in seconds: 0.39560794830322266
ID 839990271299457024 . Time in seconds: 0.3515326976776123
ID 839549326359670784 . Time in seconds: 0.35523295402526855
ID 839290600511926273 . Time in seconds: 0.36104273796081543
ID 839239871831150596 . Time in

ID 824775126675836928 . Time in seconds: 0.695559024810791
ID 824663926340194305 . Time in seconds: 0.35376906394958496
ID 824325613288833024 . Time in seconds: 0.35062599182128906
ID 824297048279236611 . Time in seconds: 0.34685683250427246
ID 824025158776213504 . Time in seconds: 0.40143680572509766
ID 823939628516474880 . Time in seconds: 0.3456587791442871
ID 823719002937630720 . Time in seconds: 0.37015795707702637
ID 823699002998870016 . Time in seconds: 0.3549973964691162
ID 823581115634085888 . Time in seconds: 0.35658812522888184
ID 823333489516937216 . Time in seconds: 0.35183000564575195
ID 823322678127919110 . Time in seconds: 0.36151599884033203
ID 823269594223824897 . Time in seconds: 0.36272716522216797
ID 822975315408461824 . Time in seconds: 0.357147216796875
ID 822872901745569793 . Time in seconds: 0.399914026260376
ID 822859134160621569 . Time in seconds: 0.3538658618927002
ID 822647212903690241 . Time in seconds: 0.3690319061279297
ID 822610361945911296 . Time in se

ID 807010152071229440 . Time in seconds: 0.3895108699798584
ID 806629075125202948 . Time in seconds: 0.35403013229370117
ID 806620845233815552 . Time in seconds: 0.34610700607299805
ID 806576416489959424 . Time in seconds: 0.38033103942871094
ID 806542213899489280 . Time in seconds: 0.3654811382293701
ID 806242860592926720 . Time in seconds: 0.383023738861084
ID 806219024703037440 . Time in seconds: 0.3497171401977539
ID 805958939288408065 . Time in seconds: 0.3497626781463623
ID 805932879469572096 . Time in seconds: 0.35469484329223633
ID 805826884734976000 . Time in seconds: 0.3541679382324219
ID 805823200554876929 . Time in seconds: 0.35953402519226074
ID 805520635690676224 . Time in seconds: 0.38890719413757324
ID 805487436403003392 . Time in seconds: 0.4297361373901367
ID 805207613751304193 . Time in seconds: 0.3397092819213867
ID 804738756058218496 . Time in seconds: 0.3438999652862549
ID 804475857670639616 . Time in seconds: 0.3783261775970459
ID 804413760345620481 . Time in sec

ID 789903600034189313 . Time in seconds: 0.3420679569244385
ID 789628658055020548 . Time in seconds: 0.3396611213684082
ID 789599242079838210 . Time in seconds: 0.3871431350708008
ID 789530877013393408 . Time in seconds: 0.3590550422668457
ID 789314372632018944 . Time in seconds: 0.36669397354125977
ID 789280767834746880 . Time in seconds: 0.3580479621887207
ID 789268448748703744 . Time in seconds: 0.37395501136779785
ID 789137962068021249 . Time in seconds: 0.3596079349517822
ID 788908386943430656 . Time in seconds: 0.3472940921783447
ID 788765914992902144 . Time in seconds: 0.37413883209228516
ID 788552643979468800 . Time in seconds: 0.35465502738952637
ID 788412144018661376 . Time in seconds: 0.3430666923522949
ID 788178268662984705 . Time in seconds: 0.41994404792785645
ID 788150585577050112 . Time in seconds: 0.35091495513916016
ID 788070120937619456 . Time in seconds: 0.3654482364654541
ID 788039637453406209 . Time in seconds: 0.35166215896606445
ID 787810552592695296 . Time in s

ID 771908950375665664 . Time in seconds: 0.3627431392669678
ID 771770456517009408 . Time in seconds: 0.35594797134399414
ID 771500966810099713 . Time in seconds: 0.3623678684234619
ID 771380798096281600 . Time in seconds: 0.3467090129852295
ID 771171053431250945 . Time in seconds: 0.36184215545654297
ID 771136648247640064 . Time in seconds: 0.35735607147216797
ID 771102124360998913 . Time in seconds: 0.36348700523376465
ID 771014301343748096 . Time in seconds: 0.3799018859863281
Error for ID: 771004394259247104[{'code': 179, 'message': 'Sorry, you are not authorized to see this status.'}]
ID 770787852854652928 . Time in seconds: 0.3631019592285156
ID 770772759874076672 . Time in seconds: 0.34639620780944824
Error for ID: 770743923962707968[{'code': 144, 'message': 'No status found with that ID.'}]
ID 770655142660169732 . Time in seconds: 0.3492698669433594
ID 770414278348247044 . Time in seconds: 0.35884594917297363
ID 770293558247038976 . Time in seconds: 0.4388432502746582
ID 7700937

Rate limit reached. Sleeping for: 562


ID 758740312047005698 . Time in seconds: 1040.834567308426
ID 758474966123810816 . Time in seconds: 0.35759806632995605
ID 758467244762497024 . Time in seconds: 0.39803218841552734
ID 758405701903519748 . Time in seconds: 0.4056389331817627
ID 758355060040593408 . Time in seconds: 0.42026710510253906
ID 758099635764359168 . Time in seconds: 0.35036420822143555
ID 758041019896193024 . Time in seconds: 0.3544628620147705
ID 757741869644341248 . Time in seconds: 0.3581700325012207
ID 757729163776290825 . Time in seconds: 0.37550902366638184
ID 757725642876129280 . Time in seconds: 0.36950182914733887
ID 757611664640446465 . Time in seconds: 0.34914112091064453
ID 757597904299253760 . Time in seconds: 0.3603038787841797
ID 757596066325864448 . Time in seconds: 0.3659350872039795
ID 757400162377592832 . Time in seconds: 0.369549036026001
ID 757393109802180609 . Time in seconds: 0.34621095657348633
ID 757354760399941633 . Time in seconds: 0.3656492233276367
ID 756998049151549440 . Time in se

ID 744971049620602880 . Time in seconds: 0.3616960048675537
ID 744709971296780288 . Time in seconds: 0.35588979721069336
ID 744334592493166593 . Time in seconds: 0.36476802825927734
ID 744234799360020481 . Time in seconds: 0.4386632442474365
ID 744223424764059648 . Time in seconds: 0.8784019947052002
ID 743980027717509120 . Time in seconds: 0.703341007232666
ID 743895849529389061 . Time in seconds: 0.8268947601318359
ID 743835915802583040 . Time in seconds: 0.7281546592712402
ID 743609206067040256 . Time in seconds: 0.35652828216552734
ID 743595368194129920 . Time in seconds: 0.36345601081848145
ID 743545585370791937 . Time in seconds: 0.40409183502197266
ID 743510151680958465 . Time in seconds: 0.34756898880004883
ID 743253157753532416 . Time in seconds: 0.3624858856201172
ID 743222593470234624 . Time in seconds: 0.34717798233032227
ID 743210557239623680 . Time in seconds: 0.4204087257385254
ID 742534281772302336 . Time in seconds: 0.36771392822265625
ID 742528092657332225 . Time in s

ID 720389942216527872 . Time in seconds: 0.3455169200897217
ID 720340705894408192 . Time in seconds: 0.37031006813049316
ID 720059472081784833 . Time in seconds: 0.36870408058166504
ID 720043174954147842 . Time in seconds: 0.38874316215515137
ID 719991154352222208 . Time in seconds: 0.36557888984680176
ID 719704490224398336 . Time in seconds: 0.35980701446533203
ID 719551379208073216 . Time in seconds: 0.3638730049133301
ID 719367763014393856 . Time in seconds: 0.35315585136413574
ID 719339463458033665 . Time in seconds: 0.3910820484161377
ID 719332531645071360 . Time in seconds: 0.3453340530395508
ID 718971898235854848 . Time in seconds: 0.3708198070526123
ID 718939241951195136 . Time in seconds: 0.400501012802124
ID 718631497683582976 . Time in seconds: 0.5166988372802734
ID 718613305783398402 . Time in seconds: 0.3581533432006836
ID 718540630683709445 . Time in seconds: 0.3509349822998047
ID 718460005985447936 . Time in seconds: 0.35839295387268066
ID 718454725339934721 . Time in se

ID 707297311098011648 . Time in seconds: 0.3556630611419678
ID 707059547140169728 . Time in seconds: 0.3614490032196045
ID 707038192327901184 . Time in seconds: 0.35277891159057617
ID 707021089608753152 . Time in seconds: 0.38010382652282715
ID 707014260413456384 . Time in seconds: 0.43663692474365234
ID 706904523814649856 . Time in seconds: 0.4441561698913574
ID 706901761596989440 . Time in seconds: 0.37144899368286133
ID 706681918348251136 . Time in seconds: 0.3599271774291992
ID 706644897839910912 . Time in seconds: 0.3546152114868164
ID 706593038911545345 . Time in seconds: 0.3715171813964844
ID 706538006853918722 . Time in seconds: 0.36498403549194336
ID 706516534877929472 . Time in seconds: 0.4117460250854492
ID 706346369204748288 . Time in seconds: 0.363400936126709
ID 706310011488698368 . Time in seconds: 0.38491106033325195
ID 706291001778950144 . Time in seconds: 0.36654090881347656
ID 706265994973601792 . Time in seconds: 0.37514424324035645
ID 706169069255446529 . Time in s

ID 696713835009417216 . Time in seconds: 0.36653804779052734
ID 696518437233913856 . Time in seconds: 0.3636460304260254
ID 696490539101908992 . Time in seconds: 0.3565239906311035
ID 696488710901260288 . Time in seconds: 0.36763596534729004
ID 696405997980676096 . Time in seconds: 0.3571639060974121
ID 696100768806522880 . Time in seconds: 0.384829044342041
ID 695816827381944320 . Time in seconds: 0.35600781440734863
ID 695794761660297217 . Time in seconds: 0.3844151496887207
ID 695767669421768709 . Time in seconds: 0.3464789390563965
ID 695629776980148225 . Time in seconds: 0.35701489448547363
ID 695446424020918272 . Time in seconds: 0.3472757339477539
ID 695409464418041856 . Time in seconds: 0.35614013671875
ID 695314793360662529 . Time in seconds: 0.3513469696044922
ID 695095422348574720 . Time in seconds: 0.3545191287994385
ID 695074328191332352 . Time in seconds: 0.3761270046234131
ID 695064344191721472 . Time in seconds: 0.3600199222564697
ID 695051054296211456 . Time in seconds

ID 687127927494963200 . Time in seconds: 0.3657350540161133
ID 687124485711986689 . Time in seconds: 0.38518691062927246
ID 687109925361856513 . Time in seconds: 0.37912702560424805
ID 687102708889812993 . Time in seconds: 0.4487619400024414
ID 687096057537363968 . Time in seconds: 0.37852907180786133
ID 686947101016735744 . Time in seconds: 0.3611409664154053
ID 686760001961103360 . Time in seconds: 0.3513221740722656
ID 686749460672679938 . Time in seconds: 0.3663351535797119
ID 686730991906516992 . Time in seconds: 0.36635303497314453
ID 686683045143953408 . Time in seconds: 0.372406005859375
ID 686618349602762752 . Time in seconds: 0.3621490001678467
ID 686606069955735556 . Time in seconds: 0.33742785453796387
ID 686394059078897668 . Time in seconds: 0.3800220489501953
ID 686386521809772549 . Time in seconds: 0.3775758743286133
ID 686377065986265092 . Time in seconds: 0.3817729949951172
ID 686358356425093120 . Time in seconds: 0.3542940616607666
ID 686286779679375361 . Time in seco

ID 680206703334408192 . Time in seconds: 0.3621532917022705
ID 680191257256136705 . Time in seconds: 0.3644528388977051
ID 680176173301628928 . Time in seconds: 0.4331929683685303
ID 680161097740095489 . Time in seconds: 0.3662540912628174
ID 680145970311643136 . Time in seconds: 0.4180600643157959
ID 680130881361686529 . Time in seconds: 0.3673698902130127
ID 680115823365742593 . Time in seconds: 0.3792102336883545
ID 680100725817409536 . Time in seconds: 0.3905038833618164
ID 680085611152338944 . Time in seconds: 0.4295363426208496
ID 680070545539371008 . Time in seconds: 0.35741686820983887
Error for ID: 680055455951884288[{'code': 144, 'message': 'No status found with that ID.'}]
ID 679877062409191424 . Time in seconds: 0.37607502937316895
ID 679872969355714560 . Time in seconds: 0.37260007858276367
ID 679862121895714818 . Time in seconds: 0.36080312728881836
ID 679854723806179328 . Time in seconds: 0.34322690963745117
ID 679844490799091713 . Time in seconds: 0.3549361228942871
ID 

Rate limit reached. Sleeping for: 560


ID 676975532580409345 . Time in seconds: 565.5681300163269
ID 676957860086095872 . Time in seconds: 0.42624998092651367
ID 676949632774234114 . Time in seconds: 0.3523130416870117
ID 676948236477857792 . Time in seconds: 0.3513147830963135
ID 676946864479084545 . Time in seconds: 0.3792300224304199
ID 676942428000112642 . Time in seconds: 0.3857121467590332
ID 676936541936185344 . Time in seconds: 0.5097160339355469
ID 676916996760600576 . Time in seconds: 0.8015537261962891
ID 676897532954456065 . Time in seconds: 0.7503519058227539
ID 676864501615042560 . Time in seconds: 0.7975258827209473
ID 676821958043033607 . Time in seconds: 0.6918048858642578
ID 676819651066732545 . Time in seconds: 0.35929298400878906
ID 676811746707918848 . Time in seconds: 0.3710360527038574
ID 676776431406465024 . Time in seconds: 0.3561677932739258
ID 676617503762681856 . Time in seconds: 0.37014126777648926
ID 676613908052996102 . Time in seconds: 0.3617568016052246
ID 676606785097199616 . Time in second

ID 673956914389192708 . Time in seconds: 0.360584020614624
ID 673919437611909120 . Time in seconds: 0.3835718631744385
ID 673906403526995968 . Time in seconds: 0.39546990394592285
ID 673887867907739649 . Time in seconds: 0.348560094833374
ID 673716320723169284 . Time in seconds: 0.36315274238586426
ID 673715861853720576 . Time in seconds: 0.3879380226135254
ID 673711475735838725 . Time in seconds: 0.37122201919555664
ID 673709992831262724 . Time in seconds: 0.36493587493896484
ID 673708611235921920 . Time in seconds: 0.3687121868133545
ID 673707060090052608 . Time in seconds: 0.38696980476379395
ID 673705679337693185 . Time in seconds: 0.36664915084838867
ID 673700254269775872 . Time in seconds: 0.37487101554870605
ID 673697980713705472 . Time in seconds: 0.35335803031921387
ID 673689733134946305 . Time in seconds: 0.3645339012145996
ID 673688752737402881 . Time in seconds: 0.3513679504394531
ID 673686845050527744 . Time in seconds: 0.39761805534362793
ID 673680198160809984 . Time in s

ID 671109016219725825 . Time in seconds: 0.35520505905151367
ID 670995969505435648 . Time in seconds: 0.36893200874328613
ID 670842764863651840 . Time in seconds: 0.35506510734558105
ID 670840546554966016 . Time in seconds: 0.35843396186828613
ID 670838202509447168 . Time in seconds: 0.34961795806884766
ID 670833812859932673 . Time in seconds: 0.4233427047729492
ID 670832455012716544 . Time in seconds: 0.35229015350341797
ID 670826280409919488 . Time in seconds: 0.37547898292541504
ID 670823764196741120 . Time in seconds: 0.3592860698699951
ID 670822709593571328 . Time in seconds: 0.3719000816345215
ID 670815497391357952 . Time in seconds: 0.35593605041503906
ID 670811965569282048 . Time in seconds: 0.3757498264312744
ID 670807719151067136 . Time in seconds: 0.36552000045776367
ID 670804601705242624 . Time in seconds: 0.3804290294647217
ID 670803562457407488 . Time in seconds: 0.3586769104003906
ID 670797304698376195 . Time in seconds: 0.37802910804748535
ID 670792680469889025 . Time i

ID 668625577880875008 . Time in seconds: 0.35765910148620605
ID 668623201287675904 . Time in seconds: 0.3508138656616211
ID 668620235289837568 . Time in seconds: 0.3511769771575928
ID 668614819948453888 . Time in seconds: 0.35134196281433105
ID 668587383441514497 . Time in seconds: 0.4342191219329834
ID 668567822092664832 . Time in seconds: 0.3491179943084717
ID 668544745690562560 . Time in seconds: 0.4270341396331787
ID 668542336805281792 . Time in seconds: 0.3657851219177246
ID 668537837512433665 . Time in seconds: 0.3661990165710449
ID 668528771708952576 . Time in seconds: 0.3612239360809326
ID 668507509523615744 . Time in seconds: 0.3603041172027588
ID 668496999348633600 . Time in seconds: 0.37006306648254395
ID 668484198282485761 . Time in seconds: 0.47121191024780273
ID 668480044826800133 . Time in seconds: 0.36682629585266113
ID 668466899341221888 . Time in seconds: 0.360576868057251
ID 668297328638447616 . Time in seconds: 0.37282800674438477
ID 668291999406125056 . Time in sec

ID 666071193221509120 . Time in seconds: 0.3957040309906006
ID 666063827256086533 . Time in seconds: 0.3601720333099365
ID 666058600524156928 . Time in seconds: 0.38234400749206543
ID 666057090499244032 . Time in seconds: 0.35860681533813477
ID 666055525042405380 . Time in seconds: 0.3513777256011963
ID 666051853826850816 . Time in seconds: 0.3667309284210205
ID 666050758794694657 . Time in seconds: 0.3787729740142822
ID 666049248165822465 . Time in seconds: 0.360522985458374
ID 666044226329800704 . Time in seconds: 0.3517630100250244
ID 666033412701032449 . Time in seconds: 0.34333181381225586
ID 666029285002620928 . Time in seconds: 0.3558328151702881
ID 666020888022790149 . Time in seconds: 0.3531956672668457
Finished


In [17]:
# Tweet IDs whose lookup raised an exception during the fetch loop
errors

[888202515573088257,
 873697596434513921,
 872668790621863937,
 872261713294495745,
 869988702071779329,
 866816280283807744,
 861769973181624320,
 856602993587888130,
 851953902622658560,
 845459076796616705,
 844704788403113984,
 842892208864923648,
 837366284874571778,
 837012587749474308,
 829374341691346946,
 827228250799742977,
 812747805718642688,
 802247111496568832,
 779123168116150273,
 775096608509886464,
 771004394259247104,
 770743923962707968,
 759566828574212096,
 754011816964026368,
 680055455951884288]

#### Re-try error tweets

In [18]:
# Try again to gather the missing tweets.
# IDs that fail a second time are collected in errors2.
errors2 = []

with open('tweet_json.txt', 'a') as file:
    for tweet_id in errors:
        try:
            # `api` is built with tweepy.parsers.JSONParser, so get_status
            # already returns parsed JSON (the first fetch loop passes it
            # straight to json.dumps). Accessing `._json` on that result would
            # raise AttributeError and misreport a successful lookup as an error.
            tweet = api.get_status(tweet_id, tweet_mode='extended')
            file.write(json.dumps(tweet) + '\n')

        except Exception as e:
            print("Error for ID: " + str(tweet_id) + str(e))
            errors2.append(tweet_id)

Error for ID: 888202515573088257[{'code': 144, 'message': 'No status found with that ID.'}]
Error for ID: 873697596434513921[{'code': 144, 'message': 'No status found with that ID.'}]
Error for ID: 872668790621863937[{'code': 144, 'message': 'No status found with that ID.'}]
Error for ID: 872261713294495745[{'code': 144, 'message': 'No status found with that ID.'}]
Error for ID: 869988702071779329[{'code': 144, 'message': 'No status found with that ID.'}]
Error for ID: 866816280283807744[{'code': 144, 'message': 'No status found with that ID.'}]
Error for ID: 861769973181624320[{'code': 144, 'message': 'No status found with that ID.'}]
Error for ID: 856602993587888130[{'code': 144, 'message': 'No status found with that ID.'}]
Error for ID: 851953902622658560[{'code': 144, 'message': 'No status found with that ID.'}]
Error for ID: 845459076796616705[{'code': 144, 'message': 'No status found with that ID.'}]
Error for ID: 844704788403113984[{'code': 144, 'message': 'No status found with 

In [19]:
# True when every retried ID failed again, i.e. the retry recovered nothing
errors2 == errors

True

In [20]:
# Build one record per tweet from the line-delimited JSON in tweet_json.txt.
tweet_df = []

with open('tweet_json.txt') as json_file:
    for line in json_file:
        # Skip blank lines instead of letting json.loads fail on them
        if not line.strip():
            continue
        json_data = json.loads(line)
        # The record keys must match the DataFrame columns requested below.
        # The earlier 'favorite_count'/'retweet_count' keys matched none of
        # them, which left the 'favorites' and 'retweets' columns entirely NaN.
        tweet_df.append({'tweet_id': json_data['id'],
                         'favorites': json_data['favorite_count'],
                         'retweets': json_data['retweet_count']})

# create a new DataFrame
df = pd.DataFrame(tweet_df, columns = ['tweet_id', 'favorites', 'retweets'])

# Save the DataFrame to its own CSV file. Writing it to 'tweet_json.txt' would
# replace the raw per-tweet JSON with CSV text, so this cell could no longer be
# re-run (json.loads would fail on the CSV header line).
df.to_csv('tweet_data.csv', encoding = 'utf-8', index=False)