### Modules used in this notebook:
* Pandas
* Numpy
* Matplotlib
* Seaborn
* Tweepy
* Requests
* Plotly
* Datetime

In [None]:
pip install tweepy

In [3]:
import datetime as dt
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import seaborn as sns
import tweepy as twpy

%matplotlib inline

### Gather

**`twitter-archive-enhanced` table**

In [21]:
def open_set(csv, sep=','):
    """Read a delimited text file into a pandas DataFrame.

    Parameters
    ----------
    csv : str or file-like
        Path (or open buffer) handed straight to ``pd.read_csv``.
    sep : str, default ','
        Field delimiter; pass ``'\\t'`` for tab-separated files.

    Returns
    -------
    pandas.DataFrame
        The parsed table. ``low_memory=False`` makes pandas read the file in
        one pass rather than in chunks.
    """
    return pd.read_csv(csv, sep=sep, low_memory=False)

In [22]:
# Load the Twitter archive CSV from the working directory and preview its
# first rows.
df_tw_arch = open_set('twitter-archive-enhanced.csv')
df_tw_arch.head()

Unnamed: 0,tweet_id,in_reply_to_status_id,in_reply_to_user_id,timestamp,source,text,retweeted_status_id,retweeted_status_user_id,retweeted_status_timestamp,expanded_urls,rating_numerator,rating_denominator,name,doggo,floofer,pupper,puppo
0,892420643555336193,,,2017-08-01 16:23:56 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Phineas. He's a mystical boy. Only eve...,,,,https://twitter.com/dog_rates/status/892420643...,13,10,Phineas,,,,
1,892177421306343426,,,2017-08-01 00:17:27 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Tilly. She's just checking pup on you....,,,,https://twitter.com/dog_rates/status/892177421...,13,10,Tilly,,,,
2,891815181378084864,,,2017-07-31 00:18:03 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Archie. He is a rare Norwegian Pouncin...,,,,https://twitter.com/dog_rates/status/891815181...,12,10,Archie,,,,
3,891689557279858688,,,2017-07-30 15:58:51 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Darla. She commenced a snooze mid meal...,,,,https://twitter.com/dog_rates/status/891689557...,13,10,Darla,,,,
4,891327558926688256,,,2017-07-29 16:00:24 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Franklin. He would like you to stop ca...,,,,https://twitter.com/dog_rates/status/891327558...,12,10,Franklin,,,,


In [11]:
# Column dtypes and non-null counts for the archive table.
df_tw_arch.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2356 entries, 0 to 2355
Data columns (total 17 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   tweet_id                    2356 non-null   int64  
 1   in_reply_to_status_id       78 non-null     float64
 2   in_reply_to_user_id         78 non-null     float64
 3   timestamp                   2356 non-null   object 
 4   source                      2356 non-null   object 
 5   text                        2356 non-null   object 
 6   retweeted_status_id         181 non-null    float64
 7   retweeted_status_user_id    181 non-null    float64
 8   retweeted_status_timestamp  181 non-null    object 
 9   expanded_urls               2297 non-null   object 
 10  rating_numerator            2356 non-null   int64  
 11  rating_denominator          2356 non-null   int64  
 12  name                        2356 non-null   object 
 13  doggo                       2356 

In [13]:
# (rows, columns) of the archive table.
df_tw_arch.shape

(2356, 17)

**`image-predictions` table**

In [16]:
# Remote location of the image-predictions TSV that is downloaded below.
url = 'https://d17h27t6h515a5.cloudfront.net/topher/2017/August/599fd2ad_image-predictions/image-predictions.tsv'

In [20]:
# Download the image-predictions file and store it next to the notebook.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being saved as the TSV.
r = requests.get(url, timeout=30)
r.raise_for_status()

# The context manager closes the handle on every path; the previous
# try/finally hit an unbound `f` whenever open() itself failed.
with open('image-predictions.tsv', 'wb') as f:
    f.write(r.content)

In [29]:
# Load the downloaded tab-separated predictions file and preview its first
# rows.
df_image_pred = open_set('image-predictions.tsv', sep='\t')
df_image_pred.head()

Unnamed: 0,tweet_id,jpg_url,img_num,p1,p1_conf,p1_dog,p2,p2_conf,p2_dog,p3,p3_conf,p3_dog
0,666020888022790149,https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg,1,Welsh_springer_spaniel,0.465074,True,collie,0.156665,True,Shetland_sheepdog,0.061428,True
1,666029285002620928,https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg,1,redbone,0.506826,True,miniature_pinscher,0.074192,True,Rhodesian_ridgeback,0.07201,True
2,666033412701032449,https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg,1,German_shepherd,0.596461,True,malinois,0.138584,True,bloodhound,0.116197,True
3,666044226329800704,https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg,1,Rhodesian_ridgeback,0.408143,True,redbone,0.360687,True,miniature_pinscher,0.222752,True
4,666049248165822465,https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg,1,miniature_pinscher,0.560311,True,Rottweiler,0.243682,True,Doberman,0.154629,True


In [30]:
# Column dtypes and non-null counts for the image-predictions table.
df_image_pred.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2075 entries, 0 to 2074
Data columns (total 12 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   tweet_id  2075 non-null   int64  
 1   jpg_url   2075 non-null   object 
 2   img_num   2075 non-null   int64  
 3   p1        2075 non-null   object 
 4   p1_conf   2075 non-null   float64
 5   p1_dog    2075 non-null   bool   
 6   p2        2075 non-null   object 
 7   p2_conf   2075 non-null   float64
 8   p2_dog    2075 non-null   bool   
 9   p3        2075 non-null   object 
 10  p3_conf   2075 non-null   float64
 11  p3_dog    2075 non-null   bool   
dtypes: bool(3), float64(3), int64(2), object(4)
memory usage: 152.1+ KB


In [32]:
# (rows, columns) of the image-predictions table.
df_image_pred.shape

(2075, 12)

**Obtaining additional tweet data from the `Twitter API` with `Tweepy`**

In [69]:
# Read the Twitter API bearer token from the environment so the credential is
# never stored in the notebook. Export TWITTER_BEARER_TOKEN before starting
# the kernel.
bearer_token = os.environ.get('TWITTER_BEARER_TOKEN')
if not bearer_token:
    raise RuntimeError(
        'Set the TWITTER_BEARER_TOKEN environment variable before running this notebook.'
    )

In [None]:
def get_tweets(id):
    """Fetch a single tweet in extended mode and return its JSON payload.

    Parameters
    ----------
    id : int or str
        ID of the tweet to look up. The name shadows the builtin ``id`` but is
        kept so existing keyword calls keep working.

    Returns
    -------
    list of dict
        One-element list holding the JSON dict Tweepy parsed the status from.
        The same list is also printed.

    Notes
    -----
    Errors raised by Tweepy during the lookup are not caught here and
    propagate to the caller.
    """
    # Authenticate app-only with the notebook-level bearer token.
    auth = twpy.OAuth2BearerHandler(bearer_token)

    # The module is imported as `twpy`. `wait_on_rate_limit_notify` no longer
    # exists in Tweepy 4, so only `wait_on_rate_limit` is passed.
    api = twpy.API(auth, wait_on_rate_limit=True)

    # Extended mode asks the API for the untruncated tweet text.
    tweet = api.get_status(id, tweet_mode='extended')

    tweets_for_csv = [tweet._json]
    print(tweets_for_csv)
    return tweets_for_csv
  

In [70]:
# Authenticate app-only with the bearer token. wait_on_rate_limit makes
# Tweepy sleep until the rate-limit window resets instead of raising, so a
# bulk lookup does not record rate-limited requests as failed tweets.
auth = twpy.OAuth2BearerHandler(bearer_token)
api = twpy.API(auth, wait_on_rate_limit=True)

In [71]:
# Fetch one sample tweet (the first archived ID) to explore the Status model.
# The ID is read straight from the archive frame so this cell does not depend
# on an `id` variable that is only assigned further down the notebook.
new = api.get_status(int(df_tw_arch.tweet_id.iloc[0]), tweet_mode='extended')
type(new)

tweepy.models.Status

In [92]:
# Inspect the `place` attribute of the sample tweet.
new.place

In [93]:
# Inspect the `geo` attribute of the sample tweet.
new.geo

In [95]:
# Retweet count reported for the sample tweet.
new.retweet_count

7010

In [97]:
# Favorite (like) count reported for the sample tweet.
new.favorite_count

33829

In [98]:
# Language code reported for the sample tweet.
new.lang

'en'

In [None]:
# Show the `_json` attribute of the Status object (presumably the raw API
# payload Tweepy parsed the object from; confirm against the Tweepy docs).
new._json

In [72]:
# Accumulator: one dict of counts/metadata per successfully fetched tweet.
tw_data = []

In [None]:
# Accumulator: IDs of tweets whose lookup raised an error.
del_tweets = []

In [53]:
# Every tweet ID from the archive, as a plain Python list.
tw_ids = df_tw_arch['tweet_id'].tolist()

# First archived ID, used as a sample. NOTE: this rebinds the builtin `id`
# for the rest of the session.
id = tw_ids[0]

In [None]:
# Look up every archived tweet, store each raw payload as one JSON line in
# tweet_json.txt, and collect the engagement fields in `tw_data`.
# The file is opened once, outside the loop, so earlier tweets are not
# overwritten, and the handle is closed even if the loop is interrupted.
with open('tweet_json.txt', 'w', encoding='utf-8') as f:
    for tweet_id in tw_ids:
        try:
            tw_status = api.get_status(tweet_id, tweet_mode='extended')
        except twpy.TweepyException:
            # Lookup failed (e.g. the tweet is no longer available): report
            # the ID, remember it, and move on. Only Tweepy errors are caught
            # so that programming mistakes still surface.
            print(tweet_id)
            del_tweets.append({'id': tweet_id})
            continue

        # A Status object is not a string; serialize its JSON dict instead.
        f.write(json.dumps(tw_status._json) + '\n')

        rt_count = tw_status.retweet_count
        fv_count = tw_status.favorite_count
        place = tw_status.place
        geo = tw_status.geo
        lang = tw_status.lang

        print(rt_count, fv_count)
        tw_data.append({'tweet_id': tweet_id,
                        'retweet_count': rt_count,
                        'favorite_count': fv_count,
                        'place': place,
                        'geo': geo,
                        'lang': lang})

7010 33829
5302 29340
3482 22069
7228 36955
7765 35323
2602 17814
1666 10370
15761 56893
3624 24530
6101 27971
6153 27051
4168 24575
8353 42073
3718 23689
1884 13358
4447 22130
3765 25652
3538 22482
2879 17323
888202515573088257
2895 19158
4536 26637
9855 40744
15031 60215
8816 29605
4993 26994
6321 30401
2632 10501
3737 19743
2631 18555
4 105
5292 24537
93 0
5616 28624
5331 31612
3108 18015
15444 0
3670 19267
15032 67316
4675 24392
3913 21334
4793 23873
16931 63347
2506 17949
2865 19180
8274 40221
3084 19960
5538 32527
3248 20993
4099 24808
5064 24594
9636 39214
4084 25739
2855 21454
8964 44575
8 113
13404 43468
4494 20494
2333 15058
3200 18719
5216 25038
3468 23680
3654 24346
2930 19716
9 282
2628 20717
37465 92919
2651 14867
5744 0
3958 19816
2244 16789
3435 17142
5930 26451
1078 0
5531 0
1078 6781
5744 37161
15814 69337
70 0
3170 19910
4348 24164
4629 23876
2805 18222
3975 21053
2002 16575
3905 24698
3584 22123
4160 19425
5118 24252
3974 22560
3919 24609
12218 0
3483 22977
3378 201

1054 3596
688 3556
440 2585
866 3777
1758 6195
3143 11298
2423 7782
4677 9925
6903 0
2711 8617
1337 5869
1539 6491
771004394259247104
1105 4625
1328 4894
770743923962707968
1670 6933
1928 5952
1357 5859
2818 0
2127 7153
8843 29320
1564 6050
7134 0
1601 5089
6006 13418
2444 0
827 3941
1099 3864
1189 4739
5390 0
3163 12787
3273 10205
1266 4474
4964 15001
2152 7065
3481 11552
2652 9562
766864461642756096
1260 4755
348 2466
730 3801
1459 5647
1735 6349
2424 0
778 3952
461 3544
1997 6812
1133 4860
3119 25124
2007 6686
3122 10943
1650 6091
1395 5649
50 734
3822 11779
1359 5066
2756 0
2125 6942
3316 11375
5926 10525
3841 9658
1030 4508
14318 29852
1831 5052
3590 0
794 4026
26779 46327
1102 3888
16674 0
1342 4839
1001 4197
1447 5010
3217 10660
890 3578
1753 6188
1190 4685
3349 7086
1288 3929
10582 25114
798 3714
1655 5362
28 0
1934 5540
759923798737051648
1801 6299
1764 5591
759566828574212096
1106 4408
2277 7954
454 1566
1804 5707
1068 0
3829 13645
1852 6082
843 3331
3559 10494
1500 5386
918 

2622 6804
874 2937
904 2454
620 2244
1460 3583
2181 7060
825 2675
940 2649
818 3148
589 2003
520 1860
671 2002
14912 33854
155 1631
1019 2287
398 1408
332 1217
988 2820
535 1913
1046 3187
1590 6980
598 2078
424 1544
887 2828
1079 3818
1040 2974
8915 16827
1532 4548
849 2954
1799 6073
1372 2875
681981167097122816
746 2277
3698 11833
400 1630
820 2427
419 1735
1179 3260
5216 13201
245 1519
3709 8571
672 2417
1629 3604
857 2967
998 2969
236 1336
470 1361
441 2244
1341 3424
586 2217
1825 4230
938 2927
394 1923
563 2216
326 1616
1185 3194
607 1986
742 2192
939 2612
629 2415
1193 3315
1736 3893
440 1576
674 2347
443 1330
262 1363
1050 2594
442 1983
1400 3570
672 2145
1563 3316
857 2118
830 2500
1253 3308
8161 11763
776 2391
680055455951884288
581 1803
598 2015
557 2257
1104 2617
687 2188
12619 33216
998 2859
718 1916
749 1995
426 1540
1888 4376
653 2430
1149 3075
1339 2920
585 1928
17038 29455
1074 2169
7254 19472
927 2560
1036 2714
2316 5445
7216 15507
587 2057
1096 2602
1018 2074
409 1499
