## Run the code below after setting your month of choice in the notebook and saving your Twitter API access tokens in a YAML file (`secret.yml`)

In [1]:
# Import Twitter APIs
import tweepy as tp
import GetOldTweets3 as got

# Import libraries for data reading
import pandas as pd

#For reading secured access code and tokens file
import yaml

In [2]:
# Load the Twitter API credentials (consumer key/secret and access token/secret) from a local YAML file.
# Keep that file private: it must never be committed or shared publicly.
# NOTE(review): the file is parsed with yaml.FullLoader; yaml.safe_load is likely
# sufficient for a plain key/value mapping -- confirm before switching.
with open(r"secret.yml") as secrets_file:
    secret_list = yaml.load(secrets_file, Loader=yaml.FullLoader)

# Authenticate against the Twitter API.
# If you do not have a YAML file, replace the secret_list lookups below with your
# own Twitter API keys, keeping the same key names.
auth = tp.OAuthHandler(
    secret_list["consumer_key"],
    secret_list["consumer_secret"],
)
auth.set_access_token(
    secret_list["access_token"],
    secret_list["access_secret"],
)
# wait_on_rate_limit=True is passed so the client waits instead of failing when rate limited.
api = tp.API(auth, wait_on_rate_limit=True)

In [4]:
# Search parameters: the text to search Twitter for, and the number of the month to collect.
tweet_query, month = "@AIRTEL_KE", 8

In [5]:
# Build a one-month search window: from the first day of `month` up to the first
# day of the following month.

# NOTE(review): months after August are searched in 2019 instead of 2020 --
# presumably because those months had not occurred yet when this was written; confirm.
year = 2019 if month > 8 else 2020

# The window ends on the first day of the next month; December rolls over into
# January of the following year.
if month == 12:
    endyear, endmonth = year + 1, 1
else:
    endyear, endmonth = year, month + 1

# Dates are formatted as zero-padded yyyy-mm-dd strings.
since_date = "{}-{:02d}-01".format(year, month)
until_date = "{}-{:02d}-01".format(endyear, endmonth)

# Create the criteria to use to search for the tweets you need
tweetCriteria = (
    got.manager.TweetCriteria()
    .setQuerySearch(tweet_query)
    .setSince(since_date)
    .setUntil(until_date)
)

# Query for the tweets
tweets = got.manager.TweetManager.getTweets(tweetCriteria)

In [6]:
# Attributes to pull from every tweet, in the order they appear in each row
tweet_fields = ("id", "date", "text", "username", "retweets", "favorites", "geo", "mentions", "hashtags")

# Build one list of values per tweet
tweets_lst = [[getattr(tw, field) for field in tweet_fields] for tw in tweets]

In [7]:
# Check how many tweets the query returned (the bare expression is shown as the cell output)
len(tweets_lst)

17497

In [8]:
# Column headers, in the same order as the values collected for each tweet
tweet_columns = ["ID", "Date", "Post", "Username", "Retweets", "Favorites", "Geo", "Mentions", "Hashtags"]

# Turn the queried tweets into a dataframe and preview ten random rows
tweets_df = pd.DataFrame(data=tweets_lst, columns=tweet_columns)
tweets_df.sample(n=10)

Unnamed: 0,ID,Date,Post,Username,Retweets,Favorites,Geo,Mentions,Hashtags
15656,1290217327716380672,2020-08-03 09:25:56+00:00,Hi @PrinceToddAlly. checks show you are subscr...,AIRTEL_KE,0,0,,@PrinceToddAlly,
8714,1293441666985074689,2020-08-12 06:58:19+00:00,It is not long since I last used the line. You...,MzalendoMwalimu,0,0,,,
6738,1294377185931931656,2020-08-14 20:55:44+00:00,Why would you bring an offer and then remove i...,richardbyzantin,1,5,,,
3197,1296120103025758209,2020-08-19 16:21:27+00:00,But why my @AIRTEL_KE ?,realOgenoG,0,0,,@AIRTEL_KE,
850,1297429482689748994,2020-08-23 07:04:28+00:00,It's usually pathetic kwanza masaa ya kustream...,Amwangike,0,0,,@AIRTEL_KE,
7686,1293866196111110144,2020-08-13 11:05:14+00:00,Under the data option.^Jamo,AIRTEL_KE,0,0,,,
1471,1297141103636942848,2020-08-22 11:58:33+00:00,@AIRTEL_KE Any intention of you providing a de...,alan_wachira,0,0,,@AIRTEL_KE,
8470,1293525154312708096,2020-08-12 12:30:03+00:00,@AIRTEL_KE wasee data inaexpire before nitumie...,osoraaw,0,0,,@AIRTEL_KE,
11059,1292169158298394625,2020-08-08 18:41:49+00:00,Hey @AIRTEL_KE everytime I refresh my phone or...,OwnRentProperty,0,0,,@AIRTEL_KE,
11574,1292030334075375617,2020-08-08 09:30:11+00:00,Noted. ^Jamo,AIRTEL_KE,0,0,,,


In [10]:
# Keep only the tweets that mention @AIRTEL_KE (upper- or lower-case spelling),
# since those are the tweets with questions and queries.
# na=False treats rows with missing mentions as non-matching instead of letting
# missing values end up in the boolean mask.
mentions = tweets_df["Mentions"]
has_airtel_mention = (
    mentions.str.contains("@AIRTEL_KE", na=False)
    | mentions.str.contains("@airtel_ke", na=False)
)
mention_df = tweets_df[has_airtel_mention]
print(mention_df.shape)

# To avoid having to repeat the querying process again, we save the results we got
mention_df.to_csv(path_or_buf="Mentions.csv", index_label="index")
mention_df.sample(20)

(4036, 9)


Unnamed: 0,ID,Date,Post,Username,Retweets,Favorites,Geo,Mentions,Hashtags
10522,1292425167570968578,2020-08-09 11:39:06+00:00,30hrs later @AIRTEL_KE,anericod,0,0,,@AIRTEL_KE,
2829,1296392419240480768,2020-08-20 10:23:33+00:00,@AIRTEL_KE @TelkomKenya don't have this hidden...,am_bwire,0,0,,@AIRTEL_KE @TelkomKenya,
17,1297585094387855362,2020-08-23 17:22:49+00:00,Airtel jaisa ghatiya network kisi bhi company ...,DivyanandMishr2,0,1,,@Airtel_Presence @airtelindia @AirtelNigeria @...,
12750,1291636043788308483,2020-08-07 07:23:24+00:00,"@AIRTEL_KE hizi nini , I have stopped your pro...",NziokaSenior,0,0,,@AIRTEL_KE,
16300,1289825968597557249,2020-08-02 07:30:49+00:00,@AIRTEL_KE @TelkomKenya @TelkomCare_Ke @Safari...,L_Dean254,0,1,,@AIRTEL_KE @TelkomKenya @TelkomCare_Ke @Safari...,
14731,1290649959080112131,2020-08-04 14:05:04+00:00,"@AIRTEL_KE Hi, is it a MUST to upgrade my sim ...",Shiku923,0,0,,@AIRTEL_KE,
2940,1296362802345541632,2020-08-20 08:25:51+00:00,@AIRTEL_KE now that I can not access your app ...,kimenyi5,0,0,,@AIRTEL_KE,
12653,1291657822854316033,2020-08-07 08:49:57+00:00,@AIRTEL_KE mimi hamtanigore aki,kilundeezy,0,0,,@AIRTEL_KE,
2704,1296418805737041921,2020-08-20 12:08:24+00:00,@AIRTEL_KE,kjofranc,1,1,,@AIRTEL_KE,
4825,1295571064488173568,2020-08-18 03:59:46+00:00,@AIRTEL_KE i just bought a airtime from a shop...,njengadomi,0,0,,@AIRTEL_KE,


In [12]:
# Reload the mentions saved by the earlier query instead of querying again.
# The "index" column written to the CSV is dropped, so rows get a fresh default index.
mention_df = pd.read_csv("Mentions.csv").drop(columns=["index"])
mention_df.sample(n=20)

Unnamed: 0,ID,Date,Post,Username,Retweets,Favorites,Geo,Mentions,Hashtags
2703,1292015624504455168,2020-08-08 08:31:44+00:00,cc. @AIRTEL_KE,MLAnyanda,1,1,,@AIRTEL_KE,
3933,1289507174725455873,2020-08-01 10:24:02+00:00,@AIRTEL_KE paid my postpaid bill but yet to be...,KevinWairia,0,0,,@AIRTEL_KE,
45,1297526321938075648,2020-08-23 13:29:16+00:00,Sijai choka na kitu kama Vile nimechoka na @AI...,gaceriwambui,0,0,,@AIRTEL_KE,
652,1296308958593011714,2020-08-20 04:51:54+00:00,@AIRTEL_KE Morning guys. Are the binge bundles...,Chege_Mathu,0,0,,@AIRTEL_KE,
2992,1291581714997878785,2020-08-07 03:47:31+00:00,@AIRTEL_KE Hi i think not happy about the new ...,lucasondieki,0,0,,@AIRTEL_KE,
191,1297252903367966720,2020-08-22 19:22:48+00:00,"Wewe @AIRTEL_KE we know our rights , na connec...",migunna,2,14,,@AIRTEL_KE,
835,1295868774457978880,2020-08-18 23:42:46+00:00,@AirtelNigeria @AIRTEL_KE,oluwamoney8,0,7,,@AirtelNigeria @AIRTEL_KE,
859,1295820324571881473,2020-08-18 20:30:15+00:00,@AIRTEL_KE what's happening to the internet to...,gmas_,0,0,,@AIRTEL_KE,
1663,1294273957294637056,2020-08-14 14:05:32+00:00,@AIRTEL_KE Poor internt connectivity even in N...,rkuruga,0,0,,@AIRTEL_KE,
1359,1294964931889168384,2020-08-16 11:51:13+00:00,@AIRTEL_KE are slowly turning into safaricom a...,tessforcedme,0,1,,@AIRTEL_KE @TelkomKenya,


In [None]:
# Collect replies to each saved tweet: search for tweets addressed to the tweet's
# author ("to:<username>") that were posted after the tweet, and keep those whose
# in_reply_to_status_id_str equals the tweet's ID.

replies = []  # One dictionary per (post, reply) pair

# Walk the tweet IDs together with their row position, which is used to look up
# the other columns of the same row.
for row_idx, tweet_id in enumerate(mention_df["ID"]):

    name = mention_df.Username.iloc[row_idx]  # Username of the current tweet's author
    # in_reply_to_status_id_str is a string, while the IDs in mention_df may be
    # integers (e.g. after being read back from CSV), so compare them as strings.
    tweet_id_str = str(tweet_id)
    replyTweets = []  # Tweets found to be replies to the current tweet
    print(row_idx, tweet_id)

    # Query for all the tweets addressed to the author that were posted after the current tweet.
    # NOTE(review): timeout=999999 is handed to the cursor along with the search
    # arguments; confirm the installed tweepy version accepts it.
    try:
        queryResult = tp.Cursor(api.search, q='to:' + name, since_id=tweet_id, timeout=999999).items()
        for twt in queryResult:
            # Keep only tweets that are direct replies to the current tweet
            if getattr(twt, 'in_reply_to_status_id_str', None) == tweet_id_str:
                replyTweets.append(twt)
    except Exception as err:
        # Best effort: report the failure, drop any partial results for this tweet and
        # move on. `Exception` (rather than a bare except) lets Ctrl-C stop the loop.
        print("error", err)
        replyTweets = []

    # Create one entry per reply, pairing the original post with the reply
    for tweet in replyTweets:
        row = {'ID': tweet_id, 'Date': mention_df.Date.iloc[row_idx], 'Username': name,
               'Post': mention_df.Post.iloc[row_idx], 'Replier': tweet.user.screen_name,
               'Mentions': mention_df.Mentions.iloc[row_idx], 'Hashtags': mention_df.Hashtags.iloc[row_idx],
               'Reply_date': tweet.created_at, 'Reply': tweet.text.replace('\n', ' '),
               'Reply_mentions': ' '.join(mention['screen_name'] for mention in tweet.entities['user_mentions']),
               'Reply_Hashtags': ' '.join(tag['text'] for tag in tweet.entities['hashtags'])}
        replies.append(row)

    # Save everything collected so far after each tweet, so the results on disk
    # stay current even if the loop is interrupted.
    replies_df = pd.DataFrame(replies)
    replies_df.to_csv(path_or_buf="AirtelService.csv", index_label="index")


0 1297587835671924736
1 1297587350051196928
2 1297587308863139840
3 1297586996068659200
4 1297586963227324420
5 1297586909288583170
6 1297586291371134976
7 1297585368318062598
8 1297585094387855362
9 1297584147783876608
10 1297584003944525825
11 1297582642603405312
12 1297582572692791299
13 1297582315409940482
14 1297579138874773504
15 1297576270306344966
16 1297575796098453505
17 1297574525010231303
18 1297573059595919362
19 1297572150388432898
20 1297570678523207680
21 1297567098953248768
22 1297566987258929155
23 1297562058242904064
24 1297559037656145921
25 1297557069944565760
26 1297557002776977409
27 1297556155686944770
28 1297555683366273028
29 1297553535102529537
30 1297552588339064836
31 1297550008737312769
32 1297544585116356608
33 1297542792017829889
34 1297542378824359937
35 1297542112528015363
36 1297541185351962627
37 1297540471447855105
38 1297537731577487360
39 1297536838060605440
40 1297534430521765889
41 1297529674042613761
42 1297528594629922818
43 129752824268190515

346 1296926785779765249
347 1296922520277442560
348 1296919704074887168
349 1296914963395641345
350 1296912188259676160
351 1296911765347938310
352 1296911605112934402
353 1296904754581446666
354 1296904605998120962
355 1296902857069920256
356 1296899316137721859
357 1296899104392523776
358 1296897626911256583
359 1296897345074925575
360 1296892619319062529
361 1296890543264473089
362 1296886552237428736
363 1296884648321200130
364 1296884611566501888
365 1296883525145317376
366 1296882039413714944
367 1296881242311462914
368 1296880715821457409
369 1296879410465902592
370 1296878358224134145
371 1296877091129102343
372 1296875189490393089
373 1296872679350435840
374 1296870737115074561
375 1296869815735521281
376 1296869278885597186
377 1296869022068244480
378 1296863616310837248
379 1296863288270233600
380 1296858823353937922
381 1296858559699980288
382 1296858001849163776
383 1296857892373635072
384 1296857879736131589
385 1296855356220547074
386 1296855265187332097
387 129685495950

In [None]:
# Write all collected post/reply pairs to disk as a CSV file
replies_df = pd.DataFrame(data=replies)
replies_df.to_csv("AirtelService.csv", index_label="index")

In [14]:
# Scratch cell: the loop iterates over an empty list, so nothing is printed.
for t in []:
    print(t)