# New Code For Group Scraping


This code gets all the tweets from Airtel_Ke, checks which of them are replies, and then fetches the tweet being replied to. Hence it works in the opposite direction to the previously shared code.

#### Import libraries and packages

In [43]:
# Import Twitter APIs
import tweepy as tp
import GetOldTweets3 as got

# Import libraries for data reading
import pandas as pd

#For reading secured access code and tokens file
import yaml

##### Set up the API

In [44]:
# Twitter API consumer key/secret and access token/secret are read from a
# local YAML file so that they never appear in the notebook itself.
# Keep secret.yml private and out of version control.
with open(r"secret.yml") as file:
    # safe_load only constructs plain Python types (dict, str, ...), which is
    # all a key/value credentials file needs; it avoids the object-construction
    # features of the fuller loaders.
    secret_list = yaml.safe_load(file)

# Authenticate against the Twitter API with the loaded credentials
auth = tp.OAuthHandler(secret_list["consumer_key"], secret_list["consumer_secret"])
auth.set_access_token(secret_list["access_token"], secret_list["access_secret"])
# wait_on_rate_limit=True asks tweepy to wait instead of failing when the rate limit is hit
api = tp.API(auth, wait_on_rate_limit=True)

#### Set up Query Parameters

In [21]:
# Twitter username whose tweets are retrieved
tweet_query = "AIRTEL_KE"
# Calendar month (1-12) used by the monthly retrieval cell; change as needed
month = 8

#### Monthly Retrieval Code

In [24]:
# Build a one-calendar-month search window for `month` and fetch the tweets.
if not 1 <= month <= 12:
    raise ValueError("month must be between 1 and 12, got {!r}".format(month))

# Months after August are taken from 2019, January-August from 2020.
year = 2019 if month > 8 else 2020

# The window ends on the first day of the following month. December has to
# roll over into January of the next year (previously this case was never
# reached and produced the invalid month 13).
if month == 12:
    endyear, endmonth = year + 1, 1
else:
    endyear, endmonth = year, month + 1

# Zero-padded YYYY-MM-DD bounds, the same format the fixed-range cell uses
since_date = "{}-{:02d}-01".format(year, month)
until_date = "{}-{:02d}-01".format(endyear, endmonth)

# Create the criteria to use to search for the tweets you need
tweetCriteria = (got.manager.TweetCriteria()
                 .setUsername(tweet_query)
                 .setSince(since_date)
                 .setUntil(until_date))

# Query for the tweets
tweets = got.manager.TweetManager.getTweets(tweetCriteria)

#### Retrieve data between two specific dates

In [36]:
# ----> Test Code

# Search criteria: tweets by `tweet_query` inside a fixed date window
tweetCriteria = got.manager.TweetCriteria()
tweetCriteria = tweetCriteria.setUsername(tweet_query)
tweetCriteria = tweetCriteria.setSince("2019-08-01")
tweetCriteria = tweetCriteria.setUntil("2020-08-01")

# Run the query and keep the returned tweets
tweets = got.manager.TweetManager.getTweets(tweetCriteria)

In [37]:
# Collect one list of details per tweet, in the column order used for the DataFrame
tweets_lst = []
for tw in tweets:
    tweets_lst.append([
        tw.id, tw.date, tw.text, tw.username,
        tw.retweets, tw.favorites, tw.geo,
        tw.mentions, tw.hashtags,
    ])

In [38]:
# Number of tweets collected (tweets_lst holds one inner list per tweet the query returned)
len(tweets_lst)

69013

#### Store retrieved data for use later in case code crashes

In [39]:
# Turn the collected rows into a table, one column per tweet detail
tweet_columns = [
    "ID", "Date", "Post", "Username",
    "Retweets", "Favorites", "Geo", "Mentions", "Hashtags",
]
tweets_df = pd.DataFrame(data=tweets_lst, columns=tweet_columns)

# Write the table to disk (row index stored in a column named "index")
tweets_df.to_csv("AirtelTweets.csv", index_label="index")

# Show 20 random rows as a spot check
tweets_df.sample(n=20)

Unnamed: 0,ID,Date,Post,Username,Retweets,Favorites,Geo,Mentions,Hashtags
48890,1195696136255754240,2019-11-16 13:32:28+00:00,Resolution in progress. Feedback will be share...,AIRTEL_KE,0,0,,,
27892,1243509835704008706,2020-03-27 12:07:02+00:00,I have refreshed your line. Kindly restart han...,AIRTEL_KE,0,0,,,
18950,1261296925032288257,2020-05-15 14:06:35+00:00,"Checking with the team ,will revert shortly ^Caro",AIRTEL_KE,0,0,,,
56735,1181210605002248192,2019-10-07 14:12:08+00:00,Its already there .^Caro,AIRTEL_KE,0,1,,,
49182,1195052594768429057,2019-11-14 18:55:16+00:00,Please confirm the location and the nearest la...,AIRTEL_KE,0,0,,,
51062,1191759799039315968,2019-11-05 16:50:52+00:00,,AIRTEL_KE,0,0,,,
14056,1268161314104283138,2020-06-03 12:43:13+00:00,Checking.^Caro,AIRTEL_KE,0,0,,,
10381,1273954321147801610,2020-06-19 12:22:33+00:00,"Hi Maina,thanks for bringing this to our atten...",AIRTEL_KE,0,0,,,
23597,1253641946268139520,2020-04-24 11:08:26+00:00,"Hello,currently the option is not there.^Caro",AIRTEL_KE,0,0,,,
15137,1266622377837891584,2020-05-30 06:48:02+00:00,Nisaidie number yako nikuangalilie. ^Jamo,AIRTEL_KE,0,0,,,


#### Retrieve stored data for use

In [47]:
# Load the saved tweets back, using the stored "index" column as the row index
tweets_df = pd.read_csv(filepath_or_buffer="AirtelTweets.csv", index_col=["index"])

# Show 20 random rows as a spot check
tweets_df.sample(n=20)

Unnamed: 0_level_0,ID,Date,Post,Username,Retweets,Favorites,Geo,Mentions,Hashtags
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
65516,1164121527056343041,2019-08-21 10:26:15+00:00,Please visit our shop for further assistance ^...,AIRTEL_KE,0,0,,,
63176,1168550548728995842,2019-09-02 15:45:36+00:00,On it.^Caro,AIRTEL_KE,0,1,,,
23858,1253378316607856644,2020-04-23 17:40:51+00:00,Issue being checked under ticket C35266341 . ^...,AIRTEL_KE,0,0,,,
43296,1205156280412721152,2019-12-12 16:03:42+00:00,Hi Akiva. Kindly note that the offer ended. ^Jamo,AIRTEL_KE,0,0,,,
68003,1158953122779131904,2019-08-07 04:08:51+00:00,Please share your number for assistance ^Jamo,AIRTEL_KE,0,0,,,
29623,1238407738335641607,2020-03-13 10:13:07+00:00,"Hi Grauchy, apologies for delayed response. pl...",AIRTEL_KE,0,0,,,
55321,1184378740211474432,2019-10-16 08:01:11+00:00,"Hi, kindly note we have identified the issue a...",AIRTEL_KE,0,0,,,
58348,1177412243786354688,2019-09-27 02:38:48+00:00,"@GachanjaJohn_ Hello,currently we have the 2Gb...",AIRTEL_KE,0,1,,@GachanjaJohn_,
59526,1175323173325139968,2019-09-21 08:17:35+00:00,@FeelMeFloh Check dm ^Jamo,AIRTEL_KE,0,0,,@FeelMeFloh,
22413,1255368632957374464,2020-04-29 05:29:40+00:00,We all need to exercise caution and safety to ...,AIRTEL_KE,1,2,,,#BeSmartBeSafe


#### Find the posts that Airtel replied to

In [48]:
# Position in tweets_df after which this run starts; rows at or before this
# position are not processed here.
RESUME_AFTER = 59418
# File that receives the rows collected by this run.
OUTPUT_CSV = "FinalAirtelService4.csv"
# Rewrite OUTPUT_CSV after this many processed tweets, so a crash loses little
# work without rewriting the whole file on every single iteration.
CHECKPOINT_EVERY = 200
# Placeholder stored when the tweet being replied to cannot be retrieved.
NOT_FOUND = 'No Tweet Found'


def save_airtel_service(rows, path=OUTPUT_CSV):
    """Write the collected rows to ``path`` as CSV and return them as a DataFrame."""
    frame = pd.DataFrame(rows)
    frame.to_csv(path_or_buf=path, index_label="index")
    return frame


def question_fields(question_id):
    """Return the ID/Date/Username/Post/Mentions/Hashtags fields of the replied-to tweet.

    If the tweet cannot be fetched or read, every field except ``ID`` is set
    to the ``NOT_FOUND`` placeholder.
    """
    try:
        question = api.get_status(question_id)
        # NOTE(review): `text` may be cut short for long tweets unless the
        # status is requested in extended mode - confirm against the tweepy docs.
        return {'ID': question_id, 'Date': question.created_at,
                'Username': question.user.screen_name,
                'Post': question.text.replace('\n', ' '),
                'Mentions': ' '.join(m['screen_name'] for m in question.entities['user_mentions']),
                'Hashtags': ' '.join(h['text'] for h in question.entities['hashtags'])}
    except Exception:
        # Any failure (deleted, protected, suspended, ...) is recorded as not
        # found; Ctrl-C is deliberately NOT caught here.
        return {'ID': question_id, 'Date': NOT_FOUND,
                'Username': NOT_FOUND, 'Post': NOT_FOUND,
                'Mentions': NOT_FOUND,
                'Hashtags': NOT_FOUND}


AirtelService = []
# Airtel tweets whose own status lookup failed, so they could not be checked
unfetched_reply_ids = []
pending = tweets_df.iloc[RESUME_AFTER + 1:]

try:
    for count, reply in enumerate(pending.itertuples(index=False), start=1):
        if count % CHECKPOINT_EVERY == 0:
            save_airtel_service(AirtelService)

        try:
            status = api.get_status(reply.ID)
        except Exception:
            # Skip this tweet instead of aborting the whole run
            unfetched_reply_ids.append(reply.ID)
            continue

        # Only tweets that are replies carry the ID of the tweet they answer
        QId = getattr(status, 'in_reply_to_status_id_str', None)
        if QId is None:
            continue

        # Question fields first, then the reply fields, to keep the column order
        row = question_fields(QId)
        row.update({'ReplierID': reply.ID, 'Replier': reply.Username,
                    'Reply_date': reply.Date, 'Reply': reply.Post,
                    'Reply_mentions': reply.Mentions, 'Reply_Hashtags': reply.Hashtags})
        AirtelService.append(row)
finally:
    # Save whatever was collected, whether the loop finished, failed, or was interrupted
    df = save_airtel_service(AirtelService)

if unfetched_reply_ids:
    print("Skipped {} tweets whose status could not be fetched".format(len(unfetched_reply_ids)))
            


#### Concatenate different csv files together and save the final data in a csv

In [52]:
# The partial result files: the un-numbered one plus the numbered files 1-4
service_files = ["FinalAirtelService.csv"] + [
    "FinalAirtelService"+str(fileno)+".csv" for fileno in range(1,5)
]

# Read every file, then concatenate once (instead of growing the frame inside
# a loop); ignore_index renumbers the rows 0..n-1 across all files.
Airtel_df = pd.concat(
    [pd.read_csv(path, index_col=["index"]) for path in service_files],
    ignore_index=True,
)
    

In [53]:
# Show 20 randomly chosen rows of the combined frame as a spot check
Airtel_df.sample(20)

Unnamed: 0,ID,Date,Username,Post,Mentions,Hashtags,ReplierID,Replier,Reply_date,Reply,Reply_mentions,Reply_Hashtags
39400,1240530404647612416,2020-03-19 06:47:51,_Kaumoni,@GeorgeDrammeh @AIRTEL_KE Have been refreshing...,GeorgeDrammeh AIRTEL_KE,,1240536235443175425,AIRTEL_KE,2020-03-19 07:11:01+00:00,"Hello Muia, We apologize for the inconvenience...",,
79248,1157574276901478400,2019-08-03 08:49:48,Stephen_KMwaura,@AIRTEL_KE it's very unfair for data bundles t...,AIRTEL_KE,,1157574937457987584,AIRTEL_KE,2019-08-03 08:52:26+00:00,Feedback noted.^Jamo,,
45820,1222514025155715077,2020-01-29 13:37:11,kotnakurubranch,"Vile hii @AIRTEL_KE network iko thrash, bado i...",AIRTEL_KE,,1222517120543662080,AIRTEL_KE,2020-01-29 13:49:29+00:00,Hi. We appreciate your views. Feedback well no...,,
76252,1164156418418782216,2019-08-21 12:44:53,GathogoKen,@AIRTEL_KE It says Loading... Loading then con...,AIRTEL_KE,,1164159045076307973,AIRTEL_KE,2019-08-21 12:55:20+00:00,"Line well re provisioned,please restart your d...",,
69480,1175365853812076544,2019-09-21 11:07:11,osoroKE,"@AIRTEL_KE @Hs_Aboud Hi Jamo, how is Caro",AIRTEL_KE Hs_Aboud,,1175367077160341506,AIRTEL_KE,2019-09-21 11:12:03+00:00,@osoroKE shes fine ^Jamo @Hs_Aboud,@osoroKE @Hs_Aboud,
46616,1219881235150970880,2020-01-22 07:15:25,Myqall,@AIRTEL_KE Be notified that I will not go call...,AIRTEL_KE,,1219884042658689026,AIRTEL_KE,2020-01-22 07:26:34+00:00,Please do share the case number for follow up....,,
59395,1195195552281038848,2019-11-15 04:23:19,1Njeru,"@JLetangule @AIRTEL_KE Bro, bado uko juu hii s...",JLetangule AIRTEL_KE,,1195204657993248769,AIRTEL_KE,2019-11-15 04:59:30+00:00,^Jamo,,
69894,1174713505364226049,2019-09-19 15:54:59,Shark_Kenya,The Europa League returns tonight! Where will...,,,1174715799530926080,AIRTEL_KE,2019-09-19 16:04:06+00:00,"@ItsShark15 Kabisa, Keep it Locked ^Jamo",,
34197,1253776058412924929,2020-04-24 20:01:21,Swtgal14,@AIRTEL_KE am using your wifi rooter.. Needs ...,AIRTEL_KE,,1253777366406627329,AIRTEL_KE,2020-04-24 20:06:32+00:00,"Hi Swtgal, kindly confirm if the router is new...",,
29960,1261162769090568192,2020-05-15 05:13:30,KalosSays,@nahashon87 I am a royal @AIRTEL_KE customer s...,nahashon87 AIRTEL_KE,,1261167316936638464,AIRTEL_KE,2020-05-15 05:31:34+00:00,,,


In [54]:
# (rows, columns) of the combined frame
Airtel_df.shape

(79640, 12)

In [59]:
# Remove fully identical rows, then renumber the remaining rows from 0
Airtel_df = Airtel_df.drop_duplicates().reset_index(drop=True)
# (rows, columns) after removing duplicates
Airtel_df.shape

(74766, 12)

In [60]:
# Write Airtel_df to disk; the row index is stored in a column named "index"
Airtel_df.to_csv(path_or_buf="AirtelFinalServices.csv", index_label="index")