# NLP COVID Vax Data

In [115]:
import pandas as pd
import numpy as np

pd.set_option("max_colwidth", 0)
pd.set_option('max_columns', 35)
np.set_printoptions(suppress=True) # Suppress scientific notation where possible

## Import & combine tweetIDs
Tweet IDs come from this dataset: https://github.com/gmuric/avax-tweets-dataset and were obtained by searching twitter for anti-COVID-19 vaccine-related hashtags between October 2020 and April 2021

In [91]:
def combine_tweetIDs(year_month, days_in_month):
    ''' Imports text files containing tweetIDs and combines a month-worth of 
    tweetIDs into a single df
    '''
    tweet_list = []
    for day in range(1, days_in_month + 1):
        if day < 10:
            day = '0' + str(day)
        else:
            day = str(day)
        for hour in range(0,24):
            if hour < 10:
                hour = '0' + str(hour)
            else:
                hour = str(hour)
            try:
                link = ('avax-tweets-dataset/streaming-tweetids/' 
                            + year_month + '/'
                           + year_month + '-'
                            + day + '-'
                            +hour +'.txt'
                               )
                new_tweets = pd.read_csv(link, header=None)
                tweet_list.append(new_tweets)  
            except FileNotFoundError:
                print('File not found')
    return pd.concat(tweet_list, ignore_index=True)

### Collecting 1 month of tweetIDs & exporting to Hydrator app

#### November 2020 

In [57]:
# November 2020

tweetIDs_2020_11 = combine_tweetIDs(year_month='2020-11', days_in_month=30)
tweetIDs_2020_11.columns = ['TweetID']
tweetIDs_2020_11.head(3)

Unnamed: 0,TweetID
0,1322795488928673792
1,1322795535565156353
2,1322795640275968000


In [58]:
tweetIDs_2020_11.shape

(202897, 1)

In [59]:
# exporting November 2020 tweetIDs

tweetIDs_2020_11.to_csv('my_twitter_data/tweetIDs_2020_11.csv', header=False, index=False)

#### December 2020

In [47]:
# December 2020

tweetIDs_2020_12 = combine_tweetIDs(year_month='2020-12', days_in_month=31)
tweetIDs_2020_12.columns = ['TweetID']
tweetIDs_2020_12.head(3)

Unnamed: 0,TweetID
0,1333682291101605889
1,1333682356675354624
2,1333682389634207744


In [48]:
tweetIDs_2020_12.shape

(269915, 1)

In [49]:
# two rows say 'tweetid' instead of an id number

tweetIDs_2020_12[tweetIDs_2020_12['TweetID'] == 'tweetid']

Unnamed: 0,TweetID
209554,tweetid
236997,tweetid


In [50]:
# dropping the 'tweetid' rows

tweetIDs_2020_12 = tweetIDs_2020_12.drop([209554, 236997])

In [51]:
tweetIDs_2020_12.shape

(269913, 1)

In [56]:
# exporting December 2020 tweetIDs

tweetIDs_2020_12.to_csv('my_twitter_data/tweetIDs_2020_12.csv', header=False, index=False)

#### January 2021

In [70]:
# January 2021
# one file (17th at 15:00) was missing - so used try/except to skip it

tweetIDs_2021_01 = combine_tweetIDs(year_month='2021-01', days_in_month=31)
tweetIDs_2021_01.columns = ['TweetID']
tweetIDs_2021_01.head(3)

File not found


Unnamed: 0,TweetID
0,1344916548725698561
1,1344916986120335360
2,1344917226009350144


In [71]:
tweetIDs_2021_01.shape

(179787, 1)

In [74]:
# exporting Jan 2021 tweetIDs

tweetIDs_2021_01.to_csv('my_twitter_data/tweetIDs_2021_01.csv', header=False, index=False)

#### February 2021 

In [84]:
# February 2021

tweetIDs_2021_02 = combine_tweetIDs(year_month='2021-02', days_in_month=28)
tweetIDs_2021_02.columns = ['TweetID']
tweetIDs_2021_02.head(3)

Unnamed: 0,TweetID
0,1356150339355828226
1,1356150618692399105
2,1356150787580198916


In [85]:
tweetIDs_2021_02.shape

(202916, 1)

In [86]:
# exporting Feb 2021 tweetIDs

tweetIDs_2021_02.to_csv('my_twitter_data/tweetIDs_2021_02.csv', header=False, index=False)

#### March 2021

In [87]:
# March 2021
# one file not found

tweetIDs_2021_03 = combine_tweetIDs(year_month='2021-03', days_in_month=31)
tweetIDs_2021_03.columns = ['TweetID']
tweetIDs_2021_03.head(3)

File not found


Unnamed: 0,TweetID
0,1366297418606776323
1,1366297481680723969
2,1366297498529202178


In [88]:
tweetIDs_2021_03.shape

(419432, 1)

In [89]:
# exporting March 2021 tweetIDs

tweetIDs_2021_03.to_csv('my_twitter_data/tweetIDs_2021_03.csv', header=False, index=False)

#### April 2021
Through 4.20.21

In [92]:
# April 2021 - dataset ends after the 20th

tweetIDs_2021_04 = combine_tweetIDs(year_month='2021-04', days_in_month=20)
tweetIDs_2021_04.columns = ['TweetID']
tweetIDs_2021_04.head(3)

Unnamed: 0,TweetID
0,1377516074640539648
1,1377516103535095809
2,1377516134585532420


In [93]:
tweetIDs_2021_04.shape

(484372, 1)

In [94]:
# exporting April 2021 tweetIDs

tweetIDs_2021_04.to_csv('my_twitter_data/tweetIDs_2021_04.csv', header=False, index=False)

## Combining hydrated Tweets 
Tweets have now been 'hydrated' by running through the Hydrator app: https://github.com/DocNow/hydrator


### Importing and combining hydrated Tweets 

Some definitions:  
>- Quote_id only surfaces when the Tweet is a quote Tweet. The field contains the Tweet ID of the quoted Tweet  
>- Re-tweet_count = number of times this Tweet has been retweeted

In [116]:
# Re-importing hydrated November 2020 data

hydrated_2020_11 = pd.read_csv('my_twitter_data/hydrated_2020_11.csv')
hydrated_2020_11.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
0,,Sun Nov 01 07:04:54 +0000 2020,,,https://twitter.com/LotusOak2/status/1322642287449591814,0,1322796743298265088,,,,en,,False,1.322642e+18,0,,,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",Stay away from that vaccine. Population control. https://t.co/DTMFGsNRzX,https://twitter.com/BAChatwin/status/1322796743298265088,Mon Sep 19 17:13:57 +0000 2011,376304718,False,☀️🇨🇦🌍Truth☀️🇨🇦🌍 Freedom ☀️🇨🇦🌏Health ☀️🇨🇦🌍Eyes wide open\nAwake patriot and CF veteran (13+yrs),1643,130,658,0,"Niagara, Ontario",Barb Chatwin,BAChatwin,2659,,,False
1,,Sun Nov 01 07:10:02 +0000 2020,,,,0,1322798033596272640,,,,en,,,,489,1.322434e+18,sunnynwaobi1,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","Isn't this a systematic depopulation happing in Obigbo?, where young unarmed citizens are randomly picked, some tortured while some are killed. #Obigbomassacre @StateDept @AmnestyNigeria @ https://t.co/TsakU0l3Fm",https://twitter.com/jumoke_ladan/status/1322798033596272640,Wed Jul 01 19:30:15 +0000 2009,52824297,False,Am a Christian by God's grace. Called by Christ to serve and love Him as a Catholic. Perpetually loving Him.,7009,224,359,1,"Lagos, Nigeria",Olajumoke Ladan,jumoke_ladan,1812,,,False
2,,Sun Nov 01 07:13:25 +0000 2020,,,,0,1322798887694905346,,,,en,,,,122,1.322792e+18,conspiracyb0t,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",Bill Gates is one of the biggest proponents of mass depopulation.,https://twitter.com/wambo161/status/1322798887694905346,Fri Apr 08 16:05:41 +0000 2011,279106686,False,Follower of Jesus.,27413,205,711,3,la montaña de roble,James of Oak Mountain,wambo161,48229,,,False
3,,Sun Nov 01 07:31:24 +0000 2020,,,,0,1322803412300431361,,,,en,,,1.322434e+18,41,1.322774e+18,Onyinye51838197,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",The world should be on notice that Nelson nwike river state governor and the Nigeria army are committing genocide and pogrom in Biafra land https://t.co/fTC8gNNSsZ,https://twitter.com/EmmanuelNwanyim/status/1322803412300431361,Tue Apr 21 07:16:59 +0000 2020,1252496422865375238,False,i was born to shine take it or live it...blessed son by God in heaven..🙌🏼🙌🏼🙌🏼..peace 👍🏻 ...Biafran citizen Israel 🇮🇱 citizen,8947,121,307,1,Israel,Emmanuel nwanyim,EmmanuelNwanyim,16345,,,False
4,,Sun Nov 01 07:01:51 +0000 2020,,,,1,1322795973337403395,bongzmessi,1.32217e+18,217711495.0,en,,,,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",@bongzmessi To come to think of it perhaps this was done so they multiply and outnumber us but. To them the depopulation agenda is at top of their list. Phela my King bake baphahluke labantu telling things they were not supposed like the one on an interview that they injected AIDS to blacks,https://twitter.com/Boniswa63207956/status/1322795973337403395,Mon May 25 12:43:06 +0000 2020,1264899714950062080,False,Mayibuye iAfrika✊🏾. \n\nI don't call my people Kings and Queens to tickle my throat it's because we are Royalty 👸🏾🤴🏿.\nTribalism 🤮Feminism 🤮Xenophobia🤮,39392,3658,3680,2,,Boniswa,Boniswa63207956,40107,,,False


In [76]:
hydrated_2020_11.shape

(40764, 35)

In [117]:
# Re-importing hydrated December 2020 data

hydrated_2020_12 = pd.read_csv('my_twitter_data/hydrated_2020_12.csv')
hydrated_2020_12.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
0,,Tue Dec 01 08:23:18 +0000 2020,NoVaccineForMe,,https://www.facebook.com/MrWillFairfield/videos/384251992885450/,0,1333688107556872193,,,,und,,False,,0,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",https://t.co/3qrtBnDF8v? #NoVaccineForMe,https://twitter.com/Davis1973Sarah/status/1333688107556872193,Tue Nov 04 09:34:38 +0000 2014,2860208369,False,mother ... wife ... animal lover ... music lover ... getting Fit not lit ... trusting my immune system Enough is Enough,12951,832,2173,4,,Sarah Davis,Davis1973Sarah,4847,,,False
1,,Tue Dec 01 08:30:21 +0000 2020,,,,2,1333689884280844289,noma4freedom,1.333688e+18,8.62806e+17,en,,,,0,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",@patrioticnomes @LBC @NickFerrariLBC Not that long ago people predicting this were called conspiracy theorists hey but according to some polls it appears 50% of the public want mandated vaccines or at least for non vaxxed to be refused entry to places 🙄😔 sad times.,https://twitter.com/gareth78t/status/1333689884280844289,Fri Jul 11 09:24:47 +0000 2014,2698364615,False,I like a certain amount of privacy therefore I choose not to say too much about myself on the interweb!,3151,118,382,1,wherever I am at the time,GT,gareth78t,2260,,,False
2,,Tue Dec 01 08:16:39 +0000 2020,,,,0,1333686436164366336,,,,en,,,1.333624e+18,1,1.333629e+18,violetblue,"<a href=""https://tapbots.com/software/tweetbot/mac"" rel=""nofollow"">Tweetbot for Mac</a>","""nearly 90,000 households have relocated outside of the city"" &lt; in the past six months... aka why the houses around me are dark and quiet now. https://t.co/dONbAaQaPI",https://twitter.com/mars_seven/status/1333686436164366336,Sat Dec 27 01:49:16 +0000 2008,18399799,False,Huh? What? Never mind.,1257,276,609,74,San Francisco,mars_seven,mars_seven,74839,,http://www.mars-seven.com/,False
3,,Tue Dec 01 08:10:48 +0000 2020,michaelgove BorisJohnson matthancock scamdemic covid forcedvaccine,,https://www.mirror.co.uk/news/politics/michael-gove-says-covid-19-23095692.amp?__twitter_impression=true,0,1333684963250741249,,,,en,,False,,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","If #michaelgove #BorisJohnson and #matthancock say it will not happen, then its 100% guaranteed that it will. https://t.co/yaD4nfOv3f\n#scamdemic #covid #forcedvaccine",https://twitter.com/nicktweetuk/status/1333684963250741249,Mon Feb 20 10:16:05 +0000 2017,833621238039924736,False,,7568,52,101,0,,Nick,nicktweetuk,7742,,,False
4,,Tue Dec 01 08:01:33 +0000 2020,,,,14,1333682634640265216,,,,en,,,,1,,,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",@Jon_statistics It's all part of the depopulation planning.,https://twitter.com/YrHenGwrcyn/status/1333682634640265216,Tue Nov 15 02:33:55 +0000 2011,412735280,False,"Views and opinions my own, retweet ≠ endorsement.\nnow on GAB - @bladerider",135290,1011,481,2,Stalag-15,"Bladerider - ""Everything WOKE turns to shit""",YrHenGwrcyn,45786,,,False


In [96]:
hydrated_2020_12.shape

(84058, 35)

In [119]:
# Re-importing hydrated January 2021 data

hydrated_2021_01 = pd.read_csv('my_twitter_data/hydrated_2021_01.csv')
hydrated_2021_01.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
0,,Fri Jan 01 08:22:10 +0000 2021,informedconsent toryliars resignHancock genow pfizer mhra,,,0,1344921847587074048,itsBenRandell,1.34492e+18,50371532.0,en,,,,0,,,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",@itsBenRandell Believe this is actually illegal for those already had 1st dose. No #informedconsent #toryliars #resignHancock #genow #pfizer #mhra,https://twitter.com/iamnothelch/status/1344921847587074048,Thu Dec 24 20:26:42 +0000 2020,1342205037179789319,False,"""subsistence level twitterage""\nYou got Brexit done. Now fuck off.\nMember of both #monoCulture and #CultOfSelfish Resistance movements.\n#FBPA #FBPE #FBPPR",13650,1771,2415,1,,iamnothelch 💙 #FBPPR #FBNHS #JohnsonOut,iamnothelch,17130,,,False
1,,Fri Jan 01 08:16:08 +0000 2021,,,,1,1344920328586350592,,,,en,,,,0,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","@syghe @empalattella @LLinWood @HelenKennedy Libs LOVE masks bc now they can go out in public w/o feeling self-conscious and ashamed by their appearance, while also fueling their relentlessly thirsty EGO by virtue shaming others. Lol Can’t wait for them all to get vaxxed.",https://twitter.com/realriasprague1/status/1344920328586350592,Wed Dec 30 12:06:09 +0000 2020,1344253388742160384,False,Trump 2020. American 🇺🇸 Patriot. Constantly banned from this $hit platform. Ready to fight back like it’s 1776. @LLinwood is the reason I’m back.,107,5,14,0,United States,Maria Sprague,realriasprague1,70,,,False
2,,Fri Jan 01 08:52:29 +0000 2021,Left ChinaVirus Vaccine,,,0,1344929478087159810,,,,en,,,1.344795e+18,2,1.344928e+18,votenickmoutos,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","I’m all for the mask muppets of the #Left getting the #ChinaVirus #Vaccine. Whether it sterilizes them, is toxic, I don’t care. #NoVaccineForMe Let them vaccinate themselves out of existence! https://t.co/SzZSrCEgrl",https://twitter.com/Maria95321214/status/1344929478087159810,Tue May 19 17:16:38 +0000 2020,1262794215723057153,False,🚀🌿IT'S HARD TO GIVE UNLIMITED POWER TO LIMITED MIND.🌿🚀TRUMP♥️Q♥️JFKjr ♥️🌿🚀,24079,223,859,0,"Roma, IT🚀Prague,CZ",🍀MARIA🍀,Maria95321214,11672,,,False
3,,Fri Jan 01 08:42:33 +0000 2021,,,,1,1344926977891954688,joesnoboard,1.34491e+18,94901637.0,en,,,,1,,,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>","@joesnoboard @MduBrianC1 @Lungah__ Now, with you intellectual superiority, do you ever think that Gates or any leader would openly admit their commitment to depopulation?\nThe newspaper in question has not redacted their article nor apologised to Gates.",https://twitter.com/onderrok/status/1344926977891954688,Thu Aug 07 14:48:52 +0000 2014,2736252034,False,"Tech entrepreneur, coin collector, social commentator. In that order!",1916,142,265,2,The Toilet,Anon Cognito,onderrok,5352,,Https://Kidztours.co.za,False
4,,Fri Jan 01 08:12:16 +0000 2021,NoMasks NoVaccines,,https://twitter.com/CBSNews/status/1344911164740874241,0,1344919354526191616,,,,en,,False,1.344911e+18,0,,,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",He is a hero! Vaccines will kill us all! #NoMasks! #NoVaccines! https://t.co/hGaqSvA3nu,https://twitter.com/riker17/status/1344919354526191616,Wed Jan 02 20:30:51 +0000 2008,11770332,False,My opinions are real and blunt. I take no guff. Political Correctness is wrong! Proud No Agenda Knight. #Freedom #AmericaFirst @twrsradio ITM! #NoVaccines ever!,97971,1025,4998,50,"Dos Palos, CA",COVID #hoax survivor Will James Robertson #NoMasks,riker17,43721,,https://www.riker17.com,False


In [120]:
hydrated_2021_01.shape

(122966, 35)

In [122]:
# Re-importing hydrated February 2021 data

hydrated_2021_02 = pd.read_csv('my_twitter_data/hydrated_2021_02.csv')
hydrated_2021_02.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
0,,Mon Feb 01 08:32:58 +0000 2021,,,,4,1356158588687941632,86Cujo,1.356158e+18,453401500.0,en,,,,1,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",@86Cujo @vsuperstar1 @CarlHen10428180 No such thing as global warming Gates is only blocking the sun so we get a lack in Vitiman D just one of a handful of his ideas to help with his depopulation plan.,https://twitter.com/sdaws5MUFC/status/1356158588687941632,Mon Jan 24 18:45:44 +0000 2011,242414603,False,Earth is Flat 👁 #420,14931,11880,3159,89,Frinton,Steve,sdaws5MUFC,28830,,,False
1,,Mon Feb 01 08:09:21 +0000 2021,,,,0,1356152644813905923,vonderleyen,1.355942e+18,1.14633e+18,en,,,,0,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","@vonderleyen @AstraZeneca Erm... but that WAS last week’s offer wasn’t it? And you wanted 120m doses, right? Why are you presenting 39m doses as (a) news and (b) a good thing? How many EU citizens does that leave unvaccinated and vulnerable?",https://twitter.com/SussexBoil/status/1356152644813905923,Tue Aug 09 22:11:38 +0000 2011,351914246,False,He who stifles free discussion secretly doubts whether what he professes to believe is really true ~ W. Phillips,28291,722,1944,2,United Kingdom,Amieas,SussexBoil,13911,,,False
2,,Mon Feb 01 08:03:36 +0000 2021,depopulation,,https://twitter.com/jamesperloff/status/1355363087155687425,0,1356151200731836419,,,,en,,False,1.355363e+18,1,1.355457e+18,DrBroncanuus,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",#depopulation has begun https://t.co/KxIbXV7uTa,https://twitter.com/ValerieCurren/status/1356151200731836419,Wed May 16 07:28:35 +0000 2012,581613862,True,,39535,1248,4837,2,,Valerie Curren,ValerieCurren,71828,,,False
3,,Mon Feb 01 08:41:44 +0000 2021,,,,0,1356160793348354050,SteveG06322664,1.355939e+18,1.248268e+18,en,,,,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","@SteveG06322664 @afneil My brother just died from c-19. He'd been fine 3 weeks ago. His wife has health issues - has tested positive, is asymptomatic, neither had the vaccine but an unvaccinated carer calls every day. I wonder how they got the virus! 🥺😡",https://twitter.com/WinnieAtwell/status/1356160793348354050,Sat Dec 29 13:58:36 +0000 2018,1079013820960514049,False,,644,10,31,0,,EBBOM,WinnieAtwell,873,,,False
4,,Mon Feb 01 08:51:16 +0000 2021,,,https://twitter.com/abuttenheim/status/1355587111974547462,0,1356163194285416448,LucaFerrettiEvo,,1.032006e+18,und,,False,1.355587e+18,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",@LucaFerrettiEvo https://t.co/VRafR8ruHT,https://twitter.com/alice_ledda_/status/1356163194285416448,Tue Aug 21 20:28:27 +0000 2018,1032001505157689344,False,"Evolutionary Scientist, Very Curious Person, Old School Social.",60695,1320,791,13,Europe,"AliceLedda, PhD 💙",alice_ledda_,11235,,,False


In [123]:
hydrated_2021_02.shape

(144145, 35)

In [124]:
# Re-importing hydrated March 2021 data

hydrated_2021_03 = pd.read_csv('my_twitter_data/hydrated_2021_03.csv')
hydrated_2021_03.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
0,,Mon Mar 01 08:24:19 +0000 2021,,,https://mol.im/a/9310829,8,1366303272345219073,,,,en,,False,,1,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",Unvaccinated officers 'DO NOT want to continue policing Covid rules'\nIf They Have Stayed In Same County No Problem! But Police Could Establish That From Number Plate! https://t.co/I7dciZrbEl,https://twitter.com/boblister_poole/status/1366303272345219073,Mon Oct 20 17:41:55 +0000 2014,2838333676,False,"Why Haven’t We Stopped Flights From Most Countries Into The UK? Boris Must Get Tough With Cabinet Clearout, Support UK Fishing, Stop Illegal Migrant Boats!",125488,39772,25696,140,"South West, England",Bob For A Full Brexit,boblister_poole,261395,,,False
1,,Mon Mar 01 08:23:00 +0000 2021,NoVaccineForMe,,https://twitter.com/NakkachM/status/1366120607604211712,3,1366302942417027075,,,,en,,False,1.366121e+18,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",If beleiving in the opposite truth makes me a conspiracy theorist...then be it....i prefer that to being called a blind follower....#NoVaccineForMe https://t.co/wiU5kp74SE,https://twitter.com/meia_j_hk/status/1366302942417027075,Sun Oct 20 11:07:05 +0000 2019,1185875016237441024,False,ceci n'est pas un vide.....\n\nhttps://t.co/wVUZoOMXLq,35729,2457,2016,1,,مَيَّا,meia_j_hk,8488,,,False
2,,Mon Mar 01 08:33:56 +0000 2021,,,,3,1366305691313520643,MissDemeanor___,1.366301e+18,1.362055e+18,en,,,,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","@Rebel__Teacher @LBC @NickFerrariLBC Because the virus would continue to spread around the unvaccinated, possibly with less casualties, but with each new host chances of new mutations, creating new variants increases. These may be both more deadly &amp; impervious to the current vaccines. Even to athletes like yourself.",https://twitter.com/LiberiaGod/status/1366305691313520643,Sat Feb 22 07:54:57 +0000 2020,1231125163229487104,False,Liberians for Trump 2020... kinda...\n\n#MLGA,69,12,20,0,,GodBlessLiberians,LiberiaGod,629,,,False
3,,Mon Mar 01 08:18:30 +0000 2021,,,,0,1366301809405485060,,,,en,,,,16,1.366299e+18,AbdullahHasib2,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","@piersmorgan \nWhy do you keep describing the lockdown roadmap as 'sensible' when 10 million children and 500,000 mostly unvaccinated teachers are going to be forced back into the often poorly ventilated confines of a classroom come next week? Obvious infection rates will rocket!",https://twitter.com/eveningperson/status/1366301809405485060,Thu May 21 11:44:36 +0000 2009,41570929,False,"Picture valid as ever. Wear a mask. 🏳️‍🌈 💖💜💙 RTs only for interest. Trolls blocked. If you can't sustain reasoned argument, you're muted. Voted in 1975.",3142,3408,4881,12,Shrewsbury & Bristol,Richard Burnham,eveningperson,65105,,https://eveningperson.wordpress.com,False
4,,Mon Mar 01 08:13:06 +0000 2021,,,,0,1366300449880612864,,,,en,,,1.361286e+18,8,1.366176e+18,JumboJones56,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",@AlexFerguson50 @thearmouryblog @battleaxeBrit1 Max didn’t say that they were mandatory.\n\nHe is talking about stuff like this 👇 https://t.co/c7DqOwFGPb,https://twitter.com/IreneBu62640342/status/1366300449880612864,Tue Nov 17 19:02:13 +0000 2020,1328775348557910016,False,I don't vote Snp and never will I voted to leave the EU #TakeOffYourMask🌸🌸🌸,280933,4080,3272,4,"Scotland, United Kingdom",Irene 🌸,IreneBu62640342,87900,,,False


In [125]:
hydrated_2021_03.shape

(321807, 35)

In [126]:
# Re-importing hydrated April 2021 data

hydrated_2021_04 = pd.read_csv('my_twitter_data/hydrated_2021_04.csv')
hydrated_2021_04.head()

Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
0,,Thu Apr 01 07:04:45 +0000 2021,,,https://twitter.com/Flobga/status/1377346654827802627,0,1377517270667255810,,,,fr,,False,1.377347e+18,40,1.377377e+18,GladysPotiron,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>","Pendant que Macron monopolise les français, cette MONSTRUEUSE déclaration passe inaperçue 😡👎😡😭 https://t.co/XbAcCD0PC3",https://twitter.com/Nathalienath19/status/1377517270667255810,Wed Mar 11 20:09:45 +0000 2020,1237833052170194944,False,@NathalieI -- Parler\n@Nathalienath19 -- solidaritia\n@Nathalienath19 -tme,27930,430,779,0,en quête de vérité,Nath La louve,Nathalienath19,23029,,,False
1,,Thu Apr 01 07:06:33 +0000 2021,,,,0,1377517723798888448,,,,en,,,,1255,1.376814e+18,CrimsonCode1,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","Everyone; Vaxxed or Unvaxxed whatever your choice, should reject any form of Vacccine Passport. This will be the beginning of the end of freedom as we know it. Chained to a system, of THIER making, with no escape. No one wins, EVERYONE will lose! See the bigger picture!!",https://twitter.com/Manmadeimage/status/1377517723798888448,Sun Mar 13 21:13:03 +0000 2011,265604134,False,"Each person is a prisoner unto themselves. We're being imprisoned by a scientific and materialistic world. The awakened see it as a prison , others normality.",1453,31,92,0,,Scoot #KBF,Manmadeimage,2053,,,False
2,,Thu Apr 01 07:18:35 +0000 2021,,,,1,1377520752006664192,Carolramsden6,1.37732e+18,1.369255e+18,en,,,,0,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",@Carolramsden6 @RF_Jenkins @zoeharcombe How about doing some serious research? I suggest start with Cutter incident. It’s what put loads of vaxxed people into the iron lungs. Contaminated polio vaccine. And don’t stop there. Keep digging.,https://twitter.com/naomirowling/status/1377520752006664192,Tue Sep 29 15:09:09 +0000 2009,78333947,False,"Artistic essential oil expert, nomad and owner of a Spanish olive grove",889,91,290,1,,Naomi Rowling,naomirowling,456,,http://naomirowling.com,False
3,,Thu Apr 01 07:07:10 +0000 2021,,,https://www.lifesitenews.com/opinion/wristbands-and-dining-cards-new-army-policies-exclude-isolate-unvaccinated?utm_source=LifeSiteNews.com&utm_campaign=c76a269c7d-Daily%2520Headlines%2520-%2520World_COPY_987&utm_medium=email&utm_term=0_12387f0e3e-c76a269c7d-406664550,0,1377517881601224704,,,,und,,False,,1,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",https://t.co/WnqWXs4uKS,https://twitter.com/mandyjanevernon/status/1377517881601224704,Wed Dec 30 19:37:40 +0000 2015,4657017676,True,,126525,1183,4986,69,,Amanda Vernon,mandyjanevernon,158247,,,False
4,,Thu Apr 01 07:19:07 +0000 2021,,,,0,1377520887247814656,,,,en,,,,2323,1.377417e+18,EricMMatheny,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",I don’t take medical advice from anybody who has ever publicly advocated human depopulation.,https://twitter.com/GeorgeS21302455/status/1377520887247814656,Mon Jul 10 14:35:21 +0000 2017,884420782155120640,True,,42285,435,1024,0,,George S,GeorgeS21302455,36506,,,False


In [127]:
hydrated_2021_04.shape

(386169, 35)

In [233]:
# concatenate dfs

vax_full = pd.concat([hydrated_2020_11, hydrated_2020_12, hydrated_2021_01, 
           hydrated_2021_02, hydrated_2021_03, hydrated_2021_04],
         ignore_index=True)
vax_full.head()

Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
0,,Sun Nov 01 07:04:54 +0000 2020,,,https://twitter.com/LotusOak2/status/1322642287449591814,0,1322796743298265088,,,,en,,False,1.322642e+18,0,,,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",Stay away from that vaccine. Population control. https://t.co/DTMFGsNRzX,https://twitter.com/BAChatwin/status/1322796743298265088,Mon Sep 19 17:13:57 +0000 2011,376304718,False,☀️🇨🇦🌍Truth☀️🇨🇦🌍 Freedom ☀️🇨🇦🌏Health ☀️🇨🇦🌍Eyes wide open\nAwake patriot and CF veteran (13+yrs),1643,130,658,0,"Niagara, Ontario",Barb Chatwin,BAChatwin,2659,,,False
1,,Sun Nov 01 07:10:02 +0000 2020,,,,0,1322798033596272640,,,,en,,,,489,1.322434e+18,sunnynwaobi1,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","Isn't this a systematic depopulation happing in Obigbo?, where young unarmed citizens are randomly picked, some tortured while some are killed. #Obigbomassacre @StateDept @AmnestyNigeria @ https://t.co/TsakU0l3Fm",https://twitter.com/jumoke_ladan/status/1322798033596272640,Wed Jul 01 19:30:15 +0000 2009,52824297,False,Am a Christian by God's grace. Called by Christ to serve and love Him as a Catholic. Perpetually loving Him.,7009,224,359,1,"Lagos, Nigeria",Olajumoke Ladan,jumoke_ladan,1812,,,False
2,,Sun Nov 01 07:13:25 +0000 2020,,,,0,1322798887694905346,,,,en,,,,122,1.322792e+18,conspiracyb0t,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",Bill Gates is one of the biggest proponents of mass depopulation.,https://twitter.com/wambo161/status/1322798887694905346,Fri Apr 08 16:05:41 +0000 2011,279106686,False,Follower of Jesus.,27413,205,711,3,la montaña de roble,James of Oak Mountain,wambo161,48229,,,False
3,,Sun Nov 01 07:31:24 +0000 2020,,,,0,1322803412300431361,,,,en,,,1.322434e+18,41,1.322774e+18,Onyinye51838197,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",The world should be on notice that Nelson nwike river state governor and the Nigeria army are committing genocide and pogrom in Biafra land https://t.co/fTC8gNNSsZ,https://twitter.com/EmmanuelNwanyim/status/1322803412300431361,Tue Apr 21 07:16:59 +0000 2020,1252496422865375238,False,i was born to shine take it or live it...blessed son by God in heaven..🙌🏼🙌🏼🙌🏼..peace 👍🏻 ...Biafran citizen Israel 🇮🇱 citizen,8947,121,307,1,Israel,Emmanuel nwanyim,EmmanuelNwanyim,16345,,,False
4,,Sun Nov 01 07:01:51 +0000 2020,,,,1,1322795973337403395,bongzmessi,1.32217e+18,217711495.0,en,,,,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",@bongzmessi To come to think of it perhaps this was done so they multiply and outnumber us but. To them the depopulation agenda is at top of their list. Phela my King bake baphahluke labantu telling things they were not supposed like the one on an interview that they injected AIDS to blacks,https://twitter.com/Boniswa63207956/status/1322795973337403395,Mon May 25 12:43:06 +0000 2020,1264899714950062080,False,Mayibuye iAfrika✊🏾. \n\nI don't call my people Kings and Queens to tickle my throat it's because we are Royalty 👸🏾🤴🏿.\nTribalism 🤮Feminism 🤮Xenophobia🤮,39392,3658,3680,2,,Boniswa,Boniswa63207956,40107,,,False


In [234]:
# almost 1.1 million rows of tweets

vax_full.shape

(1099909, 35)

## Exploring the combined dataset 

In [216]:
vax_full.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1099909 entries, 0 to 1099908
Data columns (total 35 columns):
 #   Column                      Non-Null Count    Dtype  
---  ------                      --------------    -----  
 0   coordinates                 239 non-null      object 
 1   created_at                  1099909 non-null  object 
 2   hashtags                    163543 non-null   object 
 3   media                       85056 non-null    object 
 4   urls                        240801 non-null   object 
 5   favorite_count              1099909 non-null  int64  
 6   id                          1099909 non-null  int64  
 7   in_reply_to_screen_name     234118 non-null   object 
 8   in_reply_to_status_id       225351 non-null   float64
 9   in_reply_to_user_id         234118 non-null   float64
 10  lang                        1099909 non-null  object 
 11  place                       8677 non-null     object 
 12  possibly_sensitive          313108 non-null   object 
 1

In [217]:
# convert quote_id and retweet_id to int (in order to suppress scientific notation)

vax_full['quote_id'] = vax_full['quote_id'].astype('Int64')
vax_full['retweet_id'] = vax_full['retweet_id'].astype('Int64')

In [218]:
vax_full.head()

Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
0,,Sun Nov 01 07:04:54 +0000 2020,,,https://twitter.com/LotusOak2/status/1322642287449591814,0,1322796743298265088,,,,en,,False,1.3226422874495918e+18,0,,,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",Stay away from that vaccine. Population control. https://t.co/DTMFGsNRzX,https://twitter.com/BAChatwin/status/1322796743298265088,Mon Sep 19 17:13:57 +0000 2011,376304718,False,☀️🇨🇦🌍Truth☀️🇨🇦🌍 Freedom ☀️🇨🇦🌏Health ☀️🇨🇦🌍Eyes wide open\nAwake patriot and CF veteran (13+yrs),1643,130,658,0,"Niagara, Ontario",Barb Chatwin,BAChatwin,2659,,,False
1,,Sun Nov 01 07:10:02 +0000 2020,,,,0,1322798033596272640,,,,en,,,,489,1.3224342418299822e+18,sunnynwaobi1,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","Isn't this a systematic depopulation happing in Obigbo?, where young unarmed citizens are randomly picked, some tortured while some are killed. #Obigbomassacre @StateDept @AmnestyNigeria @ https://t.co/TsakU0l3Fm",https://twitter.com/jumoke_ladan/status/1322798033596272640,Wed Jul 01 19:30:15 +0000 2009,52824297,False,Am a Christian by God's grace. Called by Christ to serve and love Him as a Catholic. Perpetually loving Him.,7009,224,359,1,"Lagos, Nigeria",Olajumoke Ladan,jumoke_ladan,1812,,,False
2,,Sun Nov 01 07:13:25 +0000 2020,,,,0,1322798887694905346,,,,en,,,,122,1.322791741854937e+18,conspiracyb0t,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",Bill Gates is one of the biggest proponents of mass depopulation.,https://twitter.com/wambo161/status/1322798887694905346,Fri Apr 08 16:05:41 +0000 2011,279106686,False,Follower of Jesus.,27413,205,711,3,la montaña de roble,James of Oak Mountain,wambo161,48229,,,False
3,,Sun Nov 01 07:31:24 +0000 2020,,,,0,1322803412300431361,,,,en,,,1.3224342418299822e+18,41,1.3227744158730527e+18,Onyinye51838197,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",The world should be on notice that Nelson nwike river state governor and the Nigeria army are committing genocide and pogrom in Biafra land https://t.co/fTC8gNNSsZ,https://twitter.com/EmmanuelNwanyim/status/1322803412300431361,Tue Apr 21 07:16:59 +0000 2020,1252496422865375238,False,i was born to shine take it or live it...blessed son by God in heaven..🙌🏼🙌🏼🙌🏼..peace 👍🏻 ...Biafran citizen Israel 🇮🇱 citizen,8947,121,307,1,Israel,Emmanuel nwanyim,EmmanuelNwanyim,16345,,,False
4,,Sun Nov 01 07:01:51 +0000 2020,,,,1,1322795973337403395,bongzmessi,1.32217e+18,217711495.0,en,,,,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",@bongzmessi To come to think of it perhaps this was done so they multiply and outnumber us but. To them the depopulation agenda is at top of their list. Phela my King bake baphahluke labantu telling things they were not supposed like the one on an interview that they injected AIDS to blacks,https://twitter.com/Boniswa63207956/status/1322795973337403395,Mon May 25 12:43:06 +0000 2020,1264899714950062080,False,Mayibuye iAfrika✊🏾. \n\nI don't call my people Kings and Queens to tickle my throat it's because we are Royalty 👸🏾🤴🏿.\nTribalism 🤮Feminism 🤮Xenophobia🤮,39392,3658,3680,2,,Boniswa,Boniswa63207956,40107,,,False


In [219]:
vax_full.shape

(1099909, 35)

### Retweets 

In [220]:
# more than half the 1.1 million tweets are re-tweets

(~(vax_full['retweet_id'].isnull())).sum()

640621

In [221]:
vax_full['retweet_id'].nunique()

75980

In [222]:
# the most common retweets

vax_full.groupby('retweet_id')['retweet_id'].count(). \
sort_values(ascending=False).head(10)

retweet_id
1379184293138857984    27902
1346602188953559040    20326
1379832806902206464    14516
1366957856323469312    6836 
1379200046957215744    6743 
1370200113977700352    5376 
1342085913782923264    4683 
1375160759324352512    3972 
1349810559986839552    3962 
1380906061813661696    3506 
Name: retweet_id, dtype: int64

In [223]:
# looks like retweeet text is all identical - makes sense
# not that this tweet w/ 27K re-tweets is actually pro-vaccine
# keyword was probably "vaxxed"

vax_full[vax_full['retweet_id'] == 1379184293138857984]

Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
788525,,Mon Apr 05 21:30:18 +0000 2021,,,,0,1379184647670730753,,,,en,,,,36735,1379184293138857984,GrillmoreSlim,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","This lady on the plane said ""vaxxed and waxed, baby, I'm ready for some action on this trip."" and that should be everybody's vibe this summer.",https://twitter.com/lilakeys21/status/1379184647670730753,Tue Jul 27 01:28:03 +0000 2010,171298581,False,,2293,1297,1104,3,Jersey,The Cunt Conductor,lilakeys21,99162,,,False
788526,,Mon Apr 05 21:33:34 +0000 2021,,,,0,1379185470442201097,,,,en,,,,36735,1379184293138857984,GrillmoreSlim,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","This lady on the plane said ""vaxxed and waxed, baby, I'm ready for some action on this trip."" and that should be everybody's vibe this summer.",https://twitter.com/50sombrasdegris/status/1379185470442201097,Mon May 12 22:42:57 +0000 2014,2543923811,False,becoming my better self / 🇵🇷,13477,593,443,3,enrique 💍,grey 🌪,50sombrasdegris,20423,,,False
788539,,Mon Apr 05 21:31:23 +0000 2021,,,,0,1379184920153649153,,,,en,,,,36735,1379184293138857984,GrillmoreSlim,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","This lady on the plane said ""vaxxed and waxed, baby, I'm ready for some action on this trip."" and that should be everybody's vibe this summer.",https://twitter.com/trusttheunseen/status/1379184920153649153,Tue Jul 31 02:40:25 +0000 2012,727391077,False,Simply blessed.,12505,922,865,0,,E,trusttheunseen,23382,,,False
788545,,Mon Apr 05 21:32:04 +0000 2021,,,,0,1379185090123694083,,,,en,,,,36735,1379184293138857984,GrillmoreSlim,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","This lady on the plane said ""vaxxed and waxed, baby, I'm ready for some action on this trip."" and that should be everybody's vibe this summer.",https://twitter.com/_FefeXO/status/1379185090123694083,Thu Jul 02 02:40:10 +0000 2009,52942895,False,25| cerified lover girl. 🏳️‍🌈🇵🇷 | Cancer 🌞Leo 🌛Capricorn Asc,33196,941,507,10,"Texas, USA",CHAMPAGNE MAMÍ✨,_FefeXO,94389,,,False
788549,,Mon Apr 05 21:29:44 +0000 2021,,,,0,1379184504837902337,,,,en,,,,36735,1379184293138857984,GrillmoreSlim,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","This lady on the plane said ""vaxxed and waxed, baby, I'm ready for some action on this trip."" and that should be everybody's vibe this summer.",https://twitter.com/BryanVillone/status/1379184504837902337,Thu Jan 29 17:21:23 +0000 2009,19721340,False,Stand Up Comedian. Cheapskate. IG: BryanVillone,191,399,200,9,NJ,Global megastar. Hero to many peasant.,BryanVillone,28458,,https://www.eventbrite.com/e/down-to-the-last-bit-tickets-167885274375?ref=eios,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1082848,,Tue Apr 20 00:47:58 +0000 2021,,,,0,1384307823417303041,,,,en,,,,36735,1379184293138857984,GrillmoreSlim,"<a href=""http://www.echofon.com/"" rel=""nofollow"">Echofon</a>","This lady on the plane said ""vaxxed and waxed, baby, I'm ready for some action on this trip."" and that should be everybody's vibe this summer.",https://twitter.com/drollpatrol/status/1384307823417303041,Thu Dec 17 16:26:27 +0000 2009,97475545,False,Black Lives Matter (https://t.co/IVcaM20Hrc)\n\nSocial Distancing Expert\n\nKing Lear Act 4 Scene 6 Line 60\n\nShe/her\n\nCashApp $DrollPatrol\n\n\n(Painting by Kyra Kendall),18183,410,1348,38,Inner World,Delagrammatikas,drollpatrol,13632,,,False
1083946,,Tue Apr 20 03:01:09 +0000 2021,,,,0,1384341339702235143,,,,en,,,,36735,1379184293138857984,GrillmoreSlim,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","This lady on the plane said ""vaxxed and waxed, baby, I'm ready for some action on this trip."" and that should be everybody's vibe this summer.",https://twitter.com/kaushiiveeno/status/1384341339702235143,Fri Apr 01 20:23:42 +0000 2011,275697595,False,can neither confirm or deny that im a gemini,46157,279,193,2,tor-iunno,♔ kay ♔,kaushiiveeno,20926,,,False
1085584,,Tue Apr 20 07:08:29 +0000 2021,,,,0,1384403582158479362,,,,en,,,,36735,1379184293138857984,GrillmoreSlim,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","This lady on the plane said ""vaxxed and waxed, baby, I'm ready for some action on this trip."" and that should be everybody's vibe this summer.",https://twitter.com/whathefaz/status/1384403582158479362,Mon Aug 16 06:05:35 +0000 2010,178989564,False,Positive outcomes only. #CODEBLACKMY\n#PelajarBukanPengantin #SchoolNotSpouse #MigranJugaManusia #PrayforPalestine #PrayforMyanmar #HutanPergiMana,205767,553,94,3,,Faz,whathefaz,153716,,http://instagram.com/faz367_,False
1090387,,Tue Apr 20 16:26:06 +0000 2021,,,,0,1384543908655517698,,,,en,,,,36735,1379184293138857984,GrillmoreSlim,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","This lady on the plane said ""vaxxed and waxed, baby, I'm ready for some action on this trip."" and that should be everybody's vibe this summer.",https://twitter.com/trcpicalove/status/1384543908655517698,Fri Sep 22 22:14:24 +0000 2017,911353010957246466,False,i solve my problems by blatantly ignoring them and going on the internet,3753,16,100,0,she/her | 23 | hungary,mónika 🥀,trcpicalove,8927,,,False


### Quote tweets 

In [224]:
# 292K out of 1.1 million tweets are quotes

(~(vax_full['quote_id'].isnull())).sum()

292572

In [225]:
# 156K are both quotes and retweets

((~(vax_full['quote_id'].isnull())) & \
(~(vax_full['retweet_id'].isnull()))).sum()

156634

In [226]:
# the most common quote tweets

vax_full.groupby('quote_id')['quote_id'].count(). \
sort_values(ascending=False).head(10)

quote_id
1346525695879614464    20486
1372504222248415232    3809 
1377647623339905024    2990 
1342085913782923264    2815 
1379184293138857984    2400 
1333129526826721280    2160 
1362783010597310464    2072 
1346602188953559040    1879 
1370735174137298944    1777 
1380606469004009472    1676 
Name: quote_id, dtype: int64

In [227]:
# looks like quote tweets are sometimes identical, but sometimes not

vax_full[vax_full['quote_id'] == 1346525695879614464].head(10)

Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
137165,,Tue Jan 05 20:17:37 +0000 2021,,,https://twitter.com/therecount/status/1346525695879614467,23,1346551449199775746,,,,en,,False,1346525695879614464,4,,,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>","Yeah, because what you *REALLY* want is unvaccinated people handling the food supply because they don't have citizenship paperwork. 🙄🙄🙄🙄🙄 https://t.co/4gfG58eEK1",https://twitter.com/RGibsongirl/status/1346551449199775746,Fri Nov 20 20:57:15 +0000 2009,91423884,False,Corset scholar; bioanth PhD; chaotic neutral--harlequin aspect; bibliophile; tattoo collector; advocate for the dead; robot sex analyst; bad example for hire.,117226,5616,6176,16,,Dr. Rebecca Gibson,RGibsongirl,42698,,http://amazon.com/author/rebeccagibsonbioanthro,False
137185,,Tue Jan 05 20:22:37 +0000 2021,,,,0,1346552706006040576,,,,en,,,1346525695879614464,4,1.3465514491997755e+18,RGibsongirl,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>","Yeah, because what you *REALLY* want is unvaccinated people handling the food supply because they don't have citizenship paperwork. 🙄🙄🙄🙄🙄 https://t.co/4gfG58eEK1",https://twitter.com/Jerri_Lynn25/status/1346552706006040576,Sat Nov 09 03:16:22 +0000 2019,1193004107759529986,False,"“𝒯𝒽𝑒 𝓃𝑒𝓍𝓉 𝓉𝒾𝓂𝑒 𝓈𝑜𝓂𝑒𝑜𝓃𝑒‘𝓈 𝓉𝑒𝒶𝒸𝒽𝒾𝓃𝑔, 𝓌𝒽𝓎 𝒹𝑜𝓃‘𝓉 𝓎𝑜𝓊 𝑔𝑒𝓉 𝓉𝒶𝓊𝑔𝒽𝓉?“",109620,5219,2887,7,Here & There,JerriLynn,Jerri_Lynn25,72221,,,False
137199,,Tue Jan 05 20:17:56 +0000 2021,,,,0,1346551528849633280,,,,en,,,1346525695879614464,4,1.3465514491997755e+18,RGibsongirl,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>","Yeah, because what you *REALLY* want is unvaccinated people handling the food supply because they don't have citizenship paperwork. 🙄🙄🙄🙄🙄 https://t.co/4gfG58eEK1",https://twitter.com/SilviPastured/status/1346551528849633280,Sat Feb 22 17:32:01 +0000 2020,1231270345300160518,False,Settler focused on Earth Service via climate-resilient food security.\n\n#NoPrideInGenocide #LandBack\n\n#KeepItInTheGround #ClimateStrike #Divest #FoodNotLawns,192627,1335,2350,13,So-called Canada,Practice Courage,SilviPastured,103084,,,False
137201,,Tue Jan 05 20:18:33 +0000 2021,,,,0,1346551683598446598,,,,en,,,1346525695879614464,4,1.3465514491997755e+18,RGibsongirl,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>","Yeah, because what you *REALLY* want is unvaccinated people handling the food supply because they don't have citizenship paperwork. 🙄🙄🙄🙄🙄 https://t.co/4gfG58eEK1",https://twitter.com/dalydes/status/1346551683598446598,Sun Apr 03 16:08:33 +0000 2011,276555350,False,Attempting to ally. #BLMTO #wetsuwetenstrong #MMIW #RightsnotRescue #Antifa Dish with One Spoon He/his,176444,892,4373,42,Toronto,mind that magnifies the smallest matter,dalydes,87929,,,False
137219,,Tue Jan 05 20:36:39 +0000 2021,,,,0,1346556235764535299,,,,en,,,1346525695879614464,43,1.346556164214055e+18,LouisatheLast,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",This is ridiculous. Do you want sick people handling your food? Do you think vaccines work so perfectly that unvaccinated workers won’t get vaccinated coworkers sick? What the fuck https://t.co/oBA9x2ji83,https://twitter.com/aboynamedart/status/1346556235764535299,Sun Nov 23 23:11:55 +0000 2008,17580293,False,"Team @erumors. He/His. @Racialicious Forever. Bylines: SyfyWire, Motherboard, Rolling Stone, Raw Story. Notorious anti-white racist.' -- Breitbart",44646,4103,4810,217,,Arturo R. Garcia,aboynamedart,153723,,http://racialicious.com,False
137225,,Tue Jan 05 20:37:37 +0000 2021,,,,0,1346556482683285504,,,,en,,,1346525695879614464,43,1.346556164214055e+18,LouisatheLast,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",This is ridiculous. Do you want sick people handling your food? Do you think vaccines work so perfectly that unvaccinated workers won’t get vaccinated coworkers sick? What the fuck https://t.co/oBA9x2ji83,https://twitter.com/SouthrnGothHick/status/1346556482683285504,Sun Jul 04 02:01:47 +0000 2010,162566080,False,"Paul\n\nbisexual with an aquarium. might be scotts irish?\n|\nstudied Latin & gender; now, studies biochem\n|\nlikes: peppers, snakes, & spiders\n|\nThey/Them/Y'all.",334002,1319,2679,2,Alabama,crass iron skillet,SouthrnGothHick,125234,,,False
137227,,Tue Jan 05 20:38:35 +0000 2021,,,,0,1346556723423801346,,,,en,,,1346525695879614464,43,1.346556164214055e+18,LouisatheLast,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",This is ridiculous. Do you want sick people handling your food? Do you think vaccines work so perfectly that unvaccinated workers won’t get vaccinated coworkers sick? What the fuck https://t.co/oBA9x2ji83,https://twitter.com/bunrxm/status/1346556723423801346,Tue Nov 24 00:36:59 +0000 2009,92157633,False,enby bun. they/them. 🐰🤓🤖,167918,1937,1945,9,¯\_(ツ)_/¯,rxbun,bunrxm,994275,,,False
137228,,Tue Jan 05 20:48:25 +0000 2021,,,,0,1346559198247727110,,,,en,,,1346525695879614464,43,1.346556164214055e+18,LouisatheLast,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",This is ridiculous. Do you want sick people handling your food? Do you think vaccines work so perfectly that unvaccinated workers won’t get vaccinated coworkers sick? What the fuck https://t.co/oBA9x2ji83,https://twitter.com/SamanthaEich/status/1346559198247727110,Sat Nov 05 05:26:19 +0000 2011,405338829,False,"librarian, couch potato, very enthusiastic talker | she/her",3924,131,251,1,a comfy chair,Samantha 🏳️‍🌈,SamanthaEich,5894,,,False
137234,,Tue Jan 05 20:36:22 +0000 2021,,,https://twitter.com/therecount/status/1346525695879614467,157,1346556164214054918,,,,en,,False,1346525695879614464,43,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",This is ridiculous. Do you want sick people handling your food? Do you think vaccines work so perfectly that unvaccinated workers won’t get vaccinated coworkers sick? What the fuck https://t.co/oBA9x2ji83,https://twitter.com/LouisatheLast/status/1346556164214054918,Tue Dec 02 00:47:39 +0000 2008,17794636,False,"Big mean sweaty dyke just looking for trouble. Friend of Garak. Cis-ish, she/her, graphic designer, race traitor, obesity glorifier. Opinions all mine. 🌹🍞🌹",334406,33261,2127,236,,Louisa 🌈👭,LouisatheLast,193614,,,False
137235,,Tue Jan 05 20:37:34 +0000 2021,,,,0,1346556469362249728,,,,en,,,1346525695879614464,43,1.346556164214055e+18,LouisatheLast,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",This is ridiculous. Do you want sick people handling your food? Do you think vaccines work so perfectly that unvaccinated workers won’t get vaccinated coworkers sick? What the fuck https://t.co/oBA9x2ji83,https://twitter.com/ForthWyn/status/1346556469362249728,Sat May 30 16:24:25 +0000 2009,43551122,False,"Eve, 26, fantasy writer, web developer in training, Salford Uni graduate. She/her",41385,575,1958,35,England,📖Eve (she/her),ForthWyn,100540,,,False


### Language 

In [228]:
# most are in English, but not all

vax_full['lang'].value_counts()

en     991130
und    53757 
fr     12193 
es     7604  
ja     5112  
de     4842  
nl     4571  
tr     2266  
it     2249  
ar     1777  
pt     1572  
gu     1511  
in     1445  
pl     1412  
zh     882   
hi     778   
sv     709   
ca     536   
et     528   
el     522   
ko     423   
tl     415   
ht     383   
ru     359   
fi     345   
da     320   
cs     296   
ta     239   
sr     202   
ro     197   
iw     196   
sl     162   
no     136   
th     128   
cy     111   
te     105   
fa     89    
lt     88    
lv     64    
hu     61    
is     40    
eu     34    
ur     25    
bg     17    
uk     16    
ne     10    
mr     9     
vi     9     
kn     7     
am     6     
ml     6     
bn     5     
dv     4     
or     3     
pa     2     
ps     1     
Name: lang, dtype: int64

In [229]:
# 'und' means language was undetected
# seems like these tweets don't have much text

vax_full[vax_full['lang'] == 'und']

Unnamed: 0,coordinates,created_at,hashtags,media,urls,favorite_count,id,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,lang,place,possibly_sensitive,quote_id,retweet_count,retweet_id,retweet_screen_name,source,text,tweet_url,user_created_at,user_id,user_default_profile_image,user_description,user_favourites_count,user_followers_count,user_friends_count,user_listed_count,user_location,user_name,user_screen_name,user_statuses_count,user_time_zone,user_urls,user_verified
17,,Sun Nov 01 07:09:57 +0000 2020,Nomask MaskOFF NoVaccine,https://twitter.com/Schille18336614/status/1322798014566703104/photo/1,,0,1322798014566703104,FOX29philly,1.322796e+18,1.478771e+07,und,,False,,0,,,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",@FOX29philly #Nomask #MaskOFF #NoVaccine https://t.co/55ZzqhJFo8,https://twitter.com/Schille18336614/status/1322798014566703104,Mon Sep 14 02:01:02 +0000 2020,1305325569815515136,False,„Sei ein Freund der Schwachen und liebe die Gerechtigkeit.“\n\n#Testboykott #NeinzumImpfzwang\n#RücktrittBundesregierung ⚔ #impfapartheid #Ungehorsam,7499,231,389,0,Linköping,Schiller ✝⚔⛪ 🇩🇪🇦🇹,Schille18336614,7577,,https://report24.news/,False
20,,Sun Nov 01 07:15:10 +0000 2020,,,https://twitter.com/sunnynwaobi1/status/1322434241829982208,0,1322799327044161537,ARISEtv,,1.087779e+09,und,,False,1322434241829982208,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",@ARISEtv https://t.co/n5y4J0oAlU,https://twitter.com/kenzykayoficial/status/1322799327044161537,Mon Jan 03 16:02:06 +0000 2011,233576830,False,SEO Specialist / music lover / Digital Marketer / Red Devil,124,339,898,3,Nigeria,KenzyKay ¦ 20-10-20💔,kenzykayoficial,5179,,,False
40,,Sun Nov 01 07:35:59 +0000 2020,,,https://twitter.com/conspiracyb0t/status/1322791741854937089,0,1322804564559499265,ashwani_mahajan,,2.156856e+08,und,,False,1322791741854937088,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",@ashwani_mahajan https://t.co/camEcwIpvo,https://twitter.com/devsr84/status/1322804564559499265,Fri Apr 23 07:18:45 +0000 2010,136185881,False,"Working on Ergonomics, IITian, Blogger, Tech Enthusiast, Wannabe Entrepreneur, Student of History & Geo-Politics ...",6391,704,4980,15,Mumbai,Devindra Singh,devsr84,55837,,https://www.twitter.com/devsr84,False
50,,Sun Nov 01 07:42:47 +0000 2020,EndSARS EndMilitaryBrutality EndPoliceBrutalityinNigera EndBadGoveranceInNigeria,,,0,1322806277760864256,,,,und,,,1322434241829982208,2,1322611480898973696,Bubblybee_Chi,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",#EndSARS\n#EndMilitaryBrutality \n#EndPoliceBrutalityinNigera \n#EndBadGoveranceInNigeria https://t.co/ykBOkVRRLc,https://twitter.com/IkeLObidike/status/1322806277760864256,Tue Feb 22 10:38:12 +0000 2011,255949363,False,"Chemical Engineer (PhD), Author (Shifting Sands), Pro-Life, Non-absent Dad, Football Analyst, Arsenal fan, Scrabbler, Pragmatists, Humanist, Christian.",8468,419,329,8,Johannesburg,Dr. Ike Obidike,IkeLObidike,12310,,,False
72,,Sun Nov 01 08:18:55 +0000 2020,,,https://twitter.com/ForcedAdoption1/status/1322810228988026880,0,1322815370332266499,hakki501,1.322810e+18,4.013243e+08,und,,False,1322810228988026880,0,,,"<a href=""https://mobile.twitter.com"" rel=""nofollow"">Twitter Web App</a>",https://t.co/H9H559xnhp,https://twitter.com/hakki501/status/1322815370332266499,Sun Oct 30 11:27:55 +0000 2011,401324299,False,"Nothing to boast of except the cross of Jesus Christ.\nSaved by His grace through faith. (Ephesians 2: 8,9)",26734,905,1377,50,Temporary residence: Earth. Pe,J.,hakki501,29961,,,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099727,,Wed Apr 21 06:27:00 +0000 2021,Mask Masks FaceMask FaceMasks FaceDiaper FaceDiapers MaskMandate MaskMandates MedicalFreedom,,https://www.instagram.com/p/CN6vxB0AjXd/?igshid=lqy30ohrdn9b,0,1384755531299045376,,,,und,,False,,0,,,"<a href=""http://instagram.com"" rel=""nofollow"">Instagram</a>",#Mask #Masks #FaceMask #FaceMasks #FaceDiaper #FaceDiapers #MaskMandate #MaskMandates #MedicalFreedom https://t.co/naFkIu1W6F,https://twitter.com/JMichW87/status/1384755531299045376,Tue Jun 16 16:17:56 +0000 2020,1272926339696398336,True,,383,6,222,0,"Hollywood, FL",Jamie White,JMichW87,1639,,,False
1099790,,Wed Apr 21 06:39:45 +0000 2021,,,https://twitter.com/kurtwearshats/status/1384564021202882565,1,1384758740100861955,,,,und,"Irvine, CA",False,1384564021202882560,0,,,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",😂😂😂😂😂 https://t.co/dlBkxWQyZ2,https://twitter.com/angel0bugatti/status/1384758740100861955,Sat May 12 19:28:45 +0000 2012,578330624,False,Middle-aged black man with sass. Big butt. Bigger heart. | Debater.,36628,268,930,4,"Irvine, CA",Angelo,angel0bugatti,46080,,https://vsco.co/angelo-bugatti,False
1099799,,Wed Apr 21 06:43:15 +0000 2021,,,https://montanadailygazette.com/2021/04/16/unvaccinated-women-report-miscarriages-after-interactions-with-vaccinated-people/,0,1384759617528340483,,,,und,,False,,0,,,"<a href=""http://twitter.com/download/android"" rel=""nofollow"">Twitter for Android</a>",https://t.co/rZmyXHcIZi,https://twitter.com/AlainBeydoun/status/1384759617528340483,Thu Mar 08 08:52:28 +0000 2012,518323576,False,,67,13,28,0,cairns,byblos cafe,AlainBeydoun,138,,http://bybloscafe.com.au,False
1099813,,Wed Apr 21 06:39:54 +0000 2021,,,,0,1384758774301224962,,,,und,,,1384564021202882560,49,1384738849180450816,KingJosiah54,"<a href=""http://twitter.com/download/iphone"" rel=""nofollow"">Twitter for iPhone</a>",😂😂😂😂😂😂 https://t.co/EuGgqxKscL,https://twitter.com/SpcMtn/status/1384758774301224962,Fri Apr 10 03:46:16 +0000 2009,30153135,False,"As long as I’m in Polo smiling, they think they got me But they would try to crack me if they ever see a black me.",34189,294,3370,8,somewhr in outerspace,Petty Mahomes,SpcMtn,35105,,,False


### Hashtags 

In [230]:
# most commonly used hashtags (although some of these may appear common because of retweets)
# some are clearly anti-vaccine, while others are neutral

vax_full['hashtags'].value_counts().head(30)

NoVaccineForMe                              4368
COVID19                                     4266
mybodymychoice                              3514
FireFauci WeWillNotComply                   3118
MyBodyMyChoice                              3018
InformedConsent                             2555
VaccineforSouthAfrica                       2197
BillGatesBioTerrorist                       2154
ArrestBillGates                             2027
NoVaccine                                   1893
IDoNotConsent                               1284
COVID                                       850 
depopulation                                837 
vaccine                                     700 
LongCovid                                   690 
BREAKING COVID19                            683 
GreatReset                                  675 
vaccines                                    637 
VAXXED                                      537 
arrestbillgates                             530 
ScottyDoesNothing   

## Clean dataset

In [1]:
# full dataset contains 45K unique tweets 

vax_full['text'].nunique()

NameError: name 'vax_full' is not defined

In [247]:
# dropping duplicate tweets

vax_tweets = vax_full.drop_duplicates(subset='text', keep='first')
vax_tweets.shape

(457573, 35)

In [248]:
# keeping only tweets that are in English

vax_tweets = vax_tweets[vax_tweets['lang'] == 'en']
vax_tweets.shape

(396605, 35)

In [249]:
vax_tweets.to_csv('vax_tweets.csv', index=False)