Importation of packages

In [20]:
import filter_data  # project module: CSV filtering / pre-processing helpers
import date_handling  # project module: date related helpers
import tweet_analysis  # project module: content analysis of the tweets
import constants  # project module: global constants (e.g. data paths)
import plot # project module: plotting functions
import dictionnaries # project module: defined dictionaries
import numpy as np # numpy for array handling
import percentage # project module: percentage computation

# Reload every project module imported above so that edits made to the .py files
# are picked up without restarting the kernel.
import importlib

importlib.reload(filter_data)
importlib.reload(date_handling)
importlib.reload(tweet_analysis)
importlib.reload(constants)
importlib.reload(plot)
importlib.reload(percentage)  # keep this list in sync with the project imports
# Kept last on purpose: its return value is what the cell displays.
importlib.reload(dictionnaries)



<module 'dictionnaries' from '/home/loris/Desktop/populist_russians_bots/dictionnaries.py'>

Definition of constants 


In [2]:
# Bounds of the studied period (English-language tweets only).
START_DATE = np.datetime64("2014-07-01")  # day of the first published English tweet
END_DATE = np.datetime64("2018-06-01")  # day of the last published English tweet


Filter the data:
- Keep only tweets written in English
- Remove links from the tweets
- Remove columns that we do not need
- Reformat dates
- Convert the tweets to lowercase

In [10]:
# Warning: run this cell a single time only.

# Indices of the columns that are kept from the raw files
COLUMN_INDICES = {1, 2, 5, 13}

# Pre-process the raw files numbered 1 to 13. The language filter writes the
# pre-processed file; the three following steps then rewrite that file in place.
for file_number in range(1, 14):
    raw_file = constants.RAW_DATA_PATH.format(file_number)
    processed_file = constants.PRE_PROCESSED_DATA_PATH.format(file_number)

    # Keep only tweets in english
    # (4 is presumably the index of the language column -- confirm in filter_data.filter_csv)
    filter_data.filter_csv(raw_file, processed_file, COLUMN_INDICES, 4, "English")

    filter_data.extract_date_columns(processed_file, processed_file)
    filter_data.remove_links(processed_file, processed_file)
    filter_data.convert_to_lowercase(processed_file, processed_file)

Get the array of dates that span the period of the tweets

In [3]:
# Dates spanning the period of the tweets, built from the two bounds defined above.
# It is passed to every analysis and plot below.
date_array = date_handling.get_date_array(START_DATE, END_DATE)

(Optional) Plot the number of tweets over the period

In [4]:
# Number of tweets per day over date_array, then the grand total as a sanity check.
nb_of_tweets_per_day = tweet_analysis.get_nb_of_tweets_per_day(date_array)
print(np.sum(nb_of_tweets_per_day, axis=0))


2116719.0


In [13]:
# IPython magic: select the Qt backend for matplotlib figures.
%matplotlib qt

# Plot the number of tweets per day over the studied period.
plot.plot_result(date_array, nb_of_tweets_per_day, START_DATE, END_DATE)




qt.qpa.wayland: Failed to initialize EGL display 3001


Dictionary analysis


In [14]:
# Run the dictionary analysis over the dataset with the `dic_rooduijn_pauwels` dictionary.
# NOTE(review): the result is presumably one count per entry of date_array (it is
# plotted against date_array below) -- confirm in tweet_analysis.dataset_analysis.
nb_of_tweets_filtered_with_dic = tweet_analysis.dataset_analysis(dictionnaries.dic_rooduijn_pauwels, date_array)


Plot result of dictionary analysis

In [15]:
# Plot the result of the dictionary analysis over the studied period.
plot.plot_result(date_array,nb_of_tweets_filtered_with_dic, START_DATE, END_DATE)

Get the percentage of tweets containing at least one dictionary word, relative to the total number of tweets

In [16]:
# Percentage of tweets matched by the dictionary relative to the total number of tweets.
# The result is deliberately NOT stored in a variable named `percentage`: that would
# rebind the name of the imported `percentage` module and make every later call to
# `percentage.percentage_of_tweets(...)` fail until the import cell is run again.
percentage_filtered_with_dic = percentage.percentage_of_tweets(
    nb_of_tweets_filtered_with_dic, nb_of_tweets_per_day
)
plot.plot_result(date_array, percentage_filtered_with_dic, START_DATE, END_DATE)

# Populist dictionaries 


Extract the dictionaries of populist expressions

In [26]:
# One dictionary of expressions per populism criterion.
anti_elitism_dictionary = dictionnaries.extract_grundle_translation_dictionary("anti-elitism")
people_centrism_dictionary = dictionnaries.extract_grundle_translation_dictionary("people-centrism")
sovereignty_dictionary = dictionnaries.extract_grundle_translation_dictionary("sovereignty")

# Report the size of each criterion dictionary.
for size_message, criterion_dictionary in (
    ("anti-elitism dictionary contains {} words", anti_elitism_dictionary),
    ("people_centrism dictionary contains {} words", people_centrism_dictionary),
    ("sovereignty dictionary contains {} words", sovereignty_dictionary),
):
    print(size_message.format(len(criterion_dictionary)))

# Merge the three criteria into a single dictionary and report its size.
all_criteria_dictionary = anti_elitism_dictionary.union(people_centrism_dictionary).union(sovereignty_dictionary)
print("dictionary of all criteria contains {} words".format(len(all_criteria_dictionary)))

anti-elitism dictionary contains 7288 words
people_centrism dictionary contains 3135 words
sovereignty dictionary contains 426 words
dictionary of all criteria contains 10833 words


# Analysis

Perform the analysis for all criteria

In [27]:
# Dictionary analysis with the merged dictionary of all criteria; the result is then
# written to ./cached_results so that it can be reloaded without recomputing.
nb_of_tweets_with_at_least_one_criteria = tweet_analysis.dataset_analysis(
    all_criteria_dictionary,
    date_array,
)
np.save(
    "./cached_results/nb_of_tweets_with_at_least_one_criteria.npy",
    nb_of_tweets_with_at_least_one_criteria,
)

processing file 1
1.811981201171875e-05
processing file 2
195.377601146698
processing file 3
382.3301694393158
processing file 4
586.1631543636322
processing file 5
768.7021989822388
processing file 6
927.6805799007416
processing file 7
1129.1831333637238
processing file 8
1273.5779247283936
processing file 9
1388.289869070053
processing file 10
1500.284558057785
processing file 11
1661.5693047046661
processing file 12
1834.3246793746948
processing file 13
1993.6234483718872


Perform the anti-elitism analysis 

In [6]:
# Dictionary analysis with the anti-elitism dictionary over the whole dataset.
nb_of_tweets_anti_elitist = tweet_analysis.dataset_analysis(
    anti_elitism_dictionary,
    date_array,
)
# Persist the result on disk so that it can be reloaded later.
np.save(
    "./cached_results/nb_of_tweets_anti_elitist.npy",
    nb_of_tweets_anti_elitist,
)

processing file 1
0.0003180503845214844


KeyboardInterrupt: 

Perform the people_centrism analysis 


In [13]:
# Dictionary analysis with the people-centrism dictionary over the whole dataset.
nb_of_tweets_people_centrism = tweet_analysis.dataset_analysis(
    people_centrism_dictionary,
    date_array,
)
# Persist the result on disk so that it can be reloaded later.
np.save(
    "./cached_results/nb_of_tweets_people_centrism.npy",
    nb_of_tweets_people_centrism,
)

processing file 1
8.440017700195312e-05
processing file 2
60.74401068687439
processing file 3
119.36048793792725
processing file 4
183.60057997703552
processing file 5
241.99017453193665
processing file 6
290.83711409568787
processing file 7
352.8362989425659
processing file 8
397.9904148578644
processing file 9
434.26134753227234
processing file 10
469.16220331192017
processing file 11
518.4147465229034
processing file 12
572.2147982120514
processing file 13
622.5426344871521


Perform the sovereignty analysis

In [14]:
# Dictionary analysis with the sovereignty dictionary over the whole dataset.
nb_of_tweets_sovereignty = tweet_analysis.dataset_analysis(
    sovereignty_dictionary,
    date_array,
)
# Persist the result on disk so that it can be reloaded later.
np.save(
    "./cached_results/nb_of_tweets_sovereignty.npy",
    nb_of_tweets_sovereignty,
)

processing file 1
0.0002989768981933594
processing file 2
8.894880056381226
processing file 3
17.40390396118164
processing file 4
26.74119734764099
processing file 5
35.13206362724304
processing file 6
42.358131408691406
processing file 7
51.51526498794556
processing file 8
58.053232192993164
processing file 9
63.371689796447754
processing file 10
68.62022280693054
processing file 11
75.86168646812439
processing file 12
83.84404349327087
processing file 13
91.11701703071594


Perform the analysis for the Right Trolls 

In [6]:
# Dictionary analysis for the "RightTroll" category, one run per populism criterion.
# NOTE(review): the third argument presumably restricts the analysis to that account
# category -- confirm in tweet_analysis.dataset_analysis.
#
# Each run is long, so every result is written to ./cached_results as soon as it is
# available: an interrupted or failed run keeps the analyses that already finished.
nb_tweets_right_anti_elitist = tweet_analysis.dataset_analysis(anti_elitism_dictionary, date_array, "RightTroll")
np.save("./cached_results/nb_tweets_right_anti_elitist.npy", nb_tweets_right_anti_elitist)

nb_tweets_right_people_centrism = tweet_analysis.dataset_analysis(people_centrism_dictionary, date_array, "RightTroll")
np.save("./cached_results/nb_tweets_right_people_centrism.npy", nb_tweets_right_people_centrism)

nb_tweets_right_sovereignty = tweet_analysis.dataset_analysis(sovereignty_dictionary, date_array, "RightTroll")
np.save("./cached_results/nb_tweets_right_sovereignty.npy", nb_tweets_right_sovereignty)

processing file 1
0.0004076957702636719
processing file 2
215.3645522594452
processing file 3
411.87449622154236
processing file 4
635.8055195808411
processing file 5
844.0977220535278
processing file 6
1023.1581933498383
processing file 7
1247.3113045692444
processing file 8
1402.2921578884125
processing file 9
1526.9980773925781
processing file 10
1645.093108177185
processing file 11
1827.134846687317
processing file 12
2019.8090846538544
processing file 13
2190.700097799301
processing file 1
6.532669067382812e-05
processing file 2
97.64743971824646
processing file 3
193.1334023475647
processing file 4
296.4744896888733
processing file 5
391.2334864139557
processing file 6
469.1361665725708
processing file 7
567.8150594234467
processing file 8
641.1044006347656
processing file 9
700.5750844478607
processing file 10
757.2609882354736
processing file 11
837.755576133728
processing file 12
925.376757144928
processing file 13
1007.297886133194
processing file 1
8.0108642578125e-05
proces

Perform the analysis for the Left Trolls 

In [22]:
# Dictionary analysis for the "LeftTroll" category, one run per populism criterion.
# NOTE(review): the third argument presumably restricts the analysis to that account
# category -- confirm in tweet_analysis.dataset_analysis.
#
# Each run is long, so every result is written to ./cached_results as soon as it is
# available: an interrupted or failed run keeps the analyses that already finished.
nb_tweets_left_anti_elitist = tweet_analysis.dataset_analysis(anti_elitism_dictionary, date_array, "LeftTroll")
np.save("./cached_results/nb_tweets_left_anti_elitist.npy", nb_tweets_left_anti_elitist)

nb_tweets_left_people_centrism = tweet_analysis.dataset_analysis(people_centrism_dictionary, date_array, "LeftTroll")
np.save("./cached_results/nb_tweets_left_people_centrism.npy", nb_tweets_left_people_centrism)

nb_tweets_left_sovereignty = tweet_analysis.dataset_analysis(sovereignty_dictionary, date_array, "LeftTroll")
np.save("./cached_results/nb_tweets_left_sovereignty.npy", nb_tweets_left_sovereignty)

processing file 1
9.465217590332031e-05
processing file 2
134.24425339698792
processing file 3
266.6309189796448
processing file 4
411.54574489593506
processing file 5
541.5891797542572
processing file 6
655.1624433994293
processing file 7
800.6519958972931
processing file 8
902.3444714546204
processing file 9
981.052775144577
processing file 10
1057.9173364639282
processing file 11
1168.429649591446
processing file 12
1287.0131826400757
processing file 13
1396.4761803150177
processing file 1
5.245208740234375e-05
processing file 2
60.504013776779175
processing file 3
118.68164873123169
processing file 4
182.0705235004425
processing file 5
239.95427536964417
processing file 6
288.56716203689575
processing file 7
350.1775426864624
processing file 8
394.83086609840393
processing file 9
431.0025939941406
processing file 10
465.645920753479
processing file 11
514.6358647346497
processing file 12
568.108395576477
processing file 13
618.1072518825531
processing file 1
7.224082946777344e-05
p

Perform the analysis for the News Feed

In [23]:
# Dictionary analysis for the "NewsFeed" category, one run per populism criterion.
# NOTE(review): the third argument presumably restricts the analysis to that account
# category -- confirm in tweet_analysis.dataset_analysis.
#
# Each run is long, so every result is written to ./cached_results as soon as it is
# available: an interrupted or failed run keeps the analyses that already finished.
nb_tweets_news_anti_elitist = tweet_analysis.dataset_analysis(anti_elitism_dictionary, date_array, "NewsFeed")
np.save("./cached_results/nb_tweets_news_anti_elitist.npy", nb_tweets_news_anti_elitist)

nb_tweets_news_people_centrism = tweet_analysis.dataset_analysis(people_centrism_dictionary, date_array, "NewsFeed")
np.save("./cached_results/nb_tweets_news_people_centrism.npy", nb_tweets_news_people_centrism)

nb_tweets_news_sovereignty = tweet_analysis.dataset_analysis(sovereignty_dictionary, date_array, "NewsFeed")
np.save("./cached_results/nb_tweets_news_sovereignty.npy", nb_tweets_news_sovereignty)

processing file 1
0.00010418891906738281
processing file 2
133.19934749603271
processing file 3
260.20931005477905
processing file 4
399.04048013687134
processing file 5
524.3051209449768
processing file 6
632.8882067203522
processing file 7
771.5159060955048
processing file 8
870.1905505657196
processing file 9
948.9955327510834
processing file 10
1025.771416425705
processing file 11
1136.3099193572998
processing file 12
1255.0064461231232
processing file 13
1364.184895992279
processing file 1
5.7697296142578125e-05
processing file 2
60.652273416519165
processing file 3
118.96420645713806
processing file 4
182.79125666618347
processing file 5
240.41578030586243
processing file 6
289.5293548107147
processing file 7
351.5306398868561
processing file 8
396.80396461486816
processing file 9
433.1488995552063
processing file 10
468.0145881175995
processing file 11
517.9637641906738
processing file 12
572.368362903595
processing file 13
622.8984622955322
processing file 1
5.435943603515625e-

Perform the analysis for the HashtagGamer category

In [24]:
# Dictionary analysis for the "HashtagGamer" category, one run per populism criterion.
# NOTE(review): the third argument presumably restricts the analysis to that account
# category -- confirm in tweet_analysis.dataset_analysis.
#
# Each run is long, so every result is written to ./cached_results as soon as it is
# available: an interrupted or failed run keeps the analyses that already finished.
nb_tweets_gamer_anti_elitist = tweet_analysis.dataset_analysis(anti_elitism_dictionary, date_array, "HashtagGamer")
np.save("./cached_results/nb_tweets_gamer_anti_elitist.npy", nb_tweets_gamer_anti_elitist)

nb_tweets_gamer_people_centrism = tweet_analysis.dataset_analysis(people_centrism_dictionary, date_array, "HashtagGamer")
np.save("./cached_results/nb_tweets_gamer_people_centrism.npy", nb_tweets_gamer_people_centrism)

nb_tweets_gamer_sovereignty = tweet_analysis.dataset_analysis(sovereignty_dictionary, date_array, "HashtagGamer")
np.save("./cached_results/nb_tweets_gamer_sovereignty.npy", nb_tweets_gamer_sovereignty)

processing file 1
1.9550323486328125e-05
processing file 2
134.25631260871887
processing file 3
263.1615216732025
processing file 4
403.7081413269043
processing file 5
529.2257747650146
processing file 6
638.1393418312073
processing file 7
776.9668819904327
processing file 8
876.2808842658997
processing file 9
956.2740998268127
processing file 10
1034.53799700737
processing file 11
1147.2004261016846
processing file 12
1266.928723335266
processing file 13
1378.1075143814087
processing file 1
5.745887756347656e-05
processing file 2
61.42268705368042
processing file 3
120.0383939743042
processing file 4
185.4759635925293
processing file 5
243.81869053840637
processing file 6
292.8523919582367
processing file 7
355.4913237094879
processing file 8
400.99363231658936
processing file 9
437.710040807724
processing file 10
473.0845150947571
processing file 11
522.5990090370178
processing file 12
576.4645278453827
processing file 13
626.9256267547607
processing file 1
4.6253204345703125e-05
pro

Perform the analysis for the Fearmonger category

In [25]:
# Dictionary analysis for the "Fearmonger" category, one run per populism criterion.
# NOTE(review): the third argument presumably restricts the analysis to that account
# category -- confirm in tweet_analysis.dataset_analysis.
#
# Each run is long, so every result is written to ./cached_results as soon as it is
# available: an interrupted or failed run keeps the analyses that already finished.
nb_tweets_fear_anti_elitist = tweet_analysis.dataset_analysis(anti_elitism_dictionary, date_array, "Fearmonger")
np.save("./cached_results/nb_tweets_fear_anti_elitist.npy", nb_tweets_fear_anti_elitist)

nb_tweets_fear_people_centrism = tweet_analysis.dataset_analysis(people_centrism_dictionary, date_array, "Fearmonger")
np.save("./cached_results/nb_tweets_fear_people_centrism.npy", nb_tweets_fear_people_centrism)

nb_tweets_fear_sovereignty = tweet_analysis.dataset_analysis(sovereignty_dictionary, date_array, "Fearmonger")
np.save("./cached_results/nb_tweets_fear_sovereignty.npy", nb_tweets_fear_sovereignty)

processing file 1
2.7418136596679688e-05
processing file 2
133.7753300666809
processing file 3
261.3743932247162
processing file 4
400.54706025123596
processing file 5
526.3060903549194
processing file 6
634.780927658081
processing file 7
774.4851589202881
processing file 8
874.1925630569458
processing file 9
953.1922750473022
processing file 10
1031.067664384842
processing file 11
1141.5639209747314
processing file 12
1260.0745558738708
processing file 13
1373.3502359390259
processing file 1
5.1975250244140625e-05
processing file 2
62.423741817474365
processing file 3
122.51149773597717
processing file 4
186.70643615722656
processing file 5
244.73098993301392
processing file 6
293.5311801433563
processing file 7
355.41470217704773
processing file 8
400.4218168258667
processing file 9
436.67933654785156
processing file 10
471.4764440059662
processing file 11
520.593864440918
processing file 12
574.2718586921692
processing file 13
624.4548637866974
processing file 1
5.984306335449219e-0

# Plot results 

Global anti-elitist 

In [21]:
# IPython magic: select the Qt backend for matplotlib figures.
%matplotlib qt
# Alternative backend: use `%matplotlib inline` instead to render inside the notebook.
# Reload the anti-elitism result cached on disk instead of recomputing it.
nb_of_tweets_anti_elitist = np.load('./cached_results/nb_of_tweets_anti_elitist.npy')
# `percentage` must be the imported module here (not a variable of the same name);
# `nb_of_tweets_per_day` and `date_array` come from earlier cells.
percentage_anti_elitist = percentage.percentage_of_tweets(nb_of_tweets_anti_elitist, nb_of_tweets_per_day)
plot.plot_result(date_array, percentage_anti_elitist, START_DATE, END_DATE)
# Plain (unweighted) mean over all entries of the percentage array.
mean_percentage = np.mean(percentage_anti_elitist)

print("Mean percentage: ", mean_percentage)

Mean percentage:  8.677374301675977


Global people centrism


In [None]:
# IPython magic: select the Qt backend for matplotlib figures.
%matplotlib qt
# Alternative backend: use `%matplotlib inline` instead to render inside the notebook.
# Reload the people-centrism result cached on disk instead of recomputing it.
nb_of_tweets_people_centrism = np.load('./cached_results/nb_of_tweets_people_centrism.npy')
# `percentage` must be the imported module here (not a variable of the same name);
# `nb_of_tweets_per_day` and `date_array` come from earlier cells.
percentage_people_centrism = percentage.percentage_of_tweets(nb_of_tweets_people_centrism, nb_of_tweets_per_day)
plot.plot_result(date_array, percentage_people_centrism, START_DATE, END_DATE)
# Plain (unweighted) mean over all entries of the percentage array.
mean_percentage = np.mean(percentage_people_centrism)

print("Mean percentage: ", mean_percentage)

Global sovereignty
