In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import seaborn as sn
import folium
import math
import json
from branca.colormap import LinearColormap
from branca.element import MacroElement
from jinja2 import Template

In [2]:
# Load the inter-country mention table. The two event-code columns are read as
# strings because later cells compare them with string values ('--', '14').
# The builtin `str` replaces `np.str`, an alias removed from recent NumPy releases.
intercountry_df = pd.read_csv(
    '../Data_sinan/intercountry_tone_count_table_with_EVENTCODE',
    dtype={'EventRootCode': str, 'EventCode': str},
)
country_info_df = pd.read_csv('../Data_sinan/countryinfo.csv')

# Read the border geometry inside a context manager so the file handle is closed.
with open('../Data/world_borders.topojson.json') as borders_file:
    world_borders = json.load(borders_file)

# FIPS code -> country name lookup; only these two columns are kept.
fips_codes = pd.read_csv('../Data/fipscountries.csv')[['fips', 'country']]


In [3]:
# Two-column view of the country info table: FIPS code and population.
population = country_info_df.loc[:, ['fips', 'population']]

In [4]:
# Keep a row only if it has an event location and neither event-code column
# holds the literal '--'.
intercountry_df = intercountry_df[
    intercountry_df['LocEvent'].notnull()
    & intercountry_df['EventRootCode'].ne('--')
    & intercountry_df['EventCode'].ne('--')
]

In [5]:
# Translate the FIPS codes in LocEvent and LocMention into country names.
# Each pass inner-joins the lookup table on one code column, drops the join key
# and renames the joined 'country' column. The event pass runs first, so the
# mention column ends up as the left-most column.
for code_column, name_column in (('LocEvent', 'EventCountry'),
                                 ('LocMention', 'MentionCountry')):
    intercountry_df = (
        fips_codes
        .merge(intercountry_df, left_on='fips', right_on=code_column, how='inner')
        .drop('fips', axis=1)
        .rename(columns={'country': name_column})
    )

# Rewrite two country names in both name columns.
# NOTE(review): presumably these match the names used in the world borders file - confirm.
for old_name, new_name in (('United States', 'United States of America'),
                           ('Serbia', 'Republic of Serbia')):
    for name_column in ('MentionCountry', 'EventCountry'):
        intercountry_df.loc[intercountry_df[name_column] == old_name, name_column] = new_name


In [6]:
# Preview the first five rows of the table.
intercountry_df.head(n=5)

Unnamed: 0,MentionCountry,EventCountry,EventRootCode,EventCode,LocMention,LocEvent,AverageTone,NumberOfInterMentions
0,Andorra,Andorra,3,332,AN,AN,-1.124744,1
1,Andorra,Andorra,2,241,AN,AN,-1.123225,7
2,Andorra,Andorra,11,113,AN,AN,-6.316239,3
3,Andorra,Andorra,5,56,AN,AN,-0.732468,13
4,Andorra,Andorra,11,1123,AN,AN,-11.238896,2


In [7]:
# Narrow the table to the country names, root event code, tone and mention count.
intercountry_nb_mentions_df = intercountry_df.loc[
    :,
    ['MentionCountry', 'EventCountry', 'EventRootCode', 'AverageTone', 'NumberOfInterMentions'],
]

# Keep only rows whose root event code is '14'.
# NOTE(review): presumably the protest category of the event coding scheme - confirm.
countries_protests = intercountry_nb_mentions_df.loc[
    lambda frame: frame['EventRootCode'].eq('14')
]
countries_protests.head(n=5)

Unnamed: 0,MentionCountry,EventCountry,EventRootCode,AverageTone,NumberOfInterMentions
10,Andorra,Andorra,14,-6.25,1
11,Andorra,Andorra,14,-3.658537,1
32,Andorra,Andorra,14,0.089099,25
52,Andorra,Andorra,14,-0.485852,8
106,Andorra,Andorra,14,-1.053494,79


In [8]:
# Total mention count for every (MentionCountry, EventCountry) pair in
# countries_protests. The aggregation is named by the string 'sum' rather than
# the builtin `sum`: recent pandas deprecates passing the builtin and warns that
# its behaviour will change, while the string alias is stable.
protest_news_numbers = (
    countries_protests
    .groupby(['MentionCountry', 'EventCountry'])
    .agg({'NumberOfInterMentions': 'sum'})
)
protest_news_numbers.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,NumberOfInterMentions
MentionCountry,EventCountry,Unnamed: 2_level_1
Afghanistan,Afghanistan,4843
Afghanistan,Argentina,2
Afghanistan,Armenia,22
Afghanistan,Australia,26
Afghanistan,Austria,10


In [9]:
# Rebind the name to the mention-count column grouped by the first index level.
# group_keys=False keeps the group key from being prepended to results.
protest_news_numbers = (
    protest_news_numbers['NumberOfInterMentions']
    .groupby(level=0, group_keys=False)
)

In [10]:
# Ten largest mention counts within each group.
protest_news_numbers.nlargest(n=10)

MentionCountry  EventCountry                    
Afghanistan     Afghanistan                          4843
                Pakistan                              943
                United States of America              416
                Germany                               238
                Iran                                  213
                India                                 139
                Israel                                139
                Syria                                 126
                Russia                                119
                United Kingdom                        118
Albania         Albania                             47255
                Macedonia                           35104
                Israel                              12199
                Kosovo                              11735
                Greece                              10455
                United States of America            10041
                Turkey 

In [12]:
# Mean of the AverageTone rows for every (MentionCountry, EventCountry) pair.
# NOTE(review): this is an unweighted mean of per-row averages; rows are not
# weighted by NumberOfInterMentions - confirm that this is intended.
protest_news_average = (
    countries_protests
    .groupby(['MentionCountry', 'EventCountry'])[['AverageTone']]
    .mean()
)
protest_news_average.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,AverageTone
MentionCountry,EventCountry,Unnamed: 2_level_1
Afghanistan,Afghanistan,-5.580697
Afghanistan,Argentina,-4.692109
Afghanistan,Armenia,-5.671705
Afghanistan,Australia,-3.456377
Afghanistan,Austria,-8.799038


In [13]:
# Rebind the name to the tone column grouped by the first index level.
# group_keys=False keeps the group key from being prepended to results.
protest_news_average = (
    protest_news_average['AverageTone']
    .groupby(level=0, group_keys=False)
)

In [15]:
# Ten largest average-tone values within each group.
protest_news_average.nlargest(n=10)

MentionCountry  EventCountry            
Afghanistan     Panama                       2.429150
                Ireland                      1.803427
                Sri Lanka                    1.402181
                Zambia                       0.343643
                Bhutan                       0.248447
                Djibouti                    -0.805524
                United Arab Emirates        -1.087295
                Spain                       -1.127505
                Myanmar                     -1.159580
                Sudan                       -1.534445
Albania         Taiwan                       2.449020
                Fiji                         2.188439
                Panama                       2.031341
                New Zealand                  0.659631
                Zimbabwe                    -0.137741
                Tajikistan                  -0.492611
                Mongolia                    -0.533808
                Cameroon                 

In [None]:
# Build one table holding both the mean tone and the total mention count for
# every (MentionCountry, EventCountry) pair.
# By this point `protest_news_average` and `protest_news_numbers` have been
# rebound to grouped Series, which support neither column lookup by that name
# nor item assignment. The table is therefore recomputed from
# `countries_protests` instead of being patched together from them.
protest_news_average = (
    countries_protests
    .groupby(['MentionCountry', 'EventCountry'])
    .agg({'AverageTone': 'mean', 'NumberOfInterMentions': 'sum'})
    .rename(columns={'NumberOfInterMentions': 'Number of mentions'})
)

In [None]:
# Preview the first five rows.
protest_news_average.head(n=5)

In [None]:
# Regroup the 'Number of mentions' column by the second index level and show
# the ten largest values within each group.
protest_news_average = (
    protest_news_average['Number of mentions']
    .groupby(level=1, group_keys=False)
)
protest_news_average.nlargest(10)
