In [36]:
import pickle
from collections import Counter

import numpy as np
import pandas as pd
import statsmodels.api as sm

In [22]:
#get migration patterns
# Load the border-crossing records and reduce them to one total per date
# across the countries listed in countries_of_interest.
flow_df = pd.read_csv('../hcr_dat_border.csv')
countries_of_interest = ['Slovakia', 'Hungary', 'Poland', 'Republic of Moldova']
# .copy() detaches the filtered rows from the frame read from disk, so the
# column assignments below act on an independent frame instead of a slice
# (avoids SettingWithCopyWarning / chained-assignment ambiguity).
flow_df = flow_df[flow_df.Country.isin(countries_of_interest)].copy()
flow_df['data_date'] = pd.to_datetime(flow_df['data_date'], format='%d/%b/%Y')
flow_df = flow_df.drop(columns=['iso3', 'Country'])
# The source header for the count column is padded with spaces on both sides.
flow_df = flow_df.rename(columns={"data_date": 'date', ' Border_Crossings ' : 'border_crossings'})
# The counts are parsed as text: remove thousands separators, turn every '-'
# into '0' (presumably a dash marks "no crossings" -- TODO confirm), trim the
# padding and convert to integers. regex=False keeps both replacements
# literal regardless of the pandas version's default.
flow_df['border_crossings'] = (
    flow_df['border_crossings']
    .str.replace(',', '', regex=False)
    .str.replace('-', '0', regex=False)
    .str.strip()
    .astype('int')
)
flow_df = flow_df.groupby(['date']).sum()
flow_df.head()


Unnamed: 0_level_0,border_crossings
date,Unnamed: 1_level_1
2022-02-24,65846
2022-02-25,90155
2022-02-26,131865
2022-02-27,154046
2022-02-28,149041


In [23]:
#get emotion data
# Read the per-text emotion predictions; the first column is dropped by position.
df_emotion = pd.read_csv('../Labeling/binary_predictions_2022.csv')
df_emotion = df_emotion.drop(columns=df_emotion.columns[0])

# Count the predicted emotions of each date in one pass over the grouped
# frame, instead of boolean-masking the full frame once per distinct date.
# sort=False keeps the dates in first-appearance order, so the emotion
# columns end up in the same order as when iterating date by date.
date_dict = {
    date: dict(Counter(day_rows.predicted_emotion))
    for date, day_rows in df_emotion.groupby('date', sort=False)
}
# Dates become rows and emotions become columns; an emotion that was never
# predicted on a date has no entry for it and is filled with 0.
df_emotions = pd.DataFrame(date_dict).transpose().sort_index().fillna(0)
df_emotions.head()

Unnamed: 0,others,joy,sadness,anger,fear
2022-02-01,32.0,29.0,0.0,5.0,0.0
2022-02-02,4.0,0.0,0.0,0.0,0.0
2022-02-03,10.0,9.0,0.0,0.0,0.0
2022-02-04,29.0,16.0,3.0,8.0,1.0
2022-02-05,2.0,0.0,0.0,0.0,0.0


In [24]:
# Turn the date labels into a named DatetimeIndex so the frame can be aligned
# with the other date-indexed frames.
df_emotions.index.name = 'date'
df_emotions.index = pd.to_datetime(df_emotions.index)
# Drop the 'others' class. Rebinding with errors='ignore' keeps this cell
# re-runnable: a second execution no longer raises KeyError because the
# column is already gone.
df_emotions = df_emotions.drop(columns='others', errors='ignore')
df_emotions.head()

Unnamed: 0_level_0,joy,sadness,anger,fear
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-02-01,29.0,0.0,5.0,0.0
2022-02-02,0.0,0.0,0.0,0.0
2022-02-03,9.0,0.0,0.0,0.0
2022-02-04,16.0,3.0,8.0,1.0
2022-02-05,0.0,0.0,0.0,0.0


In [25]:
#get acled data
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by this project.
with open("../Acled/acled_2022.pkl" , mode='rb') as f:
    acled_df = pickle.load(f)
event_dates = acled_df.index.get_level_values('event_date')
all_dates = pd.date_range(event_dates.min(), event_dates.max())
# unique() keeps the regions in first-appearance order. list(set(...)) would
# yield an order that can change between kernel sessions for string values.
all_places = list(acled_df.index.get_level_values('region').unique())
ind2 = pd.MultiIndex.from_product([all_dates,all_places], names = ['date','region'])
# Give every (date, region) pair a row; pairs absent from the data get 0.
acled_df = acled_df.reindex(ind2, fill_value = 0)
acled_df.index = acled_df.index.set_levels([pd.to_datetime(acled_df.index.levels[0]), acled_df.index.levels[1]])
# Index.sortlevel returns a sorted copy and leaves the frame untouched, so
# calling it without using the result had no effect. Sort the frame itself.
acled_df = acled_df.sort_index(level='date')
acled_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,fatalities,event
date,region,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-02-01,East,0,23
2022-02-01,North,0,0
2022-02-01,Center,0,1
2022-02-01,South,0,0
2022-02-01,Kyiv,0,3


In [26]:
#focus on events in kyiv
# Select the 'event' column first, then take the cross-section for the Kyiv
# region; the result is a Series indexed by date only.
# NOTE(review): this must run before acled_df is collapsed to daily totals,
# because that step removes the 'region' level.
kyiv = acled_df['event'].xs('Kyiv', level='region')
kyiv.head()


date
2022-02-01    3
2022-02-02    1
2022-02-03    2
2022-02-04    1
2022-02-05    0
Freq: D, Name: event, dtype: int64

In [27]:
# Collapse the per-region rows into one total per date.
# NOTE(review): this rebinds acled_df, so the 'region' level is no longer
# available to cells that are re-run afterwards.
acled_df = acled_df.groupby(level='date').sum()
acled_df.head()

Unnamed: 0_level_0,fatalities,event
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-02-01,0,27
2022-02-02,0,24
2022-02-03,0,8
2022-02-04,0,14
2022-02-05,0,16


In [28]:
#get google trends data
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by this project.
with open("../Trends/trends_2022.pkl" , mode='rb') as trends_file:
    trends_df = pickle.load(trends_file)
trends_df = trends_df.rename(columns={'external_city' : 'external_locations'})
# Keep only the rows in which no column is exactly 0.
has_no_zero = (trends_df != 0).all(axis=1)
trends_df = trends_df.loc[has_no_zero]
# Sum the remaining rows into one row per date.
trends_combine = trends_df.groupby(['date']).sum()
trends_combine.head()

Unnamed: 0_level_0,travel,oblast,internal_city,external_locations
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-01,22.103873,37.169065,28.402384,10.381487
2022-01-02,21.674791,38.984798,28.587606,10.308482
2022-01-03,20.508216,39.011884,29.58509,9.741498
2022-01-04,20.325353,39.18767,31.009361,8.339385
2022-01-05,19.395757,37.233726,29.584878,8.261052


In [29]:
# Align border crossings with the daily emotion counts on their date index;
# the inner join keeps only dates present in both frames.
merged_df = pd.merge(
    flow_df,
    df_emotions,
    how='inner',
    left_index=True,
    right_index=True,
)
merged_df.head()

Unnamed: 0_level_0,border_crossings,joy,sadness,anger,fear
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-02-24,65846,6.0,1.0,0.0,0.0
2022-02-25,90155,14.0,5.0,14.0,5.0
2022-02-26,131865,128.0,27.0,102.0,37.0
2022-02-27,154046,112.0,46.0,39.0,2.0
2022-02-28,149041,130.0,42.0,59.0,12.0


In [30]:
# Store the crossing counts as floats.
merged_df['border_crossings'] = merged_df['border_crossings'].astype('float')

In [31]:
#merge the ACLED Kyiv event counts as well
# kyiv is a Series named 'event', so it joins in as a single 'event' column.
# NOTE(review): merged_df is rebound from itself; re-running this cell on its
# own would merge 'event' a second time and produce suffixed duplicates.
merged_df = pd.merge(
    merged_df,
    kyiv,
    how='inner',
    left_index=True,
    right_index=True,
)
merged_df.head()

Unnamed: 0_level_0,border_crossings,joy,sadness,anger,fear,event
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-02-24,65846.0,6.0,1.0,0.0,0.0,15
2022-02-25,90155.0,14.0,5.0,14.0,5.0,12
2022-02-26,131865.0,128.0,27.0,102.0,37.0,17
2022-02-27,154046.0,112.0,46.0,39.0,2.0,13
2022-02-28,149041.0,130.0,42.0,59.0,12.0,15


In [32]:
# Add the per-date trends columns, again keeping only the dates shared by
# both frames.
# NOTE(review): merged_df is rebound from itself; re-running this cell on its
# own would merge the trends columns a second time with suffixed names.
merged_df = pd.merge(
    merged_df,
    trends_combine,
    how='inner',
    left_index=True,
    right_index=True,
)
merged_df.head()

Unnamed: 0_level_0,border_crossings,joy,sadness,anger,fear,event,travel,oblast,internal_city,external_locations
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-02-24,65846.0,6.0,1.0,0.0,0.0,15,57.005667,53.058785,39.93677,30.36321
2022-02-25,90155.0,14.0,5.0,14.0,5.0,12,36.296789,50.073004,38.802839,32.239871
2022-02-26,131865.0,128.0,27.0,102.0,37.0,17,23.57773,45.078,37.200613,26.070787
2022-02-27,154046.0,112.0,46.0,39.0,2.0,13,17.00639,41.786672,36.318221,23.158371
2022-02-28,149041.0,130.0,42.0,59.0,12.0,15,22.82192,43.571746,38.910562,25.667248


In [35]:
# Ordinary least squares of border crossings on the selected regressors plus
# an intercept term.
# NOTE(review): the name `vars` shadows the Python builtin of the same name;
# it is kept here because other cells may read it.
vars = ['sadness', 'travel', 'event']
ols_target = merged_df['border_crossings']
ols_design = sm.add_constant(merged_df[vars])
lmfit = sm.OLS(ols_target, ols_design).fit()
lmfit.summary()

0,1,2,3
Dep. Variable:,border_crossings,R-squared:,0.526
Model:,OLS,Adj. R-squared:,0.51
Method:,Least Squares,F-statistic:,34.02
Date:,"Wed, 02 Aug 2023",Prob (F-statistic):,6.96e-15
Time:,18:57:10,Log-Likelihood:,-1128.7
No. Observations:,96,AIC:,2265.0
Df Residuals:,92,BIC:,2276.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-282.5224,1.25e+04,-0.023,0.982,-2.52e+04,2.46e+04
sadness,-61.6232,44.179,-1.395,0.166,-149.366,26.120
travel,2105.8502,616.270,3.417,0.001,881.885,3329.816
event,2662.9411,362.897,7.338,0.000,1942.196,3383.686

0,1,2,3
Omnibus:,16.596,Durbin-Watson:,0.683
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34.379
Skew:,0.616,Prob(JB):,3.43e-08
Kurtosis:,5.66,Cond. No.,407.0


In [None]:
#iterate through offset for event in kyiv
for i in np.arange(-10,10):
    df = pd.DataFrame()
    df['kyiv_event'] = merged_df['event'].shift(i)
    #iterate through travel trend
    
    for j in np.arange(-10, 20):
        #iterate through sadness
        for k in np.arange(-20,20):

