In [115]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import dataframe_image as dfi
from datetime import datetime
import scipy
import itertools
import geopandas as gpd
import xarray as xr
import regionmask
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [116]:
# Load the conflict/climate panel into `data`.
# NOTE(review): absolute, machine-specific path — the notebook will not run
# elsewhere until this is pointed at a configurable data directory.
file = r"C:\Users\PcLaptop\Documents\GitHub\Climate-and-conflict\df_with_dummies_.csv"
data = pd.read_csv(filepath_or_buffer=file)

In [117]:
# Calendar month name of every observation. This has to be derived while
# `time` still holds date values, i.e. before the integer recoding below.
data['month'] = pd.DatetimeIndex(data['time']).month_name()

# Number the distinct periods 25, 26, ... in order of first appearance.
# time_dict: integer id -> original time value; inv_time_dict: the reverse.
# NOTE(review): the offset 25 presumably lines the ids up with the authors'
# `newident_yrmth` index, and the numbering is only chronological if the rows
# are already sorted by time — confirm both.
time_dict = dict(enumerate(data['time'].unique(), 25))
inv_time_dict = {v: k for k, v in time_dict.items()}

# Recode `time` to its integer id. Assign the result back instead of calling
# `data['time'].replace(..., inplace=True)`: that chained in-place call acts
# on a temporary Series, warns on recent pandas and is a silent no-op under
# Copy-on-Write. Every value is a key of inv_time_dict by construction, so
# `.map` yields a clean integer column.
data['time'] = data['time'].map(inv_time_dict)

# One 0/1 indicator column per period (first period dropped as the reference
# category). Requesting an integer dtype directly avoids the bool -> int
# `replace` round trip and its deprecated downcasting.
df_dummies_new = pd.get_dummies(data['time'], drop_first=True, dtype='int64')

In [119]:
# Intercept column for the time-dummy design matrix.
df_dummies_new['const'] = 1.0


def _fit_on_time_dummies(column):
    """Fit OLS of ``data[column]`` on the time dummies plus constant.

    Returns the fitted statsmodels results object; its ``resid`` attribute is
    the part of the series not explained by the period effects.
    """
    return sm.OLS(data[column], df_dummies_new).fit()


# Partial the period effects out of the outcome and of each regressor, and
# store the residuals next to the raw series.
model = _fit_on_time_dummies('conflicts')
data['res_conflicts'] = model.resid

model1 = _fit_on_time_dummies('TA')
data['res_TA'] = model1.resid

model2 = _fit_on_time_dummies('PA')
data['res_PA'] = model2.resid

model3 = _fit_on_time_dummies('DL')
data['res_DL'] = model3.resid

In [120]:
# Dependent variable of the second-stage regression (time-demeaned conflicts).
y_var_name = "res_conflicts"

# Explanatory variables: the time-demeaned TA, PA and DL series.
X_var_names = [
    "res_TA",
    "res_PA",
    "res_DL",
]

In [121]:
# Build the patsy formula for the least-squares-dummy-variable regression:
#   <y> ~ <regressors> + <region x month dummies>

# Sorted region names and the month names present in the data.
unit_names = sorted(data['admin1'].unique().tolist())
unit_names_t = data['month'].unique().tolist()

# Region-month labels such as "AwdalJanuary", in order of first appearance.
# Each label is used as a column name in the formula, so `data` must already
# contain a column with that name.
region_month = data['admin1'] + data['month'].astype(str)
unit_names_mr = region_month.unique().tolist()

# The intercept is kept (no "- 1" term), so the last region-month label is
# left out of the formula to serve as the reference category.
regressor_terms = ' + '.join(X_var_names)
dummy_terms = ''.join(' + ' + label for label in unit_names_mr[:-1])
lsdv_expr = y_var_name + ' ~ ' + regressor_terms + dummy_terms

print('Regression expression for OLS with dummies=' + lsdv_expr)

Regression expression for OLS with dummies=res_conflicts ~ res_TA + res_PA + res_DL + AwdalJanuary + BakoolJanuary + BanadirJanuary + BariJanuary + BayJanuary + GalgaduudJanuary + GedoJanuary + HiraanJanuary + Lower_JubaJanuary + Lower_ShabelleJanuary + Middle_JubaJanuary + Middle_ShabelleJanuary + MudugJanuary + NugaalJanuary + SanaagJanuary + SoolJanuary + TogdheerJanuary + Woqooyi_GalbeedJanuary + AwdalFebruary + BakoolFebruary + BanadirFebruary + BariFebruary + BayFebruary + GalgaduudFebruary + GedoFebruary + HiraanFebruary + Lower_JubaFebruary + Lower_ShabelleFebruary + Middle_JubaFebruary + Middle_ShabelleFebruary + MudugFebruary + NugaalFebruary + SanaagFebruary + SoolFebruary + TogdheerFebruary + Woqooyi_GalbeedFebruary + AwdalMarch + BakoolMarch + BanadirMarch + BariMarch + BayMarch + GalgaduudMarch + GedoMarch + HiraanMarch + Lower_JubaMarch + Lower_ShabelleMarch + Middle_JubaMarch + Middle_ShabelleMarch + MudugMarch + NugaalMarch + SanaagMarch + SoolMarch + TogdheerMarch + W

In [122]:
# Estimate the LSDV specification described by `lsdv_expr` on `data` and
# print the full regression table (default, non-robust covariance).
lsdv_model = smf.ols(lsdv_expr, data=data)
lsdv_model_results = lsdv_model.fit()
print(lsdv_model_results.summary())

                            OLS Regression Results                            
Dep. Variable:          res_conflicts   R-squared:                       0.343
Model:                            OLS   Adj. R-squared:                  0.288
Method:                 Least Squares   F-statistic:                     6.198
Date:                Tue, 12 Sep 2023   Prob (F-statistic):          4.92e-123
Time:                        16:07:52   Log-Likelihood:                -8578.4
No. Observations:                2808   AIC:                         1.759e+04
Df Residuals:                    2589   BIC:                         1.890e+04
Df Model:                         218                                         
Covariance Type:            nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
Intercept               

In [171]:
# Authors' replication data, read once from the Stata file and once from a
# CSV file.
# NOTE(review): absolute, machine-specific paths — move to a configurable
# data directory before sharing.
data_au = pd.read_stata(
    r"C:\Users\PcLaptop\Documents\GitHub\Climate-and-conflict\AJAE_MaystadtEcker.dta"
)
data_au_csv = pd.read_csv(
    r"C:\Users\PcLaptop\Documents\GitHub\Climate-and-conflict\csv_authors.csv"
)

In [172]:
# Load the CRU-based climate series.
# NOTE(review): absolute, machine-specific path.
cru_mine = pd.read_csv(r"C:\Users\PcLaptop\Documents\GitHub\Climate-and-conflict\cru_mine.csv")

# Calendar month name; must be derived before `time` is recoded to integers.
cru_mine['month'] = pd.DatetimeIndex(cru_mine['time']).month_name()

# Number the distinct periods 25, 26, ... in order of first appearance.
# time_dict: integer id -> original time value; inv_time_dict: the reverse.
# NOTE(review): the offset 25 presumably matches the authors' `newident_yrmth`
# index used as the merge key, and the numbering is only chronological if the
# rows are already sorted by time — confirm both.
time_dict = dict(enumerate(cru_mine['time'].unique(), 25))
inv_time_dict = {v: k for k, v in time_dict.items()}

# Recode `time` to its integer id. Assign the result back instead of calling
# `cru_mine['time'].replace(..., inplace=True)`: that chained in-place call
# acts on a temporary Series, warns on recent pandas and is a silent no-op
# under Copy-on-Write, which would leave `time` as strings. Every value is a
# key of inv_time_dict by construction, so `.map` yields an integer column.
cru_mine['time'] = cru_mine['time'].map(inv_time_dict)

In [174]:
# Province id -> province name, listed alphabetically by name.
# NOTE(review): the ids presumably follow the authors' `new_province_id`
# coding — confirm against their data.
replacement_dict = {
    1: 'Awdal',
    8: 'Bakool',
    9: 'Banadir',
    2: 'Bari',
    10: 'Bay',
    11: 'Galgaduud',
    12: 'Gedo',
    13: 'Hiraan',
    14: 'Lower_Juba',
    15: 'Lower_Shabelle',
    16: 'Middle_Juba',
    17: 'Middle_Shabelle',
    18: 'Mudug',
    3: 'Nugaal',
    4: 'Sanaag',
    5: 'Sool',
    6: 'Togdheer',
    7: 'Woqooyi_Galbeed',
}

# Province name -> province id.
inv_replacement_dict = {v: k for k, v in replacement_dict.items()}

# Translate province names to ids. `.map` (unlike `.replace`) does not leave
# unknown names in place as strings, so a misspelt or missing province cannot
# slip into the key column as a mixed-type value; fail loudly instead.
province_ids = cru_mine['admin1'].map(inv_replacement_dict)
unmapped = cru_mine.loc[province_ids.isna(), 'admin1'].unique().tolist()
if unmapped:
    raise ValueError(f"admin1 values without a province id: {unmapped}")

cru_mine['new_province_id'] = province_ids.astype(int)

In [176]:
# Cast the province id of the Stata-based frame to int.
# NOTE(review): this cast touches `data_au`, but the merge below reads
# `data_au_csv` — confirm which frame is meant to supply the right-hand keys.
data_au['new_province_id'] = data_au['new_province_id'].astype(int)

# Attach the authors' variables to every CRU row, matching on province id and
# on the period index (`time` on the left, `newident_yrmth` on the right).
# A left join keeps all CRU rows, including those without a match.
new = cru_mine.merge(
    data_au_csv,
    how='left',
    left_on=['new_province_id', 'time'],
    right_on=['new_province_id', 'newident_yrmth'],
)
#new.to_csv(r"C:\Users\PcLaptop\Documents\GitHub\Climate-and-conflict\new_cru.csv")