In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import statsmodels.formula.api as smf

In [26]:
# Load the dataset used throughout the notebook; later cells reference the
# columns admin1, time_x, TA, PA and conflicts.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# will not run on another machine without editing it.
file = r"C:\Users\PcLaptop\Documents\GitHub\Climate-and-conflict\reg_TA_PA.csv"
df = pd.read_csv(filepath_or_buffer=file)

In [27]:
# Swap the space in multi-word region names for an underscore. The names are
# later inserted as terms of the regression formula, presumably the reason the
# space has to go.
for spaced_name in ('Lower Shabelle', 'Middle Shabelle', 'Lower Juba',
                    'Middle Juba', 'Woqooyi Galbeed'):
    df['admin1'] = df['admin1'].str.replace(spaced_name, spaced_name.replace(' ', '_'))

In [28]:
# Create a new column with the drought length DL, defined as the number of
# consecutive months with a positive temperature anomaly (TA > 0) in the
# current run; DL is zero for months where TA is not positive.
#
# Runs are counted separately inside each region (admin1). Counting over the
# whole frame at once would let a run at the end of one region's rows carry on
# into the first rows of the next region.
# Assumes rows are in chronological order within each region -- TODO confirm.
def _positive_run_length(values):
    """Return each element's 1-based position in its run of values > 0 (0 outside a run)."""
    positive = values > 0
    # A new run id starts every time the positive / non-positive state flips.
    run_id = (positive != positive.shift()).cumsum()
    position = positive.groupby(run_id).cumcount() + 1
    return position.where(positive, 0)


df['DL'] = (
    df.groupby('admin1', sort=False)['TA']
    .transform(_positive_run_length)
    .fillna(0)    # rows with a missing admin1 belong to no group
    .astype(int)
)

In [36]:
#plot the sum of the conflicts for all regions (admin1) in Somalia versus time_x
##plt.figure(figsize=(12,6))
#aa=df.groupby('time_x')['conflicts'].sum().plot()
#plt.show()

#plot the mean TA for a single region (admin1 == 'Lower_Shabelle') versus time_x
#df1=df[df['admin1']=='Lower_Shabelle']
#plt.figure(figsize=(12,6))
#aa=df1.groupby('time_x')['TA'].mean().plot()
#plt.show()

In [31]:
# Discard every row that has a NaN in any column, then renumber the rows 0..n-1
df = df.dropna().reset_index(drop=True)

In [32]:
#Create the dummy variables, one for each region (admin1)
df_dummies = pd.get_dummies(df['admin1'])
# join matches rows on the index and returns a new frame; df itself is unchanged
df_with_dummies = df.join(df_dummies)

#Create the dummy variables, one for each calendar month (month name taken from time_x)
# NOTE: 'month' is added to df only. df_with_dummies was built above, before this
# assignment, so it receives the month dummies below but not the 'month' column itself.
df['month'] = pd.DatetimeIndex(df['time_x']).month_name()
df_dummies_m = pd.get_dummies(df['month'])
df_with_dummies = df_with_dummies.join(df_dummies_m)

In [33]:
# Build the formula string for the least-squares-dummy-variable regression:
#   <response> ~ <regressors> + <region dummies> + <month dummies>
y_var_name = 'conflicts'
X_var_names = ['TA','PA']

# Regions in alphabetical order; months in order of first appearance in df
unit_names = sorted(df['admin1'].unique().tolist())
unit_names_t = df['month'].unique().tolist()

# The last entry of each dummy list is left out of the formula, so one region
# and one month have no dummy term of their own.
rhs_terms = X_var_names + unit_names[:-1] + unit_names_t[:-1]
lsdv_expr = y_var_name + ' ~ ' + ' + '.join(rhs_terms)

print('Regression expression for OLS with dummies=' + lsdv_expr)

Regression expression for OLS with dummies=conflicts ~ TA + PA + Awdal + Bakool + Banadir + Bari + Bay + Galgaduud + Gedo + Hiraan + Lower_Juba + Lower_Shabelle + Middle_Juba + Middle_Shabelle + Mudug + Nugaal + Sanaag + Sool + Togdheer + April + May + June + July + August + September + October + November + December + January + February


In [34]:
# Fit the dummy-variable regression described by lsdv_expr on the frame that
# carries the region and month indicator columns, then print the fit summary.
lsdv_model = smf.ols(lsdv_expr, data=df_with_dummies)
lsdv_model_results = lsdv_model.fit()
print(lsdv_model_results.summary())

                            OLS Regression Results                            
Dep. Variable:              conflicts   R-squared:                       0.286
Model:                            OLS   Adj. R-squared:                  0.278
Method:                 Least Squares   F-statistic:                     35.95
Date:                Mon, 10 Jul 2023   Prob (F-statistic):          2.40e-172
Time:                        09:14:48   Log-Likelihood:                -10479.
No. Observations:                2718   AIC:                         2.102e+04
Df Residuals:                    2687   BIC:                         2.120e+04
Df Model:                          30                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
Intercept                 

In [13]:
# Predictor window taken from df, offset by `shift` rows relative to start_month

start_month = 8
shift = -4
window_size = 147

window_start = start_month + shift
window_stop = window_start + window_size

# Rows window_start .. window_stop-1 (by position) of TA and PA, renumbered from 0
X = df[['TA', 'PA']].iloc[window_start:window_stop].reset_index(drop=True)

In [15]:
# Work on a copy so the original TA / conflicts columns in df stay intact
df_demean = df.copy()

# Per-region (admin1) mean of TA, broadcast back onto every row of the region.
# The 'mean' string alias is used instead of passing np.mean, which recent
# pandas versions warn about when given to transform.
df_demean['Mean_TA'] = df_demean.groupby('admin1')['TA'].transform('mean')

# Per-region (admin1) mean of conflicts, broadcast the same way
df_demean['Mean_conflicts'] = df_demean.groupby('admin1')['conflicts'].transform('mean')

# Demean: subtract the region mean from each row, leaving within-region deviations
df_demean['conflicts'] = df_demean['conflicts'] - df_demean['Mean_conflicts']
df_demean['TA'] = df_demean['TA'] - df_demean['Mean_TA']

In [16]:
# Regress the demeaned conflicts on the demeaned TA. No constant column is
# supplied, so the fit has no intercept.
# NOTE(review): OLS does not know that region means were subtracted beforehand,
# so the reported residual degrees of freedom do not account for them.
model = sm.OLS(endog=df_demean['conflicts'], exog=df_demean['TA'])
results2 = model.fit()
print(results2.summary())

                                 OLS Regression Results                                
Dep. Variable:              conflicts   R-squared (uncentered):                   0.001
Model:                            OLS   Adj. R-squared (uncentered):              0.000
Method:                 Least Squares   F-statistic:                              1.558
Date:                Mon, 10 Jul 2023   Prob (F-statistic):                       0.212
Time:                        09:10:51   Log-Likelihood:                         -10483.
No. Observations:                2718   AIC:                                  2.097e+04
Df Residuals:                    2717   BIC:                                  2.097e+04
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [13]:
# One frame per region, in order of first appearance of the region in df.
# reset_index() keeps the original row labels in an 'index' column.
reg = [
    df[df['admin1'] == region].reset_index()
    for region in df['admin1'].unique()
]

In [57]:
# Lagged regression for a single region: conflicts in rows
# [start_month, start_month + window_size) are regressed on TA and PA taken
# from the window offset by `shift` rows (shift = -4 -> predictors 4 rows earlier).
r = 2                # position of the region inside `reg`
start_month = 8      # first row of the conflicts window
shift = -4           # offset of the predictor window relative to the conflicts window
window_size = 147    # number of rows requested for each window

region_df = reg[r]
lag_start = start_month + shift
if lag_start < 0:
    # A negative slice start would silently count from the end of the series.
    raise ValueError('start_month + shift must be non-negative')

X = region_df[['TA', 'PA']].iloc[lag_start : lag_start + window_size].reset_index(drop=True)
# y is renumbered from 0 as well, so that it lines up row by row with X
y = region_df['conflicts'].iloc[start_month : start_month + window_size].reset_index(drop=True)
if len(X) != len(y):
    # Happens when the region has fewer than start_month + window_size rows.
    raise ValueError(
        'predictor and response windows differ in length: %d vs %d' % (len(X), len(y))
    )

# `regr` used to be referenced without ever being created (its constructor was
# commented out and `linear_model` was never imported), so this cell failed on
# a fresh kernel. It is now fitted with statsmodels, which the notebook already
# imports as `sm`; add_constant supplies the intercept column.
regr = sm.OLS(y, sm.add_constant(X)).fit()
print(regr.params[X.columns].to_numpy())   # slope coefficients for TA and PA
print(regr.rsquared_adj)                   # 1 - (1 - R^2) * (n - 1) / (n - k - 1)
regr.rsquared

[11.99415427 -4.15669559]
0.020556684304347206


0.03397371602620558