------------

# Spatial Panel Models with Fixed Effects

* **This notebook uses the [Panel_FE_Lag](https://pysal.org/spreg/generated/spreg.Panel_FE_Lag.html#spreg.Panel_FE_Lag) and [Panel_FE_Error](https://pysal.org/spreg/generated/spreg.Panel_FE_Error.html#spreg.Panel_FE_Error) classes.**


In [228]:
import numpy as np
import libpysal
import libpysal.weights as lpw
import spreg
import pandas as pd
import pysal as ps
from datetime import datetime
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [229]:
# Input locations.
# NOTE(review): these are absolute paths from the author's Linux machine and will
# not resolve elsewhere; an earlier Windows checkout read "df_with_dummies.csv"
# instead of "df_lag1.csv" at this point.

# Boundary shapefile; only the path is stored here, it is opened later when the
# contiguity weights are built.
states_gdf = r"/home/sara/Documenti/GitHub/Climate-and-conflict/Datasets/som_adm_ocha_itos_20230308_shp/som_admbnda_adm1_ocha_20230308.shp"

# Panel of regressors, loaded eagerly from CSV.
lagged_panel_csv = r"/home/sara/Documenti/GitHub/Climate-and-conflict/csv/df_lag1.csv"
df_dummies = pd.read_csv(lagged_panel_csv)

In [230]:
# Load the UNHCR PRMN displacement workbook for Somalia.
df = pd.read_excel(r"/home/sara/Documenti/GitHub/Climate-and-conflict/displacements/UNHCR-PRMN-Displacement-Dataset - Somalia.xlsx")

# Replace spaces with underscores in both region-name columns
# (presumably so they match the `admin1` spelling used for the later merge -- TODO confirm).
for region_col in ('Current (Arrival) Region', 'Previous (Departure) Region'):
    df[region_col] = df[region_col].str.replace(' ', '_')

In [231]:
# Parse the raw "Month End" strings (day/month/year) into datetime objects.
v = df["Month End"].values
dt = [datetime.strptime(raw_date, "%d/%m/%Y") for raw_date in v]

# Numeric timestamp of every parsed date, used only as the sort key below.
q = [parsed.timestamp() for parsed in dt]

# NOTE: `insert` raises if 'date_timestamp' already exists, so this cell can be
# executed only once per freshly loaded `df`.
df.insert(loc=3, column='date_timestamp', value=q)
df = df.sort_values("date_timestamp")

# Convert "Month End" itself to datetime so it can be grouped by month later.
df['Month End'] = pd.to_datetime(df['Month End'], dayfirst=True)

In [232]:
# Total number of displaced individuals per (month, departure region, arrival region).
monthly_flow_keys = [
    pd.Grouper(key='Month End', freq='M'),
    'Previous (Departure) Region',
    'Current (Arrival) Region',
]
aggregated_data = (
    df.groupby(monthly_flow_keys)['Number of Individuals']
    .sum()
    .to_frame()
)

In [233]:
# Build the complete month x departure x arrival grid so that flows that were
# never observed appear explicitly as 0.
month_level = aggregated_data.index.get_level_values('Month End')
departure_level = aggregated_data.index.get_level_values('Previous (Departure) Region')

dates = month_level.unique()
# The region list comes from the departure level only and is reused for the
# arrival level; a region that appears solely as an arrival is not in the grid.
districts = departure_level.unique()

all_combinations = pd.MultiIndex.from_product(
    [dates, districts, districts],
    names=['time', 'Previous (Departure) Region', 'Current (Arrival) Region'],
)

disp_data = aggregated_data.reindex(all_combinations, fill_value=0).reset_index()

In [234]:
# One row per (month, arrival region) and one column per departure region;
# the arrival-region column is renamed to `admin1`.
disp_matxs = (
    disp_data
    .pivot_table(
        index=['time', 'Current (Arrival) Region'],
        columns='Previous (Departure) Region',
        values='Number of Individuals',
        aggfunc='sum',
    )
    .reset_index()
    .rename(columns={'Current (Arrival) Region': 'admin1'})
)

In [235]:
#remove the day from the date
disp_matxs['yr_mth'] = disp_matxs['time'].map(lambda x: x.strftime('%Y-%m'))
disp_matxs = disp_matxs.drop(columns=['time'])
df_dummies['yr_mth'] = pd.to_datetime(df_dummies['time'], dayfirst=True).map(lambda x: x.strftime('%Y-%m'))

df_merged = pd.merge(df_dummies, disp_matxs, on=['yr_mth', 'admin1'], how='inner')

In [236]:
# Min-max normalise the displacement matrix of every month separately.
#
# Layout relied upon: the first column of `disp_matxs` is `admin1`, the last is
# `yr_mth`, and everything in between is the per-departure-region flow matrix.
#
# The per-month pieces are collected in a list and concatenated once, instead of
# growing a DataFrame with `pd.concat` inside the loop (quadratic, and seeding
# the result with an empty frame degrades the column dtypes).
normalized_parts = []

# Iterate through unique yr_mth values
for yr_mth in disp_matxs['yr_mth'].unique():
    # Select rows for the current yr_mth
    subset = disp_matxs[disp_matxs['yr_mth'] == yr_mth].copy()

    # Flow matrix of this month as a float array
    flow_columns = subset.columns[1:-1]
    matrix_values = subset.iloc[:, 1:-1].to_numpy(dtype=float)

    # Min-max scaling over the whole matrix of the month
    min_val = np.min(matrix_values)
    max_val = np.max(matrix_values)
    value_range = max_val - min_val
    if value_range == 0:
        # Constant matrix (e.g. a month with no displacement at all):
        # 0/0 would fill the month with NaN, so map it to all zeros instead.
        normalized_matrix = np.zeros_like(matrix_values)
    else:
        normalized_matrix = (matrix_values - min_val) / value_range

    # Replace the flow columns wholesale so they become float columns, rather
    # than writing floats into the existing (integer) columns through .iloc.
    subset[flow_columns] = normalized_matrix

    normalized_parts.append(subset)

# Single concatenation with a fresh 0..n-1 index
if normalized_parts:
    normalized_df = pd.concat(normalized_parts, ignore_index=True)
else:
    # No months at all: keep an empty frame with the expected columns.
    normalized_df = pd.DataFrame(columns=disp_matxs.columns)


In [237]:
# Normalised flow matrix of January 2016 with the two identifier columns removed.
# NOTE(review): `w_aw` is not used in the cells visible here; the name suggests it
# is meant as an alternative weights matrix -- confirm against later cells.
w_aw = (
    normalized_df
    .loc[normalized_df['yr_mth'] == '2016-01']
    .drop(columns=['yr_mth', 'admin1'])
)

--------------------

## Spatial Lag model

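Let's estimate a spatial lag panel model with fixed effects, where $\rho$ is the spatial autoregressive coefficient, $W$ is the spatial weights matrix, $\mu_i$ are the unit (spatial) fixed effects and $e$ is the error term: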

$$
y = \rho Wy + X\beta + \mu_i + e
$$

In [238]:
# Add a column with the total displacement of each row: the sum of the 18
# trailing columns of `df_merged`.
# NOTE(review): this relies on the 18 per-region displacement columns brought in
# by the merge being the last columns of the frame -- confirm if columns change.
#
# Any existing 'sum_disp' is excluded first; otherwise re-running this cell would
# count the old total as one of the 18 columns and silently drop a region.
disp_block = df_merged.drop(columns=['sum_disp'], errors='ignore').iloc[:, -18:]
df_merged['sum_disp'] = disp_block.sum(axis=1)

In [249]:
#df_merged.to_csv(r"/home/sara/Documenti/GitHub/Climate-and-conflict/df_lag1_disp.csv", index=False)

In [240]:
# Queen contiguity weights read from the boundary shapefile, row-standardised.
w = libpysal.weights.Queen.from_shapefile(states_gdf)
w.transform = 'r'

# Variable names: dependent variable and regressors (including total displacement).
name_y = ["conflicts"]
name_x = ['TA_lag1', 'PA_lag1', 'DL_lag1', 'sum_disp']

# Stack the selected columns side by side: one row per observation,
# one column per variable.
# NOTE(review): the row order of `df_merged` must be the one the panel estimator
# expects and must agree with the unit order of `w` -- not checked here.
y = np.column_stack([df_merged[col] for col in name_y])
x = np.column_stack([df_merged[col] for col in name_x])



In [241]:
# Fit the spatial lag panel model with fixed effects on the current y / x / w.
# NOTE(review): the data-set label passed here is "df_dummies" although the
# arrays were taken from `df_merged`; it only affects the report header.
fe_lag = spreg.Panel_FE_Lag(
    y,
    x,
    w,
    name_y=name_y,
    name_x=name_x,
    name_ds="df_dummies",
)

In [242]:
# Print the text report stored on the fitted model currently bound to `fe_lag`.
print(fe_lag.summary)

REGRESSION
----------
SUMMARY OF OUTPUT: MAXIMUM LIKELIHOOD SPATIAL LAG PANEL - FIXED EFFECTS
-----------------------------------------------------------------------
Data set            :  df_dummies
Weights matrix      :     unknown
Dependent Variable  :   conflicts                Number of Observations:        1512
Mean dependent var  :      0.0000                Number of Variables   :           5
S.D. dependent var  :      7.3422                Degrees of Freedom    :        1507
Pseudo R-squared    :      0.0530
Spatial Pseudo R-squared:  0.0213
Sigma-square ML     :      51.143                Log likelihood        :  -10661.645
S.E of regression   :       7.151                Akaike info criterion :   21333.290
                                                 Schwarz criterion     :   21359.896

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     z-Statistic     Probability
-----------------

In [243]:
# Second specification: one regressor per displacement column instead of the
# single `sum_disp` total.

# Remove rows containing missing values.
# NOTE(review): this overwrites `df_merged` in place; if rows are actually
# dropped the panel may no longer be balanced -- confirm before fitting.
df_merged = df_merged.dropna()

# Define dependent variable
name_y = ["conflicts"]
# `np` is the alias imported at the top of the notebook; the bare name `numpy`
# is never imported and raises NameError on a fresh kernel.
y = np.array([df_merged[name] for name in name_y]).T

# Independent variables: the three lagged regressors plus the 18 `*_y` columns
# (presumably the per-region displacement columns, suffixed `_y` by the merge
# because the same names already exist in `df_dummies` -- TODO confirm).
name_x = [
    'TA_lag1', 'PA_lag1', 'DL_lag1',
    'Awdal_y', 'Bakool_y', 'Banadir_y', 'Bari_y', 'Bay_y', 'Galgaduud_y',
    'Gedo_y', 'Hiraan_y', 'Lower_Juba_y', 'Lower_Shabelle_y', 'Middle_Juba_y',
    'Middle_Shabelle_y', 'Mudug_y', 'Nugaal_y', 'Sanaag_y', 'Sool_y',
    'Togdheer_y', 'Woqooyi_Galbeed_y',
]
x = np.array([df_merged[name] for name in name_x]).T

In [244]:
# Re-fit the spatial lag fixed-effects model with the current y / x / w;
# this rebinds `fe_lag`, replacing any earlier fit.
fe_lag = spreg.Panel_FE_Lag(
    y,
    x,
    w,
    name_y=name_y,
    name_x=name_x,
    name_ds="df_merged",
)

In [245]:
# Print the text report stored on the fitted model currently bound to `fe_lag`.
print(fe_lag.summary)

REGRESSION
----------
SUMMARY OF OUTPUT: MAXIMUM LIKELIHOOD SPATIAL LAG PANEL - FIXED EFFECTS
-----------------------------------------------------------------------
Data set            :   df_merged
Weights matrix      :     unknown
Dependent Variable  :   conflicts                Number of Observations:        1512
Mean dependent var  :      0.0000                Number of Variables   :          22
S.D. dependent var  :      7.3422                Degrees of Freedom    :        1490
Pseudo R-squared    :      0.0801
Spatial Pseudo R-squared:  0.0464
Sigma-square ML     :      49.630                Log likelihood        :  -10639.296
S.E of regression   :       7.045                Akaike info criterion :   21322.591
                                                 Schwarz criterion     :   21439.657

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     z-Statistic     Probability
-----------------

In [246]:
# Names used to assemble the OLS (LSDV) formula.
# Base regressors (the three lagged variables).
X_var_names = [
    'TA_lag1',
    'PA_lag1',
    'DL_lag1',
]
# Response variable.
y_var_name = 'conflicts'

In [247]:
# Regression expression for OLS with dummies (LSDV).
#
# Right-hand side, in order:
#   1. the base regressors in `X_var_names`;
#   2. `<region>_x` for every region except the last one alphabetically;
#   3. `<region>_y` for the same regions;
#   4. every month name except the last one in order of appearance.
# The intercept is kept (no "- 1" term).
#
# NOTE(review): the `_y` columns are used elsewhere in this notebook as
# displacement regressors rather than dummies, so dropping the last one here
# omits a regressor instead of a reference category -- confirm this is intended.

# Region names, sorted so that the omitted (last) region is deterministic.
unit_names = sorted(df_dummies['admin1'].unique().tolist())
# Month names in order of first appearance in the data.
unit_names_t = df_dummies['month_name'].unique().tolist()

rhs_terms = list(X_var_names)
rhs_terms += [dummy_name + '_x' for dummy_name in unit_names[:-1]]
rhs_terms += [dummy_name + '_y' for dummy_name in unit_names[:-1]]
rhs_terms += unit_names_t[:-1]

# Joining the terms avoids the hand-maintained "first term" counter and cannot
# produce a stray leading ' + ' when `X_var_names` is empty.
lsdv_expr = y_var_name + ' ~ ' + ' + '.join(rhs_terms)
print('Regression expression for OLS with dummies=' + lsdv_expr)

Regression expression for OLS with dummies=conflicts ~ TA_lag1 + PA_lag1 + DL_lag1 + Awdal_x + Bakool_x + Banadir_x + Bari_x + Bay_x + Galgaduud_x + Gedo_x + Hiraan_x + Lower_Juba_x + Lower_Shabelle_x + Middle_Juba_x + Middle_Shabelle_x + Mudug_x + Nugaal_x + Sanaag_x + Sool_x + Togdheer_x + Awdal_y + Bakool_y + Banadir_y + Bari_y + Bay_y + Galgaduud_y + Gedo_y + Hiraan_y + Lower_Juba_y + Lower_Shabelle_y + Middle_Juba_y + Middle_Shabelle_y + Mudug_y + Nugaal_y + Sanaag_y + Sool_y + Togdheer_y + January + February + March + April + May + June + July + August + September + October + November


In [248]:
# Ordinary least squares on the LSDV formula, estimated on the merged panel.
lsdv_model = smf.ols(data=df_merged, formula=lsdv_expr)
lsdv_model_results = lsdv_model.fit()

# Show the full regression table.
lsdv_summary = lsdv_model_results.summary()
print(lsdv_summary)

                            OLS Regression Results                            
Dep. Variable:              conflicts   R-squared:                       0.829
Model:                            OLS   Adj. R-squared:                  0.823
Method:                 Least Squares   F-statistic:                     147.7
Date:                Tue, 26 Sep 2023   Prob (F-statistic):               0.00
Time:                        16:25:58   Log-Likelihood:                -5114.4
No. Observations:                1512   AIC:                         1.033e+04
Df Residuals:                    1463   BIC:                         1.059e+04
Df Model:                          48                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
Intercept             3.5237      1.05