In [250]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import dataframe_image as dfi
from datetime import datetime
import itertools
import os
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import xarray as xr
import regionmask
import statsmodels.api as sm
import statsmodels.formula.api as smf
from patsy import dmatrices
from collections import OrderedDict
import sys
from sklearn.preprocessing import MinMaxScaler

## DATA

### Climate: UEA CRU TS4.07 (1901-2022)

In [251]:
file = r"/home/sara/Documenti/GitHub/Climate-and-conflict/Datasets/cru_ts4.07.1901.2022.pre.dat.nc"
file2 = r"/home/sara/Documenti/GitHub/Climate-and-conflict/Datasets/cru_ts4.07.1901.2022.tmx.dat.nc"

file_paths_list =[file,file2]
monthly_forecast=xr.Dataset()

for file in file_paths_list:
        monthly_forecast = xr.merge([monthly_forecast,xr.open_mfdataset(file)], compat='override')

### Conflict events: ACLED

In [252]:
file = r"/home/sara/Documenti/GitHub/Climate-and-conflict/Datasets/ACLED_1997-01-01-2023-07-18_Somalia.csv"
df = pd.read_csv(file)

### Administrative boundaries of Somalia

In [253]:
path = r"/home/sara/Documenti/GitHub/Climate-and-conflict/Datasets/som_adm_ocha_itos_20230308_shp/som_admbnda_adm1_ocha_20230308.shp"
states_gdf = gpd.read_file(path) 

### Displacements : UNHCR

In [254]:
df_disp = pd.read_excel(r"/home/sara/Documenti/GitHub/Climate-and-conflict/displacements/UNHCR-PRMN-Displacement-Dataset - Somalia.xlsx")

### Population density Data

In [255]:
asc = r"/home/sara/Documenti/GitHub/Climate-and-conflict/Datasets/gpw_v4_admin_unit_center_points_population_estimates_rev11_som.csv"
population_density = pd.read_csv(asc)

# Data manipulation

## Climate data manipulation

In [256]:
#Limit the lat-lon and time

def get_aoi(shp, world=True):
    lon_lat = {}
    # Get lat min, max
    aoi_lat = [float(shp.total_bounds[1]), float(shp.total_bounds[3])]
    aoi_lon = [float(shp.total_bounds[0]), float(shp.total_bounds[2])]

    lon_lat["lon"] = aoi_lon
    lon_lat["lat"] = aoi_lat
    return lon_lat

bounds = get_aoi(states_gdf)

In [257]:
start_date = '1901-01-01'
end_date = '2022-12-31'

region = monthly_forecast[["pre",'tmx']].sel(
    time=slice(start_date, end_date),
    lon=slice(bounds["lon"][0], bounds["lon"][1]),
    lat=slice(bounds["lat"][0], bounds["lat"][1]))

In [258]:
region_mask = regionmask.mask_3D_geopandas(states_gdf,
                                         monthly_forecast.lon,
                                         monthly_forecast.lat)

temp_pre = region.where(region_mask)
temp_pre = temp_pre.groupby("time").mean(["lat", "lon"]).to_dataframe().reset_index()

In [259]:
replacement_dict = {0  :  'Awdal',
                    1  :  'Bakool',
                    2  :  'Banadir',
                    3  :  'Bari',
                    4  :  'Bay',
                    5  :  'Galgaduud',
                    6  :  'Gedo',
                    7  :  'Hiraan',
                    8  :  'Lower_Juba',
                    9  :  'Lower_Shabelle',
                    10 :  'Middle_Juba',
                    11 :  'Middle_Shabelle',
                    12 :  'Mudug',
                    13 :  'Nugaal',
                    14 :  'Sanaag',
                    15 :  'Sool',
                    16 :  'Togdheer',
                    17 :  'Woqooyi_Galbeed'}

temp_pre['admin1'] = temp_pre['region'].replace(replacement_dict)
df['admin1'] = df['admin1'].str.replace(' ', '_')
temp_pre.drop('region', axis=1, inplace=True)

In [260]:
# Add Banadir region with tmx and pre as mean of the neighbouring regions
district1 = 'Lower_Shabelle'  
district2 = 'Middle_Shabelle'  

# Calculate the mean tmx and pre for the neighboring districts
mean_t = temp_pre[(temp_pre['admin1']==district1) | (temp_pre['admin1']==district2)].groupby('time')['tmx'].mean()
mean_p = temp_pre[(temp_pre['admin1']==district1) | (temp_pre['admin1']==district2)].groupby('time')['pre'].mean()
new_data = pd.DataFrame({ 'admin1': 'Banadir', 'tmx': mean_t, 'pre': mean_p}).reset_index()

# Append the new DataFrame to the original DataFrame
temp_pre_m = pd.concat([temp_pre, new_data])
temp_pre = temp_pre_m.sort_values(by=['time', 'admin1'], ascending=[True, True]).reset_index(drop=True)

In [261]:
# Only keep month and year in time column 
temp_pre['time'] = pd.to_datetime(temp_pre['time']).dt.strftime('%Y-%m-%d')
temp_pre['month'] = temp_pre['time'].str[5:7]
temp_pre['month_year'] = temp_pre['time'].str[:7]
temp_pre = temp_pre[['month_year','admin1','tmx','pre']]
temp_pre = temp_pre.rename(columns={'month_year':'time'})

## Conflict data manipulation

In [262]:
# Modify event_date column to datetime

df['event_date'] = pd.to_datetime(df['event_date'])
df = df.set_index('event_date') 

conflict = df.groupby([pd.Grouper(freq='M'),"admin1"]).count()
conflict.reset_index(level=[0, 1], inplace=True)
conflict = conflict[['event_date','admin1','year']].rename(columns={'year': 'conflicts','event_date': 'time'})

# Aggregate the datetime objects by month
conf = conflict.groupby([pd.Grouper(key='time', freq='M'),'admin1'])['conflicts'].sum().to_frame()

In [263]:
# Reindex the DataFrame with all dates and districts and fill missing values with 0

dates = conf.index.get_level_values('time').unique()
districts = conf.index.get_level_values('admin1').unique()
all_combinations = pd.MultiIndex.from_product([dates, districts], names=['time', 'admin1'])

conflicts = conf.reindex(all_combinations, fill_value=0).reset_index()    
conflicts = conflicts.sort_values(by=['time', 'admin1'], ascending=[True, True])
conflicts.reset_index(drop=True, inplace=True)
conflicts['time'] = conflicts['time'].dt.strftime('%Y-%m').values

# Merge the two DataFrames temp_pre and conflicts
df_2016 = pd.merge(temp_pre, conflicts, on=['time','admin1'], how='outer')

## Compute TA, PA, DL

In [264]:
# Split the dataframe into regions

reg=[]
for admin in df_2016['admin1'].unique():
    a = df_2016[df_2016['admin1']==admin].reset_index(drop=True)
    reg.append(a)

In [265]:
# Calculate the TA (temperature anomaly), PA (precipitation anomaly) and DL (drought lenght) for each region

avg_t = avg_p = std_t = std_p = np.zeros(18)

for i in range(18):

    reg[i]['year'] , reg[i]['month'] = reg[i]['time'].str[:4] , reg[i]['time'].str[5:7]

    # TA
    mean_temp_i , std_temp_i  = reg[i].groupby('month')['tmx'].mean() , reg[i].groupby('month')['tmx'].std()
    reg[i]['avg_temp'] , reg[i]['std_temp']  = reg[i]['month'].map(mean_temp_i) , reg[i]['month'].map(std_temp_i)
    reg[i]['diff_t']= (reg[i]['tmx']-reg[i]['avg_temp'])/reg[i]['std_temp']
    reg[i]['TA'] = (reg[i]['diff_t'].shift(3) + reg[i]['diff_t'].shift(2) + reg[i]['diff_t'].shift(1) + reg[i]['diff_t'])/4

    # PA
    mean_pre_i , std_pre_i  = reg[i].groupby('month')['pre'].mean() , reg[i].groupby('month')['pre'].std()
    reg[i]['avg_pre'] , reg[i]['std_pre']= reg[i]['month'].map(mean_pre_i) , reg[i]['month'].map(std_pre_i)
    reg[i]['diff_p']= (reg[i]['pre']-reg[i]['avg_pre'])/reg[i]['std_pre']
    reg[i]['PA'] = (reg[i]['diff_p'].shift(3) + reg[i]['diff_p'].shift(2) + reg[i]['diff_p'].shift(1) + reg[i]['diff_p'])/4
    
    # DL 
    reg[i]['DL'] = 0
    mask = reg[i]['TA'] > 0
    group_id = (mask != mask.shift()).cumsum()             # Create a group identifier for each consecutive group
    count = reg[i].groupby(group_id).cumcount() + 1        # Calculate the count within each group
    reg[i]['DL'] = np.where(mask, count, 0)                # Assign the count values to the 'DL' column

    # Add lagged variables
    reg[i]['TA_lag1'], reg[i]['TA_lag2'], reg[i]['TA_lag3'], reg[i]['TA_lag4'], reg[i]['TA_lag5'], reg[i]['TA_lag6'] = reg[i]['TA'].shift(1), reg[i]['TA'].shift(2), reg[i]['TA'].shift(3), reg[i]['TA'].shift(4), reg[i]['TA'].shift(5), reg[i]['TA'].shift(6)
    reg[i]['PA_lag1'], reg[i]['PA_lag2'], reg[i]['PA_lag3'], reg[i]['PA_lag4'], reg[i]['PA_lag5'], reg[i]['PA_lag6'] = reg[i]['PA'].shift(1), reg[i]['PA'].shift(2), reg[i]['PA'].shift(3), reg[i]['PA'].shift(4), reg[i]['PA'].shift(5), reg[i]['PA'].shift(6)
    reg[i]['DL_lag1'], reg[i]['DL_lag2'], reg[i]['DL_lag3'], reg[i]['DL_lag4'], reg[i]['DL_lag5'], reg[i]['DL_lag6'] = reg[i]['DL'].shift(1), reg[i]['DL'].shift(2), reg[i]['DL'].shift(3), reg[i]['DL'].shift(4), reg[i]['DL'].shift(5), reg[i]['DL'].shift(6)
    reg[i]['conflicts_lag1'], reg[i]['conflicts_lag2'], reg[i]['conflicts_lag3'], reg[i]['conflicts_lag4'], reg[i]['conflicts_lag5'], reg[i]['conflicts_lag6'] = reg[i]['conflicts'].shift(1), reg[i]['conflicts'].shift(2), reg[i]['conflicts'].shift(3), reg[i]['conflicts'].shift(4), reg[i]['conflicts'].shift(5), reg[i]['conflicts'].shift(6)

    reg[i] = reg[i].reset_index()

In [266]:
mean_temp_i

month
01    28.363771
02    29.244181
03    30.398853
04    30.900000
05    32.567951
06    33.660736
07    33.613934
08    33.499100
09    33.144917
10    31.166147
11    29.586147
12    28.361231
Name: tmx, dtype: float32

In [267]:
#set options to display all columns
pd.set_option('display.max_columns', None)
reg[17]

Unnamed: 0,index,time,admin1,tmx,pre,conflicts,year,month,avg_temp,std_temp,diff_t,TA,avg_pre,std_pre,diff_p,PA,DL,TA_lag1,TA_lag2,TA_lag3,TA_lag4,TA_lag5,TA_lag6,PA_lag1,PA_lag2,PA_lag3,PA_lag4,PA_lag5,PA_lag6,DL_lag1,DL_lag2,DL_lag3,DL_lag4,DL_lag5,DL_lag6,conflicts_lag1,conflicts_lag2,conflicts_lag3,conflicts_lag4,conflicts_lag5,conflicts_lag6
0,0,1901-01,Woqooyi_Galbeed,28.260000,2.820000,,1901,01,28.363771,0.727334,-0.142673,,2.584754,2.983821,0.078841,,0,,,,,,,,,,,,,,,,,,,,,,,,
1,1,1901-02,Woqooyi_Galbeed,29.049999,16.669998,,1901,02,29.244181,0.959817,-0.202311,,5.073114,7.663166,1.513328,,0,,,,,,,,,,,,,0.0,,,,,,,,,,,
2,2,1901-03,Woqooyi_Galbeed,30.230000,15.400000,,1901,03,30.398853,0.944207,-0.178831,,16.200493,16.163335,-0.049525,,0,,,,,,,,,,,,,0.0,0.0,,,,,,,,,,
3,3,1901-04,Woqooyi_Galbeed,30.710001,45.680000,,1901,04,30.900000,0.995990,-0.190764,-0.178645,37.468525,24.529644,0.334757,0.469350,0,,,,,,,,,,,,,0.0,0.0,0.0,,,,,,,,,
4,4,1901-05,Woqooyi_Galbeed,32.240005,14.809999,,1901,05,32.567951,0.802541,-0.408634,-0.245135,37.323769,26.050062,-0.864250,0.233577,0,-0.178645,,,,,,0.46935,,,,,,0.0,0.0,0.0,0.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1466,1466,2023-03,Woqooyi_Galbeed,,,0.0,2023,03,30.398853,0.944207,,,16.200493,16.163335,,,0,,,1.497522,1.155194,0.840233,0.337112,,,-0.156938,-0.091331,-0.164258,0.369547,0.0,0.0,17.0,16.0,15.0,14.0,0.0,0.0,2.0,0.0,1.0,1.0
1467,1467,2023-04,Woqooyi_Galbeed,,,0.0,2023,04,30.900000,0.995990,,,37.468525,24.529644,,,0,,,,1.497522,1.155194,0.840233,,,,-0.156938,-0.091331,-0.164258,0.0,0.0,0.0,17.0,16.0,15.0,0.0,0.0,0.0,2.0,0.0,1.0
1468,1468,2023-05,Woqooyi_Galbeed,,,1.0,2023,05,32.567951,0.802541,,,37.323769,26.050062,,,0,,,,,1.497522,1.155194,,,,,-0.156938,-0.091331,0.0,0.0,0.0,0.0,17.0,16.0,0.0,0.0,0.0,0.0,2.0,0.0
1469,1469,2023-06,Woqooyi_Galbeed,,,1.0,2023,06,33.660736,0.600997,,,16.975328,10.360564,,,0,,,,,,1.497522,,,,,,-0.156938,0.0,0.0,0.0,0.0,0.0,17.0,1.0,0.0,0.0,0.0,0.0,2.0


In [268]:
temp_pre_c = pd.concat([reg[i] for i in range(18)], axis=0)
#temp_pre_c = temp_pre_c.dropna()
# drop all entries before 2016-01 and after 2022-12
temp_pre_c = temp_pre_c[(temp_pre_c['time'] >= '2016-01') & (temp_pre_c['time'] <= '2022-12')]
# Select a subset of the dataframes from 1997-01 to 2022-12
temp_pre_c = temp_pre_c.fillna(0)
temp_pre_c = temp_pre_c.drop(['avg_temp', 'avg_pre', 'std_temp', 'std_pre', 'diff_t', 'diff_p'], axis=1)
temp_pre_c = temp_pre_c.sort_values(by=['time','admin1'], ascending=[True, True]).reset_index(drop=True)

In [269]:
month_dict = {1: 'January', 2: 'February', 3: 'March', 4: 'April', 5: 'May', 6:'June', 7:'July', 8:'August', 9:'September', 10:'October', 11:'November', 12:'December'}
temp_pre_c['month_name'] = temp_pre_c['month'].map(month_dict)

In [270]:
#temp_pre_c.to_csv(r"/home/sara/Documenti/GitHub/Climate-and-conflict/csv/df_lags_2016_n4_c.csv", index = False, header=True)

In [271]:
#dist = pd.read_csv(r"C:\Users\PcLaptop\Documents\GitHub\Climate-and-conflict\dist_som.csv")
dist = pd.read_csv(r"/home/sara/Documenti/GitHub/Climate-and-conflict/csv/dist_som.csv")

In [272]:
inv_dist = 1/(dist+0.001)
inv_dist.reset_index(inplace=True)
inv_dist['index'] = inv_dist['index'].replace(replacement_dict)

#df_new=temp_pre_c.merge(inv_dist, left_on='admin1', right_on='index')

## Displacements data manipulation

In [273]:
df_disp = df_disp[(df_disp['Reason']=='Drought related')]
df_disp['Current (Arrival) Region'] = df_disp['Current (Arrival) Region'].str.replace(' ', '_')
df_disp['Previous (Departure) Region'] = df_disp['Previous (Departure) Region'].str.replace(' ', '_')

In [274]:
v= df_disp["Month End"]
v=v.values
dt = [datetime.strptime(v[i], "%d/%m/%Y") for i in range(len(v))]
q=[]

for i in range(len(dt)):
    q.append(datetime.timestamp(dt[i]))
    
df_disp.insert(loc=3, column='date_timestamp', value=q)
df_disp = df_disp.sort_values("date_timestamp")

df_disp['Month End'] = pd.to_datetime(df_disp['Month End'], dayfirst=True)

In [275]:
aggregated_data = df_disp.groupby([pd.Grouper(key='Month End', freq='M'),'Previous (Departure) Region', 'Current (Arrival) Region'])['Number of Individuals'].sum().to_frame()

# Get the unique values of 'Month End' and 'Previous (Departure) Region' 
dates = aggregated_data.index.get_level_values('Month End').unique()
districts = aggregated_data.index.get_level_values('Previous (Departure) Region').unique()

all_combinations = pd.MultiIndex.from_product([dates, districts, districts], names=['time', 'Previous (Departure) Region','Current (Arrival) Region'])

# Reindex the aggregated_data DataFrame with 'all_combinations'
disp_data = aggregated_data.reindex(all_combinations, fill_value=0).reset_index()   

In [276]:
disp_matxs = disp_data.pivot_table(index=['time','Current (Arrival) Region'], columns='Previous (Departure) Region', values='Number of Individuals', aggfunc='sum').reset_index()
disp_matxs = disp_matxs.rename(columns={'Current (Arrival) Region': 'admin1'})

#remove the day from the date
disp_matxs['yr_mth'] = disp_matxs['time'].map(lambda x: x.strftime('%Y-%m'))
disp_matxs = disp_matxs.drop(columns=['time'])
temp_pre_c['yr_mth'] = pd.to_datetime(temp_pre_c['time'], format='%Y-%m', dayfirst=True).map(lambda x: x.strftime('%Y-%m'))

df_merged = pd.merge(temp_pre_c, disp_matxs, on=['yr_mth', 'admin1'], how='inner')

In [277]:
#add a column with the sum of the displacements
df_merged['sum_disp'] = df_merged.iloc[:, -18:].sum(axis=1)

In [278]:
#df_merged.to_csv(r"/home/sara/Documenti/GitHub/Climate-and-conflict/csv/df_lags_2016_n4c_disp_d.csv", index=False)

## Population density 

In [279]:
population_density = population_density.groupby('NAME2')[['UN_2000_DS','UN_2005_DS','UN_2010_DS','UN_2015_DS','UN_2020_DS']].sum()
population_density.rename(index={'Juba Dhexe (Middle)':'Middle_Juba','Juba Hoose (Lower)':'Lower_Juba','Shabelle Dhexe (Middle)':'Middle_Shabelle','Shabelle Hoose (Lower)':'Lower_Shabelle','Woqooyi Galbeed':'Woqooyi_Galbeed'},inplace=True)	

population_density['UN_2001_DS']=population_density['UN_2000_DS']+(population_density['UN_2005_DS']-population_density['UN_2000_DS'])/5
population_density['UN_2002_DS']=population_density['UN_2001_DS']+(population_density['UN_2005_DS']-population_density['UN_2000_DS'])/5
population_density['UN_2003_DS']=population_density['UN_2002_DS']+(population_density['UN_2005_DS']-population_density['UN_2000_DS'])/5
population_density['UN_2004_DS']=population_density['UN_2003_DS']+(population_density['UN_2005_DS']-population_density['UN_2000_DS'])/5
population_density['UN_2006_DS']=population_density['UN_2005_DS']+(population_density['UN_2010_DS']-population_density['UN_2005_DS'])/5
population_density['UN_2007_DS']=population_density['UN_2006_DS']+(population_density['UN_2010_DS']-population_density['UN_2005_DS'])/5
population_density['UN_2008_DS']=population_density['UN_2007_DS']+(population_density['UN_2010_DS']-population_density['UN_2005_DS'])/5
population_density['UN_2009_DS']=population_density['UN_2008_DS']+(population_density['UN_2010_DS']-population_density['UN_2005_DS'])/5
population_density['UN_2011_DS']=population_density['UN_2010_DS']+(population_density['UN_2015_DS']-population_density['UN_2010_DS'])/5
population_density['UN_2012_DS']=population_density['UN_2011_DS']+(population_density['UN_2015_DS']-population_density['UN_2010_DS'])/5
population_density['UN_2013_DS']=population_density['UN_2012_DS']+(population_density['UN_2015_DS']-population_density['UN_2010_DS'])/5
population_density['UN_2014_DS']=population_density['UN_2013_DS']+(population_density['UN_2015_DS']-population_density['UN_2010_DS'])/5
population_density['UN_2016_DS']=population_density['UN_2015_DS']+(population_density['UN_2020_DS']-population_density['UN_2015_DS'])/5
population_density['UN_2017_DS']=population_density['UN_2016_DS']+(population_density['UN_2020_DS']-population_density['UN_2015_DS'])/5
population_density['UN_2018_DS']=population_density['UN_2017_DS']+(population_density['UN_2020_DS']-population_density['UN_2015_DS'])/5
population_density['UN_2019_DS']=population_density['UN_2018_DS']+(population_density['UN_2020_DS']-population_density['UN_2015_DS'])/5
population_density['UN_2021_DS']=population_density['UN_2020_DS']+(population_density['UN_2020_DS']-population_density['UN_2015_DS'])/5
population_density['UN_2022_DS']=population_density['UN_2021_DS']+(population_density['UN_2020_DS']-population_density['UN_2015_DS'])/5

In [280]:
population_density=population_density[['UN_2000_DS','UN_2001_DS','UN_2002_DS','UN_2003_DS','UN_2004_DS','UN_2005_DS','UN_2006_DS','UN_2007_DS','UN_2008_DS','UN_2009_DS','UN_2010_DS','UN_2011_DS','UN_2012_DS','UN_2013_DS','UN_2014_DS','UN_2015_DS','UN_2016_DS','UN_2017_DS','UN_2018_DS','UN_2019_DS','UN_2020_DS','UN_2021_DS','UN_2022_DS']]
population_density.reset_index(inplace=True)
population_density.rename(columns={'NAME2':'admin1','UN_2000_DS':'2000','UN_2001_DS':'2001','UN_2002_DS':'2002','UN_2003_DS':'2003','UN_2004_DS':'2004','UN_2005_DS':'2005','UN_2006_DS':'2006','UN_2007_DS':'2007','UN_2008_DS':'2008','UN_2009_DS':'2009','UN_2010_DS':'2010','UN_2011_DS':'2011','UN_2012_DS':'2012','UN_2013_DS':'2013','UN_2014_DS':'2014','UN_2015_DS':'2015','UN_2016_DS':'2016','UN_2017_DS':'2017','UN_2018_DS':'2018','UN_2019_DS':'2019','UN_2020_DS':'2020','UN_2021_DS':'2021','UN_2022_DS':'2022'},inplace=True)

#pivot the table to have the years as rows
population_density=population_density.melt(id_vars=['admin1'],value_vars=['2000','2001','2002','2003','2004','2005','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018','2019','2020','2021','2022'],var_name='year',value_name='population_density')
population_density['year']=population_density['year']

df_merged=pd.merge(df_merged,population_density,how='left',on=['year','admin1'])

In [281]:
#df_merged = df_merged[df_merged['conflicts']!=0]
df_merged['conflicts_log'] = np.log(df_merged['conflicts']+1)
df_merged['sum_disp_log'] = np.log(df_merged['sum_disp']+1) 

## Normalization

In [282]:
df_merged.columns

Index(['index', 'time', 'admin1', 'tmx', 'pre', 'conflicts', 'year', 'month',
       'TA', 'PA', 'DL', 'TA_lag1', 'TA_lag2', 'TA_lag3', 'TA_lag4', 'TA_lag5',
       'TA_lag6', 'PA_lag1', 'PA_lag2', 'PA_lag3', 'PA_lag4', 'PA_lag5',
       'PA_lag6', 'DL_lag1', 'DL_lag2', 'DL_lag3', 'DL_lag4', 'DL_lag5',
       'DL_lag6', 'conflicts_lag1', 'conflicts_lag2', 'conflicts_lag3',
       'conflicts_lag4', 'conflicts_lag5', 'conflicts_lag6', 'month_name',
       'yr_mth', 'Awdal', 'Bakool', 'Banadir', 'Bari', 'Bay', 'Galgaduud',
       'Gedo', 'Hiraan', 'Lower_Juba', 'Lower_Shabelle', 'Middle_Juba',
       'Middle_Shabelle', 'Mudug', 'Nugaal', 'Sanaag', 'Sool', 'Togdheer',
       'Woqooyi_Galbeed', 'sum_disp', 'population_density', 'conflicts_log',
       'sum_disp_log'],
      dtype='object')

In [283]:
col1=['tmx', 'pre', 'conflicts', 'TA', 'PA', 'DL', 
       'TA_lag1', 'TA_lag2', 'TA_lag3', 'TA_lag4', 'TA_lag5', 'TA_lag6', 
       'PA_lag1', 'PA_lag2', 'PA_lag3', 'PA_lag4', 'PA_lag5', 'PA_lag6', 
       'DL_lag1', 'DL_lag2', 'DL_lag3', 'DL_lag4', 'DL_lag5', 'DL_lag6', 
        'conflicts_lag1', 'conflicts_lag2', 'conflicts_lag3',
       'conflicts_lag4', 'conflicts_lag5', 'conflicts_lag6',
       'sum_disp', 'population_density', 'conflicts_log', 'sum_disp_log']

min_max_scaler = MinMaxScaler().fit(df_merged[col1])
df_merged[col1] = min_max_scaler.transform(df_merged[col1])

In [284]:
df_merged.head()

Unnamed: 0,index,time,admin1,tmx,pre,conflicts,year,month,TA,PA,DL,TA_lag1,TA_lag2,TA_lag3,TA_lag4,TA_lag5,TA_lag6,PA_lag1,PA_lag2,PA_lag3,PA_lag4,PA_lag5,PA_lag6,DL_lag1,DL_lag2,DL_lag3,DL_lag4,DL_lag5,DL_lag6,conflicts_lag1,conflicts_lag2,conflicts_lag3,conflicts_lag4,conflicts_lag5,conflicts_lag6,month_name,yr_mth,Awdal,Bakool,Banadir,Bari,Bay,Galgaduud,Gedo,Hiraan,Lower_Juba,Lower_Shabelle,Middle_Juba,Middle_Shabelle,Mudug,Nugaal,Sanaag,Sool,Togdheer,Woqooyi_Galbeed,sum_disp,population_density,conflicts_log,sum_disp_log
0,1380,2016-01,Awdal,0.29147,0.010244,0.041667,2016,1,0.760128,0.237045,0.522989,0.810713,0.886201,0.835072,0.758762,0.661518,0.550162,0.254084,0.256497,0.298663,0.465255,0.562534,0.597256,0.520231,0.517442,0.51462,0.511765,0.508876,0.505952,0.041667,0.0625,0.09375,0.0625,0.010417,0.010417,,2016-01,0,7,26,0,13,14,0,14,0,0,9,0,0,6,0,0,0,0,0.000757,0.016448,0.351812,0.385427
1,1380,2016-01,Bakool,0.617061,0.003803,0.0625,2016,1,0.717408,0.514353,0.522989,0.658098,0.640663,0.580636,0.521238,0.439964,0.395316,0.504205,0.547593,0.538138,0.410523,0.412084,0.420274,0.520231,0.517442,0.51462,0.511765,0.508876,0.505952,0.083333,0.041667,0.020833,0.125,0.072917,0.1875,,2016-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.011279,0.425362,0.0
2,1380,2016-01,Banadir,0.523823,0.017259,0.375,2016,1,0.50488,0.423418,0.068966,0.439621,0.423554,0.396232,0.365869,0.307324,0.269572,0.404936,0.415221,0.402632,0.394172,0.39953,0.408024,0.063584,0.05814,0.052632,0.047059,0.04142,0.035714,0.34375,0.4375,0.354167,0.3125,0.34375,0.510417,,2016-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.849495,0.789322,0.0
3,1380,2016-01,Bari,0.216547,0.011051,0.0625,2016,1,0.985403,0.393283,0.126437,0.985955,1.0,0.900732,0.771571,0.649713,0.579903,0.372015,0.435117,0.431892,0.4365,0.432406,0.375493,0.121387,0.116279,0.111111,0.105882,0.100592,0.095238,0.083333,0.041667,0.020833,0.072917,0.052083,0.010417,,2016-01,0,0,23,0,0,5,0,0,0,0,0,0,0,3,0,0,0,0,0.000264,0.005697,0.425362,0.296854
4,1380,2016-01,Bay,0.610189,0.006915,0.166667,2016,1,0.575055,0.484029,0.522989,0.509936,0.493993,0.466204,0.426781,0.363001,0.337517,0.489873,0.53979,0.530606,0.413266,0.410584,0.418324,0.520231,0.517442,0.51462,0.511765,0.508876,0.505952,0.166667,0.09375,0.197917,0.375,0.34375,0.447917,,2016-01,0,14,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00097,0.010683,0.619321,0.406423


In [285]:
df_merged.to_csv(r"/home/sara/Documenti/GitHub/Climate-and-conflict/csv/df_norm_log_d.csv", index=False)

In [286]:
#read csv
pd.read_csv(r"/home/sara/Documenti/GitHub/Climate-and-conflict/csv/df_new_all.csv")

Unnamed: 0.1,Unnamed: 0,time,Previous (Departure) Region,admin1_dep,tmx_dep,pre_dep,conflicts_dep,year,TA_dep,PA_dep,DL_dep,TA_lag1_dep,TA_lag2_dep,TA_lag3_dep,TA_lag4_dep,TA_lag5_dep,TA_lag6_dep,PA_lag1_dep,PA_lag2_dep,PA_lag3_dep,PA_lag4_dep,PA_lag5_dep,PA_lag6_dep,DL_lag1_dep,DL_lag2_dep,DL_lag3_dep,DL_lag4_dep,DL_lag5_dep,DL_lag6_dep,conflicts_lag1_dep,conflicts_lag2_dep,conflicts_lag3_dep,conflicts_lag4_dep,conflicts_lag5_dep,conflicts_lag6_dep,sum_disp_dep,population_density_dep,admin1_arr,tmx_arr,pre_arr,conflicts_arr,TA_arr,PA_arr,DL_arr,TA_lag1_arr,TA_lag2_arr,TA_lag3_arr,TA_lag4_arr,TA_lag5_arr,TA_lag6_arr,PA_lag1_arr,PA_lag2_arr,PA_lag3_arr,PA_lag4_arr,PA_lag5_arr,PA_lag6_arr,DL_lag1_arr,DL_lag2_arr,DL_lag3_arr,DL_lag4_arr,DL_lag5_arr,DL_lag6_arr,conflicts_lag1_arr,conflicts_lag2_arr,conflicts_lag3_arr,conflicts_lag4_arr,conflicts_lag5_arr,conflicts_lag6_arr,sum_disp_arr,population_density_arr,inv_distance,Current (Arrival) Region,Displacements,Disp_log,inv_distance_2,gdp_mean_arr,accessibility_to_cities_mean_arr,gdp_mean_dep,accessibility_to_cities_mean_dep
0,0,2016-01,Bakool,Bakool,0.617061,0.003803,0.062500,2016,0.717408,0.514353,0.522989,0.658098,0.640663,0.580636,0.521238,0.439964,0.395316,0.504205,0.547593,0.538138,0.410523,0.412084,0.420274,0.520231,0.517442,0.514620,0.511765,0.508876,0.505952,0.083333,0.041667,0.020833,0.125000,0.072917,0.187500,0.000000,0.011279,Awdal,0.237221,0.010244,0.041667,0.760354,0.237045,0.522989,0.814500,0.881430,0.829161,0.74865,0.661518,0.550162,0.254084,0.256497,0.298663,0.465255,0.562534,0.597256,0.520231,0.517442,0.514620,0.511765,0.508876,0.505952,0.041667,0.062500,0.093750,0.062500,0.010417,0.010417,0.001078,0.016368,0.122535,Awdal,7,1.945910,0.031743,0.004081,262.064797,0.000543,190.209430
1,1,2016-01,Banadir,Banadir,0.523823,0.017259,0.375000,2016,0.504880,0.423418,0.068966,0.439621,0.423554,0.396232,0.365869,0.307324,0.269572,0.404936,0.415221,0.402632,0.394172,0.399530,0.408024,0.063584,0.058140,0.052632,0.047059,0.041420,0.035714,0.343750,0.437500,0.354167,0.312500,0.343750,0.510417,0.000000,0.849495,Awdal,0.237221,0.010244,0.041667,0.760354,0.237045,0.522989,0.814500,0.881430,0.829161,0.74865,0.661518,0.550162,0.254084,0.256497,0.298663,0.465255,0.562534,0.597256,0.520231,0.517442,0.514620,0.511765,0.508876,0.505952,0.041667,0.062500,0.093750,0.062500,0.010417,0.010417,0.001078,0.016368,0.063982,Awdal,26,3.258097,0.013411,0.004081,262.064797,1.000000,16.304086
2,3,2016-01,Bay,Bay,0.610189,0.006915,0.166667,2016,0.575055,0.484029,0.522989,0.509936,0.493993,0.466204,0.426781,0.363001,0.337517,0.489873,0.539790,0.530606,0.413266,0.410584,0.418324,0.520231,0.517442,0.514620,0.511765,0.508876,0.505952,0.166667,0.093750,0.197917,0.375000,0.343750,0.447917,0.000970,0.010683,Awdal,0.237221,0.010244,0.041667,0.760354,0.237045,0.522989,0.814500,0.881430,0.829161,0.74865,0.661518,0.550162,0.254084,0.256497,0.298663,0.465255,0.562534,0.597256,0.520231,0.517442,0.514620,0.511765,0.508876,0.505952,0.041667,0.062500,0.093750,0.062500,0.010417,0.010417,0.001078,0.016368,0.081103,Awdal,13,2.564949,0.018173,0.004081,262.064797,0.002061,167.612994
3,4,2016-01,Galgaduud,Galgaduud,0.528045,0.011966,0.052083,2016,0.623759,0.434584,0.068966,0.573457,0.556311,0.485944,0.437810,0.358285,0.289011,0.394826,0.406395,0.406514,0.402557,0.405573,0.410642,0.063584,0.058140,0.052632,0.047059,0.041420,0.035714,0.052083,0.062500,0.093750,0.031250,0.072917,0.083333,0.000000,0.004010,Awdal,0.237221,0.010244,0.041667,0.760354,0.237045,0.522989,0.814500,0.881430,0.829161,0.74865,0.661518,0.550162,0.254084,0.256497,0.298663,0.465255,0.562534,0.597256,0.520231,0.517442,0.514620,0.511765,0.508876,0.505952,0.041667,0.062500,0.093750,0.062500,0.010417,0.010417,0.001078,0.016368,0.101782,Awdal,14,2.639057,0.024583,0.004081,262.064797,0.000205,266.761256
4,6,2016-01,Hiraan,Hiraan,0.585780,0.007367,0.104167,2016,0.648621,0.441761,0.235632,0.582886,0.560894,0.493842,0.445272,0.369380,0.316691,0.414734,0.426480,0.417190,0.397504,0.401285,0.411168,0.231214,0.226744,0.222222,0.217647,0.213018,0.208333,0.145833,0.125000,0.156250,0.135417,0.281250,0.187500,0.000000,0.003791,Awdal,0.237221,0.010244,0.041667,0.760354,0.237045,0.522989,0.814500,0.881430,0.829161,0.74865,0.661518,0.550162,0.254084,0.256497,0.298663,0.465255,0.562534,0.597256,0.520231,0.517442,0.514620,0.511765,0.508876,0.505952,0.041667,0.062500,0.093750,0.062500,0.010417,0.010417,0.001078,0.016368,0.106979,Awdal,14,2.639057,0.026308,0.004081,262.064797,0.000769,291.044337
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3625,25683,2022-11,Nugaal,Nugaal,0.392847,0.042517,0.020833,2022,0.259398,0.435348,0.005747,0.211962,0.234118,0.424259,0.409350,0.386016,0.379782,0.443404,0.447725,0.443541,0.386629,0.381517,0.407861,0.000000,0.063953,0.058480,0.052941,0.047337,0.041667,0.031250,0.010417,0.000000,0.000000,0.031250,0.010417,0.020191,0.000486,Woqooyi Galbeed,0.291392,0.023752,0.000000,0.546742,0.329900,0.091954,0.415919,0.278543,0.436579,0.52941,0.598137,0.682034,0.306474,0.477950,0.448624,0.493343,0.500507,0.326908,0.086705,0.081395,0.076023,0.070588,0.065089,0.059524,0.010417,0.010417,0.020833,0.020833,0.052083,0.041667,0.002545,0.013016,0.128605,Woqooyi Galbeed,8,2.079442,0.033975,0.012601,214.678669,0.000581,360.458176
3626,25684,2022-11,Sanaag,Sanaag,0.188695,0.016458,0.020833,2022,0.389025,0.391105,0.091954,0.291555,0.272769,0.513784,0.526762,0.527814,0.545890,0.393883,0.379633,0.380322,0.382028,0.382387,0.398549,0.086705,0.081395,0.076023,0.070588,0.065089,0.059524,0.010417,0.010417,0.031250,0.000000,0.000000,0.000000,0.009577,0.000946,Woqooyi Galbeed,0.291392,0.023752,0.000000,0.546742,0.329900,0.091954,0.415919,0.278543,0.436579,0.52941,0.598137,0.682034,0.306474,0.477950,0.448624,0.493343,0.500507,0.326908,0.086705,0.081395,0.076023,0.070588,0.065089,0.059524,0.010417,0.010417,0.020833,0.020833,0.052083,0.041667,0.002545,0.013016,0.225812,Woqooyi Galbeed,5,1.609438,0.078190,0.012601,214.678669,0.000000,449.654261
3627,25686,2022-11,Togdheer,Togdheer,0.272230,0.030782,0.010417,2022,0.491630,0.360478,0.091954,0.376194,0.275836,0.450123,0.515149,0.556232,0.636474,0.359465,0.379658,0.375028,0.403000,0.401428,0.381064,0.086705,0.081395,0.076023,0.070588,0.065089,0.059524,0.020833,0.000000,0.041667,0.010417,0.000000,0.000000,0.002713,0.006629,Woqooyi Galbeed,0.291392,0.023752,0.000000,0.546742,0.329900,0.091954,0.415919,0.278543,0.436579,0.52941,0.598137,0.682034,0.306474,0.477950,0.448624,0.493343,0.500507,0.326908,0.086705,0.081395,0.076023,0.070588,0.065089,0.059524,0.010417,0.010417,0.020833,0.020833,0.052083,0.041667,0.002545,0.013016,0.681826,Woqooyi Galbeed,47,3.850148,0.498639,0.012601,214.678669,0.003469,172.529407
3628,25702,2022-12,Sool,Sool,0.206989,0.000316,0.083333,2022,0.445744,0.374292,0.097701,0.322435,0.256761,0.238749,0.453353,0.469606,0.483601,0.419604,0.425241,0.421952,0.417643,0.383861,0.381224,0.092486,0.087209,0.081871,0.076471,0.071006,0.065476,0.020833,0.020833,0.010417,0.041667,0.010417,0.031250,0.005817,0.000796,Woqooyi Galbeed,0.167117,0.001353,0.020833,0.641329,0.308825,0.097701,0.494653,0.399282,0.282756,0.43327,0.548343,0.598137,0.329900,0.306474,0.477950,0.448624,0.493343,0.500507,0.092486,0.087209,0.081871,0.076471,0.071006,0.065476,0.000000,0.010417,0.010417,0.020833,0.020833,0.052083,0.001667,0.013016,0.232572,Woqooyi Galbeed,13,2.564949,0.081858,0.012601,214.678669,0.000223,307.865944


In [287]:
df_d = pd.read_csv(r"/home/sara/Documenti/GitHub/Climate-and-conflict/csv/df_lags_2016_n4c_disp_d.csv")
df_e = pd.read_csv(r"/home/sara/Documenti/GitHub/Climate-and-conflict/csv/df_lags_2016_n4_disp_d_pop_norm.csv")

In [291]:
pd.read_csv("/home/sara/Documenti/GitHub/Climate-and-conflict/csv/df_lags_norm_regr.csv")

Unnamed: 0,index,time,admin1,tmx,pre,conflicts,year,month,TA,PA,DL,TA_lag1,TA_lag2,TA_lag3,TA_lag4,TA_lag5,TA_lag6,PA_lag1,PA_lag2,PA_lag3,PA_lag4,PA_lag5,PA_lag6,DL_lag1,DL_lag2,DL_lag3,DL_lag4,DL_lag5,DL_lag6,conflicts_lag1,conflicts_lag2,conflicts_lag3,conflicts_lag4,conflicts_lag5,conflicts_lag6,month_name,yr_mth,Awdal,Bakool,Banadir,Bari,Bay,Galgaduud,Gedo,Hiraan,Lower_Juba,Lower_Shabelle,Middle_Juba,Middle_Shabelle,Mudug,Nugaal,Sanaag,Sool,Togdheer,Woqooyi_Galbeed,sum_disp,population_density,conflicts_log
0,1380,2016-01,Awdal,0.291470,0.010244,0.041667,2016,1,0.760128,0.237045,0.522989,0.810713,0.886201,0.835072,0.758762,0.661518,0.550162,0.254084,0.256497,0.298663,0.465255,0.562534,0.597256,0.520231,0.517442,0.514620,0.511765,0.508876,0.505952,0.041667,0.062500,0.093750,0.062500,0.010417,0.010417,,2016-01,0,7,26,0,13,14,0,14,0,0,9,0,0,6,0,0,0,0,0.000757,0.016448,1.386294
1,1380,2016-01,Bakool,0.617061,0.003803,0.062500,2016,1,0.717408,0.514353,0.522989,0.658098,0.640663,0.580636,0.521238,0.439964,0.395316,0.504205,0.547593,0.538138,0.410523,0.412084,0.420274,0.520231,0.517442,0.514620,0.511765,0.508876,0.505952,0.083333,0.041667,0.020833,0.125000,0.072917,0.187500,,2016-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.011279,1.791759
2,1380,2016-01,Banadir,0.523823,0.017259,0.375000,2016,1,0.504880,0.423418,0.068966,0.439621,0.423554,0.396232,0.365869,0.307324,0.269572,0.404936,0.415221,0.402632,0.394172,0.399530,0.408024,0.063584,0.058140,0.052632,0.047059,0.041420,0.035714,0.343750,0.437500,0.354167,0.312500,0.343750,0.510417,,2016-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000000,0.849495,3.583519
3,1380,2016-01,Bari,0.216547,0.011051,0.062500,2016,1,0.985403,0.393283,0.126437,0.985955,1.000000,0.900732,0.771571,0.649713,0.579903,0.372015,0.435117,0.431892,0.436500,0.432406,0.375493,0.121387,0.116279,0.111111,0.105882,0.100592,0.095238,0.083333,0.041667,0.020833,0.072917,0.052083,0.010417,,2016-01,0,0,23,0,0,5,0,0,0,0,0,0,0,3,0,0,0,0,0.000264,0.005697,1.791759
4,1380,2016-01,Bay,0.610189,0.006915,0.166667,2016,1,0.575055,0.484029,0.522989,0.509936,0.493993,0.466204,0.426781,0.363001,0.337517,0.489873,0.539790,0.530606,0.413266,0.410584,0.418324,0.520231,0.517442,0.514620,0.511765,0.508876,0.505952,0.166667,0.093750,0.197917,0.375000,0.343750,0.447917,,2016-01,0,14,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000970,0.010683,2.772589
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1507,1463,2022-12,Nugaal,0.285550,0.010259,0.010417,2022,12,0.322265,0.373878,0.011494,0.235851,0.206728,0.234118,0.424259,0.409350,0.386016,0.435348,0.443404,0.447725,0.443541,0.386629,0.381517,0.005780,0.000000,0.064327,0.058824,0.053254,0.047619,0.020833,0.031250,0.010417,0.000000,0.000000,0.031250,,2022-12,0,0,0,0,0,0,0,0,0,0,0,0,0,1378,0,0,0,0,0.011720,0.000486,0.000000
1508,1463,2022-12,Sanaag,0.079754,0.002756,0.000000,2022,12,0.490590,0.377120,0.097701,0.353711,0.284356,0.272769,0.513784,0.526762,0.527814,0.391105,0.393883,0.379633,0.380322,0.382028,0.382387,0.092486,0.087209,0.081871,0.076471,0.071006,0.065476,0.020833,0.010417,0.010417,0.031250,0.000000,0.000000,,2022-12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,465,0,0,0,0.003955,0.000946,-inf
1509,1463,2022-12,Sool,0.206989,0.000316,0.083333,2022,12,0.445744,0.374292,0.097701,0.322435,0.256761,0.238749,0.453353,0.469606,0.483601,0.419604,0.425241,0.421952,0.417643,0.383861,0.381224,0.092486,0.087209,0.081871,0.076471,0.071006,0.065476,0.020833,0.020833,0.010417,0.041667,0.010417,0.031250,,2022-12,0,0,0,0,0,0,0,0,0,0,0,0,170,0,0,514,0,0,0.005817,0.000796,2.079442
1510,1463,2022-12,Togdheer,0.183433,0.001294,0.000000,2022,12,0.581094,0.354426,0.097701,0.447001,0.366905,0.275836,0.450123,0.515149,0.556232,0.360478,0.359465,0.379658,0.375028,0.403000,0.401428,0.092486,0.087209,0.081871,0.076471,0.071006,0.065476,0.010417,0.020833,0.000000,0.041667,0.010417,0.000000,,2022-12,0,0,0,0,0,0,0,0,0,0,0,0,142,0,0,83,366,0,0.005026,0.006629,-inf


In [290]:
#reset print options
df_merged.head()

Unnamed: 0,index,time,admin1,tmx,pre,conflicts,year,month,TA,PA,DL,TA_lag1,TA_lag2,TA_lag3,TA_lag4,TA_lag5,TA_lag6,PA_lag1,PA_lag2,PA_lag3,PA_lag4,PA_lag5,PA_lag6,DL_lag1,DL_lag2,DL_lag3,DL_lag4,DL_lag5,DL_lag6,conflicts_lag1,conflicts_lag2,conflicts_lag3,conflicts_lag4,conflicts_lag5,conflicts_lag6,month_name,yr_mth,Awdal,Bakool,Banadir,Bari,Bay,Galgaduud,Gedo,Hiraan,Lower_Juba,Lower_Shabelle,Middle_Juba,Middle_Shabelle,Mudug,Nugaal,Sanaag,Sool,Togdheer,Woqooyi_Galbeed,sum_disp,population_density,conflicts_log,sum_disp_log
0,1380,2016-01,Awdal,0.29147,0.010244,0.041667,2016,1,0.760128,0.237045,0.522989,0.810713,0.886201,0.835072,0.758762,0.661518,0.550162,0.254084,0.256497,0.298663,0.465255,0.562534,0.597256,0.520231,0.517442,0.51462,0.511765,0.508876,0.505952,0.041667,0.0625,0.09375,0.0625,0.010417,0.010417,,2016-01,0,7,26,0,13,14,0,14,0,0,9,0,0,6,0,0,0,0,0.000757,0.016448,0.351812,0.385427
1,1380,2016-01,Bakool,0.617061,0.003803,0.0625,2016,1,0.717408,0.514353,0.522989,0.658098,0.640663,0.580636,0.521238,0.439964,0.395316,0.504205,0.547593,0.538138,0.410523,0.412084,0.420274,0.520231,0.517442,0.51462,0.511765,0.508876,0.505952,0.083333,0.041667,0.020833,0.125,0.072917,0.1875,,2016-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.011279,0.425362,0.0
2,1380,2016-01,Banadir,0.523823,0.017259,0.375,2016,1,0.50488,0.423418,0.068966,0.439621,0.423554,0.396232,0.365869,0.307324,0.269572,0.404936,0.415221,0.402632,0.394172,0.39953,0.408024,0.063584,0.05814,0.052632,0.047059,0.04142,0.035714,0.34375,0.4375,0.354167,0.3125,0.34375,0.510417,,2016-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.849495,0.789322,0.0
3,1380,2016-01,Bari,0.216547,0.011051,0.0625,2016,1,0.985403,0.393283,0.126437,0.985955,1.0,0.900732,0.771571,0.649713,0.579903,0.372015,0.435117,0.431892,0.4365,0.432406,0.375493,0.121387,0.116279,0.111111,0.105882,0.100592,0.095238,0.083333,0.041667,0.020833,0.072917,0.052083,0.010417,,2016-01,0,0,23,0,0,5,0,0,0,0,0,0,0,3,0,0,0,0,0.000264,0.005697,0.425362,0.296854
4,1380,2016-01,Bay,0.610189,0.006915,0.166667,2016,1,0.575055,0.484029,0.522989,0.509936,0.493993,0.466204,0.426781,0.363001,0.337517,0.489873,0.53979,0.530606,0.413266,0.410584,0.418324,0.520231,0.517442,0.51462,0.511765,0.508876,0.505952,0.166667,0.09375,0.197917,0.375,0.34375,0.447917,,2016-01,0,14,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00097,0.010683,0.619321,0.406423


In [289]:
df_e.head()

Unnamed: 0,index,time,admin1,year,month,TA,PA,DL,TA_lag1,TA_lag2,TA_lag3,TA_lag4,TA_lag5,TA_lag6,PA_lag1,PA_lag2,PA_lag3,PA_lag4,PA_lag5,PA_lag6,DL_lag1,DL_lag2,DL_lag3,DL_lag4,DL_lag5,DL_lag6,conflicts,Awdal_x,Bakool_x,Banadir_x,Bari_x,Bay_x,Galgaduud_x,Gedo_x,Hiraan_x,Lower_Juba_x,Lower_Shabelle_x,Middle_Juba_x,Middle_Shabelle_x,Mudug_x,Nugaal_x,Sanaag_x,Sool_x,Togdheer_x,Woqooyi_Galbeed_x,April,August,December,February,January,July,June,March,May,November,October,September,month_name,yr_mth,Awdal_y,Bakool_y,Banadir_y,Bari_y,Bay_y,Galgaduud_y,Gedo_y,Hiraan_y,Lower_Juba_y,Lower_Shabelle_y,Middle_Juba_y,Middle_Shabelle_y,Mudug_y,Nugaal_y,Sanaag_y,Sool_y,Togdheer_y,Woqooyi_Galbeed_y,sum_disp,population_density,conflicts_log
0,1380,2016-01,Awdal,2016,1,0.760128,0.237045,0.522989,0.810713,0.886201,0.835072,0.758762,0.661518,0.550162,0.254084,0.256497,0.298663,0.465255,0.562534,0.597256,0.520231,0.517442,0.51462,0.511765,0.508876,0.505952,0.041667,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,January,2016-01,0,7,26,0,13,14,0,14,0,0,9,0,0,6,0,0,0,0,0.000757,0.016448,1.609438
1,1380,2016-01,Bakool,2016,1,0.717408,0.514353,0.522989,0.658098,0.640663,0.580636,0.521238,0.439964,0.395316,0.504205,0.547593,0.538138,0.410523,0.412084,0.420274,0.520231,0.517442,0.51462,0.511765,0.508876,0.505952,0.0625,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,January,2016-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.011279,1.94591
2,1380,2016-01,Banadir,2016,1,0.50488,0.423418,0.068966,0.439621,0.423554,0.396232,0.365869,0.307324,0.269572,0.404936,0.415221,0.402632,0.394172,0.39953,0.408024,0.063584,0.05814,0.052632,0.047059,0.04142,0.035714,0.375,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,January,2016-01,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.849495,3.610918
3,1380,2016-01,Bari,2016,1,0.985403,0.393283,0.126437,0.985955,1.0,0.900732,0.771571,0.649713,0.579903,0.372015,0.435117,0.431892,0.4365,0.432406,0.375493,0.121387,0.116279,0.111111,0.105882,0.100592,0.095238,0.0625,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,January,2016-01,0,0,23,0,0,5,0,0,0,0,0,0,0,3,0,0,0,0,0.000264,0.005697,1.94591
4,1380,2016-01,Bay,2016,1,0.575055,0.484029,0.522989,0.509936,0.493993,0.466204,0.426781,0.363001,0.337517,0.489873,0.53979,0.530606,0.413266,0.410584,0.418324,0.520231,0.517442,0.51462,0.511765,0.508876,0.505952,0.166667,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,January,2016-01,0,14,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00097,0.010683,2.833213
