In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
import xarray as xr
import rioxarray as rxr
import rasterio as rio
import rasterstats as rstats
import regionmask
import os
import re

In [3]:
# Project root. Set the IRAQ_PROJECT_DIR environment variable to run this notebook
# from a different checkout; when it is unset, the original hard-coded location is used.
# A trailing slash is stripped so the '/data/...' suffixes below join cleanly.
source_dir = os.environ.get(
    'IRAQ_PROJECT_DIR',
    '/mnt/c/Users/natra/Documents/Research/Iraq-post-conflict-rebel-governance',
).rstrip('/')

# Input files read in the next cell, all located under source_dir.
iom_adm3_mapping = source_dir + '/data/iom/iom_adm3_mapping.csv'
iom_data_file = source_dir + '/data/iom/iom_raw.xlsx'
ag_indices_file = source_dir + '/data/satellite_indices/evi/evi_mask_mean_max_min/evi_stats_Jan2001-Dec2022.csv'

In [3]:
# Read the three tabular inputs whose paths were set in the configuration cell.
iom_adm3_map = pd.read_csv(filepath_or_buffer=iom_adm3_mapping)
# header=1: column names come from the second row of the "Full Dataset" sheet.
iom_data = pd.read_excel(
    iom_data_file,
    sheet_name="Full Dataset",
    header=1,
)
ag_data = pd.read_csv(filepath_or_buffer=ag_indices_file)

## xSub Data - Monthly Conflict Events

In [19]:
# Load the event-level xSub conflict file for Iraq.
xsub_conflict_file = source_dir + '/data/conflict/xSub_MELTT1km2dB_IRQ_event.csv'
xsub_conflict = pd.read_csv(filepath_or_buffer=xsub_conflict_file)

# The frame is too wide for head() to show every column, so print the column
# index explicitly before previewing the first two events.
print(xsub_conflict.columns)
xsub_conflict.head(n=2)

Index(['SOURCE', 'CONFLICT', 'COWN', 'COWC', 'ISO3', 'DATE', 'LAT', 'LONG',
       'GEOPRECISION', 'TIMEPRECISION', 'INITIATOR_SIDEA', 'INITIATOR_SIDEB',
       'INITIATOR_SIDEC', 'INITIATOR_SIDED', 'TARGET_SIDEA', 'TARGET_SIDEB',
       'TARGET_SIDEC', 'TARGET_SIDED', 'DYAD_A_A', 'DYAD_A_B', 'DYAD_A_C',
       'DYAD_A_D', 'DYAD_B_A', 'DYAD_B_B', 'DYAD_B_C', 'DYAD_B_D', 'DYAD_C_A',
       'DYAD_C_B', 'DYAD_C_C', 'DYAD_C_D', 'DYAD_D_A', 'DYAD_D_B', 'DYAD_D_C',
       'DYAD_D_D', 'ACTION_ANY', 'ACTION_IND', 'ACTION_DIR', 'ACTION_PRT',
       'SIDEA_ANY', 'SIDEA_IND', 'SIDEA_DIR', 'SIDEA_PRT', 'SIDEB_ANY',
       'SIDEB_IND', 'SIDEB_DIR', 'SIDEB_PRT', 'SIDEC_ANY', 'SIDEC_IND',
       'SIDEC_DIR', 'SIDEC_PRT', 'SIDED_ANY', 'SIDED_IND', 'SIDED_DIR',
       'SIDED_PRT', 'ACTION_AIRSTRIKE', 'ACTION_AMBUSH', 'ACTION_ARMOR',
       'ACTION_ARREST', 'ACTION_ARTILLERY', 'ACTION_BOMB', 'ACTION_CHEMICAL',
       'ACTION_CIV_ABUSE', 'ACTION_DISPLACE', 'ACTION_FIREFIGHT',
       'ACTION_KIDNAP', 'ACT

Unnamed: 0,SOURCE,CONFLICT,COWN,COWC,ISO3,DATE,LAT,LONG,GEOPRECISION,TIMEPRECISION,...,ACTION_RAID,ACTION_RIOT,ACTION_RIOTCONTROL,ACTION_ROBBERY,ACTION_ROCKET,ACTION_SIEGE,ACTION_STORM,ACTION_SUICIDE,ACTION_TERROR,ACTION_UNKNOWN
0,MELTT1km2dB,Iraq,645,IRQ,IRQ,19910113,33.340582,44.400876,settlement,day,...,0,0,0,0,0,0,0,0,0,0
1,MELTT1km2dB,Iraq,645,IRQ,IRQ,19910117,33.340582,44.400876,settlement,day,...,0,0,0,0,0,0,0,0,0,0


In [20]:
# Build a point GeoDataFrame of xSub events dated 2001-01-01 or later, keeping only
# the event date and the three ACTION_* indicator columns used downstream.

# A list (not a tuple) selects the columns, and .copy() yields an independent frame
# so the column assignments below never write into a slice of xsub_conflict.
xsub_tojoin = xsub_conflict.loc[
    :, ['DATE', 'LAT', 'LONG', 'ACTION_IND', 'ACTION_DIR', 'ACTION_PRT']
].copy()

# DATE holds values such as 19910113, i.e. year-month-day packed as YYYYMMDD.
xsub_tojoin['date_dt'] = pd.to_datetime(xsub_tojoin['DATE'], format='%Y%m%d')

# Copy again after filtering: assigning 'geometry' onto a boolean-filtered slice
# is what triggers pandas' SettingWithCopyWarning.
xsub_tojoin = xsub_tojoin.loc[xsub_tojoin['date_dt'] >= '2001-01-01'].copy()

# points_from_xy takes x (longitude) first, then y (latitude).
xsub_tojoin['geometry'] = gpd.points_from_xy(xsub_tojoin['LONG'], xsub_tojoin['LAT'])
xsub_tojoin = xsub_tojoin.drop(columns=['LAT', 'LONG', 'DATE'])
xsub_tojoin = gpd.GeoDataFrame(xsub_tojoin, geometry='geometry', crs='EPSG:4326')

xsub_tojoin.head()

Unnamed: 0,ACTION_IND,ACTION_DIR,ACTION_PRT,date_dt,geometry
175,0,0,0,2001-04-18,POINT (45.17786 34.27725)
176,0,0,0,2001-06-07,POINT (47.79747 30.53302)
177,0,0,0,2001-09-16,POINT (47.14399 31.83588)
178,1,0,0,2001-09-16,POINT (47.17528 31.84528)
179,0,0,0,2001-09-23,POINT (46.01103 35.26622)


In [21]:
# Attach every xSub event point to the ADM3 polygon it intersects.
iraq_shp_adm3_loc = source_dir + '/data/geo_boundaries/irq-administrative-divisions-shapefiles/irq_admbnda_adm3_cso_20190603.shp'
iraq_shp_adm3 = gpd.read_file(iraq_shp_adm3_loc)
# Only the ADM3 name, its P-code and the polygon geometry are needed for the join.
iraq_adm3_filt = iraq_shp_adm3.loc[:, ['ADM3_EN', 'ADM3_PCODE', 'geometry']]

# how='left' keeps events that intersect no polygon; their ADM3 fields are NaN.
adm3_xsub = gpd.sjoin(
    xsub_tojoin,
    iraq_adm3_filt,
    how='left',
    predicate='intersects',
)

# Row counts before and after the join, then the number of unmatched events.
print(xsub_tojoin.shape)
print(adm3_xsub.shape)
print(adm3_xsub.loc[adm3_xsub['ADM3_PCODE'].isna()].shape)
adm3_xsub.head(2)

(51692, 5)
(51692, 8)
(166, 8)


Unnamed: 0,ACTION_IND,ACTION_DIR,ACTION_PRT,date_dt,geometry,index_right,ADM3_EN,ADM3_PCODE
175,0,0,0,2001-04-18,POINT (45.17786 34.27725),149.0,Jalawla,IQG10Q05N04
176,0,0,0,2001-06-07,POINT (47.79747 30.53302),170.0,Markaz Al-Basrah,IQG02Q02N02


In [23]:
# Inspect the events that matched no ADM3 polygon. Three patterns show up:
#   - coordinates that appear to be mis-recorded (e.g. a longitude of about 4.3);
#   - points very close to / along the border of Iraq, which should be included;
#   - points far outside Iraq (in Iran or elsewhere), which should not.
# A maximum distance will be set when re-joining so the far-away points stay excluded.
adm3_xsub.loc[adm3_xsub['ADM3_PCODE'].isna()].head(8)

Unnamed: 0,ACTION_IND,ACTION_DIR,ACTION_PRT,date_dt,geometry,index_right,ADM3_EN,ADM3_PCODE
23906,1,0,0,2009-03-08,POINT (4.39389 33.33861),,,
24054,1,0,0,2009-03-26,POINT (4.39389 33.33861),,,
24210,1,1,0,2009-04-16,POINT (46.58972 33.38444),,,
24694,1,0,0,2009-06-22,POINT (4.18500 33.30833),,,
24764,1,0,0,2009-07-01,POINT (4.39222 35.46806),,,
32116,1,0,0,2016-01-01,POINT (18.46667 33.42056),,,
32193,0,1,0,2016-01-04,POINT (20.76667 32.88722),,,
32239,0,1,0,2016-01-05,POINT (30.00000 36.08333),,,


In [24]:
# Second pass for events that fell inside no ADM3 polygon: snap each one to the
# nearest polygon, but only if it lies within a fixed distance of it.

# EPSG:3893 is a projected CRS for Iraq (ED50 / Iraq National Grid) with metre units,
# so the distance cap below is 10 km.
nearest_join_crs = 'EPSG:3893'
max_snap_distance_m = 10000

xsub_unjoined = adm3_xsub[adm3_xsub['ADM3_PCODE'].isna()]
# Drop the empty columns left by the first join so sjoin_nearest can add them again.
xsub_unjoined = xsub_unjoined.drop(columns=['index_right','ADM3_EN','ADM3_PCODE'])
xsub_unjoined = xsub_unjoined.to_crs(nearest_join_crs)

iraq_adm3_newcrs = iraq_adm3_filt.to_crs(nearest_join_crs)

adm3_xsub_nn = xsub_unjoined.sjoin_nearest(
    iraq_adm3_newcrs, how='left', max_distance=max_snap_distance_m
)
# sjoin_nearest returns one row per equidistant nearest polygon; keep a single match
# per event so a tie cannot inflate the event counts aggregated later.
adm3_xsub_nn = adm3_xsub_nn[~adm3_xsub_nn.index.duplicated(keep='first')]
adm3_xsub_nn = adm3_xsub_nn.to_crs('EPSG:4326')
print(xsub_unjoined.shape)
print(adm3_xsub_nn.shape)
# still 132 records which did not join within 10km of the Iraq shapefile edges, so will not include these
print(adm3_xsub_nn[adm3_xsub_nn['ADM3_PCODE'].isna()].shape)
adm3_xsub_nn.head(2)

(166, 5)
(166, 8)
(132, 8)


Unnamed: 0,ACTION_IND,ACTION_DIR,ACTION_PRT,date_dt,geometry,index_right,ADM3_EN,ADM3_PCODE
23906,1,0,0,2009-03-08,POINT (4.39389 33.33861),,,
24054,1,0,0,2009-03-26,POINT (4.39389 33.33861),,,


In [26]:
# Combine the events matched by the polygon join with those matched by the
# nearest join; events that still have no ADM3 code are left out.
adm3_xsub_nn_notna = adm3_xsub_nn.loc[adm3_xsub_nn['ADM3_PCODE'].notna()]
adm3_xsub_notna = adm3_xsub.loc[adm3_xsub['ADM3_PCODE'].notna()]

# The join bookkeeping column and the point geometry are not needed once each
# event carries its ADM3 name and code.
adm3_xsub_fin = (
    pd.concat([adm3_xsub_notna, adm3_xsub_nn_notna])
    .drop(columns=['index_right', 'geometry'])
)
adm3_xsub_fin.head(3)

Unnamed: 0,ACTION_IND,ACTION_DIR,ACTION_PRT,date_dt,ADM3_EN,ADM3_PCODE
175,0,0,0,2001-04-18,Jalawla,IQG10Q05N04
176,0,0,0,2001-06-07,Markaz Al-Basrah,IQG02Q02N02
177,0,0,0,2001-09-16,Al-Msharah,IQG14Q03N02


In [37]:
# Aggregate events to one row per ADM3 unit and calendar month by summing the three
# ACTION_* columns. 'ME' labels each bin with its month-end date. Within a unit,
# months between events that contain no events appear as rows of zeros (see the
# preview below).
adm3_xsub_monthly = (
    adm3_xsub_fin
    .set_index('date_dt')
    .groupby(['ADM3_EN', 'ADM3_PCODE'])[['ACTION_IND', 'ACTION_DIR', 'ACTION_PRT']]
    .resample('ME')
    .sum()
    .reset_index()
    .rename(columns={'date_dt': 'month'})
)
adm3_xsub_monthly.head()

Unnamed: 0,ADM3_EN,ADM3_PCODE,month,ACTION_IND,ACTION_DIR,ACTION_PRT
0,Abi Gharaq,IQG07Q02N02,2017-12-31,1,1,0
1,Abi Gharaq,IQG07Q02N02,2018-01-31,0,0,0
2,Abi Gharaq,IQG07Q02N02,2018-02-28,0,0,0
3,Abi Gharaq,IQG07Q02N02,2018-03-31,0,0,0
4,Abi Gharaq,IQG07Q02N02,2018-04-30,0,0,0


In [38]:
# Write the monthly ADM3 conflict table to disk; the row index is not written.
adm3_xsub_file = source_dir + '/data/conflict/adm3_xsub_monthly.csv'
adm3_xsub_monthly.to_csv(path_or_buf=adm3_xsub_file, index=False)

## GPW Population

In [5]:
# Helpful reference for converting a raster to a DataFrame:
# https://gis.stackexchange.com/questions/358051/convert-raster-to-csv-with-lat-lon-and-value-columns
# The global tif files were processed into Iraq ADM3 regions in Google Colab Pro;
# this cell only reads in the CSV outputs of that Colab processing.

pop_dir = source_dir + '/data/population/'

dfs = []
# os.listdir returns entries in arbitrary order; sorting makes the row order of
# pop_df the same on every run and every machine.
for file in sorted(os.listdir(pop_dir)):
    if file.endswith('.csv'):
        file_path = pop_dir + file
        df = pd.read_csv(file_path, index_col=None)
        dfs.append(df)

# Fail with a message that names the directory instead of pd.concat's generic
# "No objects to concatenate".
if not dfs:
    raise FileNotFoundError('No .csv files found in ' + pop_dir)

# Stack the per-file frames into one table with a fresh 0..n-1 index.
pop_df = pd.concat(dfs, axis=0, ignore_index=True)

In [6]:
# Preview the first rows only rather than rendering the whole population table.
pop_df.head()

Unnamed: 0,ADM3_PCODE,Shape_Area,year,pop_count,pop_density
0,IQG01Q01N01,0.252123,2000,213705.02,847620.8
1,IQG01Q01N02,0.117594,2000,122815.63,1044400.0
2,IQG01Q01N03,0.056195,2000,58186.016,1035438.0
3,IQG01Q02N01,0.455597,2000,55940.53,122785.1
4,IQG01Q02N02,0.157582,2000,,
