In [147]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import re
import calendar
from operator import attrgetter

## Read in All Data

In [290]:
source_dir = '/mnt/c/Users/natra/Documents/Research/Iraq-post-conflict-rebel-governance'
iom_adm3_mapping = source_dir + '/data/iom/iom_adm3_mapping.csv'
iom_data_file = source_dir + '/data/iom/iom_raw.xlsx'
ag_indices_file = source_dir + '/data/satellite_indices/evi/evi_mask_mean_max_min/evi_stats_Jan2001-Dec2022.csv'
adm3_xsub_file = source_dir + '/data/conflict/adm3_xsub_monthly.csv'
pop_dir = source_dir + '/data/population/'
adm3_sunni_eth_file = source_dir + '/data/EPR/adm3_sunni_epr.csv'
territorial_control_file = source_dir + '/data/territorial-control/isis territorial control.xlsx'
ucdp_conflict_file = source_dir + '/data/conflict/ucdp_conflict_monthly.csv'

# NOTE - as-is, there is a duplicate ADM3_PCODE in this shapefile!! 
# IQG01Q05N01 is for both Abi Gharaq and Markaz Ana
iraq_shp_adm3_loc = source_dir + '/data/geo_boundaries/irq-administrative-divisions-shapefiles/irq_admbnda_adm3_cso_20190603.shp'

In [291]:
iom_adm3_map = pd.read_csv(iom_adm3_mapping)
iom_data = pd.read_excel(iom_data_file, sheet_name="Full Dataset",header=1)
ag_data = pd.read_csv(ag_indices_file)
adm3_xsub = pd.read_csv(adm3_xsub_file)
iraq_shp_adm3 = gpd.read_file(iraq_shp_adm3_loc)
adm3_sunni_eth = pd.read_csv(adm3_sunni_eth_file)
territorial_control = pd.read_excel(territorial_control_file)
ucdp_conflict = pd.read_csv(ucdp_conflict_file)

In [292]:
print(territorial_control.shape)
territorial_control = territorial_control.drop(columns=['adm2_en','adm1_en'])
terr_contrl_occ = territorial_control[territorial_control['iom_occupied']==1]
terr_contrl_occ.loc[:,'occ_start_month_st'] = pd.to_datetime(terr_contrl_occ[['start_yr','start_m']].\
                                    astype('Int64').astype(str).apply(" ".join,1),format="%Y %m")
terr_contrl_occ.loc[:,'occ_end_month_st'] = pd.to_datetime(terr_contrl_occ[['end_yr','end_m']].\
                                    astype('Int64').astype(str).apply(" ".join,1),format="%Y %m")

terr_contrl_occ.loc[:,'occ_start_month'] = (terr_contrl_occ['occ_start_month_st'] + pd.offsets.MonthEnd()).dt.strftime('%Y-%m-%d')
terr_contrl_occ.loc[:,'occ_end_month'] = (terr_contrl_occ['occ_end_month_st'] + pd.offsets.MonthEnd()).dt.strftime('%Y-%m-%d')
terr_contrl_occ.loc[:,'occ_length_mon'] = (terr_contrl_occ['occ_end_month_st'].dt.to_period('M')  - terr_contrl_occ['occ_start_month_st'].dt.to_period('M')).apply(attrgetter('n')).astype('Int64')

terr_control_full = pd.concat([terr_contrl_occ,territorial_control[territorial_control['iom_occupied']!=1]])
terr_control_full = terr_control_full.loc[:,('adm3_en','iom_occupied','occ_start_month','occ_end_month','occ_length_mon')]
print(terr_control_full.shape)

(81, 8)
(81, 5)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  terr_contrl_occ.loc[:,'occ_start_month_st'] = pd.to_datetime(terr_contrl_occ[['start_yr','start_m']].\
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  terr_contrl_occ.loc[:,'occ_end_month_st'] = pd.to_datetime(terr_contrl_occ[['end_yr','end_m']].\
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  terr_

In [294]:
# obtain desired info from IOM
iom_data_filt = iom_data.loc[:,('LocationID','DateofInterview','Q1.10 IsLocationInDisputedArea',
                                'Q1.11 WasLocationOccupied','Q1.11.1 RetakenPeriod',
                                'Q1.12 FirstMajorEthno-religiousPrior','Q1.13 FirstMajorEthno-religiousCurrent')]

In [295]:
# concatenate population year files
# processing of global tif files into iraq adm3 regions done in Google Colab Pro - 
# below I'm reading in the output of the Colab files

pop_dfs = []
for file in os.listdir(pop_dir):
    if file.endswith('.csv'):
        file_path = pop_dir + file
        df = pd.read_csv(file_path, index_col=None)
        pop_dfs.append(df)
pop_df_full = pd.concat(pop_dfs, axis=0, ignore_index=True)

## Merge all data at monthly, adm3 level

In [296]:
# get max evi at monthly level
ag_data_max = ag_data.loc[:,('ADM3_EN','max_evi','max_evi_scaled','date')]
ag_data_max['date'] = pd.to_datetime(ag_data_max['date'])
ag_data_max = ag_data_max.set_index('date')
ag_data_max_month = ag_data_max.groupby(['ADM3_EN'])[['max_evi','max_evi_scaled']].resample('ME').max()

ag_data_max_month.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,max_evi,max_evi_scaled
ADM3_EN,date,Unnamed: 2_level_1,Unnamed: 3_level_1
Abi Gharaq,2001-01-31,5416.0,0.5416
Abi Gharaq,2001-02-28,6291.0,0.6291
Abi Gharaq,2001-03-31,6513.0,0.6513
Abi Gharaq,2001-04-30,5332.0,0.5332
Abi Gharaq,2001-05-31,5147.0,0.5147


In [297]:
# get mean evi at monthly level
ag_data_avg = ag_data.loc[:,('ADM3_EN','mean_evi','mean_evi_scaled','date')]
ag_data_avg['date'] = pd.to_datetime(ag_data_avg['date'])
ag_data_avg = ag_data_avg.set_index('date')
ag_data_avg_month = ag_data_avg.groupby(['ADM3_EN'])[['mean_evi','mean_evi_scaled']].resample('ME').mean()

ag_data_avg_month.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_evi,mean_evi_scaled
ADM3_EN,date,Unnamed: 2_level_1,Unnamed: 3_level_1
Abi Gharaq,2001-01-31,2886.37349,0.288637
Abi Gharaq,2001-02-28,3094.206464,0.309421
Abi Gharaq,2001-03-31,3275.809481,0.327581
Abi Gharaq,2001-04-30,2596.503366,0.25965
Abi Gharaq,2001-05-31,2161.217474,0.216122


In [298]:
ag_data_monthly = ag_data_avg_month.join(ag_data_max_month)
print(ag_data_avg_month.shape)
print(ag_data_max_month.shape)
print(ag_data_monthly.shape)

(77616, 2)
(77616, 2)
(77616, 4)


In [299]:
# xsub only goes until 2019-12-31
adm3_xsub_mon = adm3_xsub.rename(columns={'month':'date'})
adm3_xsub_mon = adm3_xsub_mon.drop(columns=['ADM3_PCODE'])
adm3_xsub_mon = adm3_xsub_mon.set_index(['ADM3_EN','date'])
adm3_xsub_mon.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ACTION_IND,ACTION_DIR,ACTION_PRT
ADM3_EN,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Abi Gharaq,2017-12-31,1,1,0
Abi Gharaq,2018-01-31,0,0,0
Abi Gharaq,2018-02-28,0,0,0
Abi Gharaq,2018-03-31,0,0,0
Abi Gharaq,2018-04-30,0,0,0


In [300]:
# join agricultural EVI indices with xsub conflict data
adm3_iraq_data = ag_data_monthly.join(adm3_xsub_mon)
print(ag_data_monthly.shape)
print(adm3_xsub_mon.shape)
print(adm3_iraq_data.shape)
adm3_iraq_data = adm3_iraq_data.reset_index()

(77616, 4)
(23658, 3)
(77616, 7)


In [301]:
adm3_iraq_data.head()

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT
0,Abi Gharaq,2001-01-31,2886.37349,0.288637,5416.0,0.5416,,,
1,Abi Gharaq,2001-02-28,3094.206464,0.309421,6291.0,0.6291,,,
2,Abi Gharaq,2001-03-31,3275.809481,0.327581,6513.0,0.6513,,,
3,Abi Gharaq,2001-04-30,2596.503366,0.25965,5332.0,0.5332,,,
4,Abi Gharaq,2001-05-31,2161.217474,0.216122,5147.0,0.5147,,,


In [302]:
# no duplicates at this point
adm3_iraq_data[adm3_iraq_data.duplicated(subset=['date','ADM3_EN'],keep=False)]

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT


In [303]:
# make xsub conflict values 0 for un-defined time periods where we have xsub data; otherwise 
# keep NA. Have data from 2001-01-01 until 2019-12-31
min_xsub = pd.to_datetime('2001-01-01')
max_xsub = adm3_xsub.month.max()

adm3_iraq_data['ACTION_IND'] = np.where(((adm3_iraq_data['date']>=min_xsub)&(adm3_iraq_data['date']<=max_xsub)&(adm3_iraq_data.ACTION_IND.isna())),0,adm3_iraq_data.ACTION_IND)
adm3_iraq_data['ACTION_DIR'] = np.where(((adm3_iraq_data['date']>=min_xsub)&(adm3_iraq_data['date']<=max_xsub)&(adm3_iraq_data.ACTION_DIR.isna())),0,adm3_iraq_data.ACTION_DIR)
adm3_iraq_data['ACTION_PRT'] = np.where(((adm3_iraq_data['date']>=min_xsub)&(adm3_iraq_data['date']<=max_xsub)&(adm3_iraq_data.ACTION_PRT.isna())),0,adm3_iraq_data.ACTION_PRT)
adm3_iraq_data.head()

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT
0,Abi Gharaq,2001-01-31,2886.37349,0.288637,5416.0,0.5416,0.0,0.0,0.0
1,Abi Gharaq,2001-02-28,3094.206464,0.309421,6291.0,0.6291,0.0,0.0,0.0
2,Abi Gharaq,2001-03-31,3275.809481,0.327581,6513.0,0.6513,0.0,0.0,0.0
3,Abi Gharaq,2001-04-30,2596.503366,0.25965,5332.0,0.5332,0.0,0.0,0.0
4,Abi Gharaq,2001-05-31,2161.217474,0.216122,5147.0,0.5147,0.0,0.0,0.0


In [304]:
# no duplicates at this point
adm3_iraq_data[adm3_iraq_data.duplicated(subset=['date','ADM3_EN'],keep=False)]

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT


In [305]:
# create 5-year interval fields to join with population data
adm3_iraq_data['year'] = adm3_iraq_data['date'].dt.year
adm3_iraq_data['year_5yr'] = np.where(
                               adm3_iraq_data['year'] < 2005, 2000,
                               np.where(adm3_iraq_data['year'] < 2010, 2005,
                                np.where(adm3_iraq_data['year'] < 2015, 2010,
                                np.where(adm3_iraq_data['year'] < 2020, 2015,
                                np.where(adm3_iraq_data['year'] < 2025, 2020, 0)))))
adm3_iraq_data.head()

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT,year,year_5yr
0,Abi Gharaq,2001-01-31,2886.37349,0.288637,5416.0,0.5416,0.0,0.0,0.0,2001,2000
1,Abi Gharaq,2001-02-28,3094.206464,0.309421,6291.0,0.6291,0.0,0.0,0.0,2001,2000
2,Abi Gharaq,2001-03-31,3275.809481,0.327581,6513.0,0.6513,0.0,0.0,0.0,2001,2000
3,Abi Gharaq,2001-04-30,2596.503366,0.25965,5332.0,0.5332,0.0,0.0,0.0,2001,2000
4,Abi Gharaq,2001-05-31,2161.217474,0.216122,5147.0,0.5147,0.0,0.0,0.0,2001,2000


In [306]:
# no duplicates at this point
adm3_iraq_data[adm3_iraq_data.duplicated(subset=['date','ADM3_EN'],keep=False)]

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT,year,year_5yr


In [307]:
# join population data by ADM3_PCODE and 5-year interval
pop_df_join = pop_df_full.drop(columns=['Shape_Area'])
print(adm3_iraq_data.shape)
adm3_iraq_data = adm3_iraq_data.merge(pop_df_join, how='outer',left_on=['ADM3_EN','year_5yr'],
                                      right_on=['ADM3_EN','year'])
adm3_iraq_data = adm3_iraq_data.drop(columns=['year_x','year_y','year_5yr'])
print(pop_df_join.shape)
print(adm3_iraq_data.shape)

(77616, 11)
(1470, 4)
(77616, 11)


In [308]:
# no duplicates at this point
adm3_iraq_data[adm3_iraq_data.duplicated(subset=['date','ADM3_EN'],keep=False)]

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT,pop_count,pop_density


In [309]:
adm3_iraq_data.head()

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT,pop_count,pop_density
0,Abi Gharaq,2001-01-31,2886.37349,0.288637,5416.0,0.5416,0.0,0.0,0.0,137199.66,8709119.0
1,Abi Gharaq,2001-02-28,3094.206464,0.309421,6291.0,0.6291,0.0,0.0,0.0,137199.66,8709119.0
2,Abi Gharaq,2001-03-31,3275.809481,0.327581,6513.0,0.6513,0.0,0.0,0.0,137199.66,8709119.0
3,Abi Gharaq,2001-04-30,2596.503366,0.25965,5332.0,0.5332,0.0,0.0,0.0,137199.66,8709119.0
4,Abi Gharaq,2001-05-31,2161.217474,0.216122,5147.0,0.5147,0.0,0.0,0.0,137199.66,8709119.0


#### IOM Data Preparation

In [310]:
# format IOM data for joining
iom_data_filt = iom_data_filt.rename(columns={
                                    'Q1.10 IsLocationInDisputedArea':'disputed_area',
                                    'Q1.11 WasLocationOccupied':'loc_occupied',
                                    'Q1.11.1 RetakenPeriod':'retaken_period',
                                    'Q1.12 FirstMajorEthno-religiousPrior':'maj_ethrelig_prior',
                                    'Q1.13 FirstMajorEthno-religiousCurrent':'maj_ethrelig_curr'         
                                    })

In [311]:
# add flags for whether attacked, occupied, or no ISIL activity
iom_data_filt['iom_attacked'] = np.where(iom_data_filt['loc_occupied']=='Yes, attacked', 1, 0)
iom_data_filt['iom_occupied'] = np.where(iom_data_filt['loc_occupied']=='Yes, occupied', 1, 0)
iom_data_filt['iom_no_isil_action'] = np.where(iom_data_filt['loc_occupied']=='No', 1, 0)
# add flags for whether in ISIL religious-ethnic group prior to crisis and currently
iom_data_filt['isil_ingroup_prior'] = np.where(iom_data_filt['maj_ethrelig_prior']=='Arab Sunni Muslim', 1, 0)
iom_data_filt['isil_ingroup_curr'] = np.where(iom_data_filt['maj_ethrelig_curr']=='Arab Sunni Muslim', 1, 0)
# convert disputed area answers to 1,0 flags
iom_data_filt['disputed_area'] = np.where(iom_data_filt['disputed_area']=='Yes', 1, 0)

In [312]:
# add flag for whether retaken pre- or post-2016 as indicator for long or short-term occupation
iom_data_filt['retaken_year'] = iom_data_filt['retaken_period'].str.extract("(\d\d\d\d)")
iom_data_filt['retaken_year'] = iom_data_filt['retaken_year'].astype('float')
iom_data_filt['retaken_post_2016'] = np.where(iom_data_filt['retaken_year'].isna(),iom_data_filt['retaken_year'],
                                              np.where(iom_data_filt['retaken_year']>=2017,1,0))

In [313]:
# drop unnecessary columns
iom_data_filt = iom_data_filt.drop(columns=['loc_occupied','retaken_period','maj_ethrelig_prior',
                                            'maj_ethrelig_curr','DateofInterview'])

In [314]:
iom_adm3_data = iom_data_filt.merge(iom_adm3_map.loc[:,('LocationID','ADM3_EN')],how='left',
                                    on='LocationID')
print(iom_data_filt.shape)
print(iom_adm3_data.shape)

(3717, 9)
(3717, 10)


In [315]:
# to aggregate, want to take the max of all fields except for iom_no_isil_action, which we want to take the
# min of - if any part of the adm3 area had isil action, we don't want to flag it as not having any action
iom_adm3_data_for_min = iom_adm3_data.loc[:,('ADM3_EN','iom_no_isil_action')]
iom_adm3_data_for_max = iom_adm3_data.drop(columns=['LocationID','iom_no_isil_action'])

In [316]:
iom_adm3_data_max_gb = iom_adm3_data_for_max.groupby(['ADM3_EN']).max().reset_index()
iom_adm3_data_min_gb = iom_adm3_data_for_min.groupby(['ADM3_EN']).min().reset_index()

In [317]:
iom_adm3_data_fin = iom_adm3_data_max_gb.merge(iom_adm3_data_min_gb,how='outer',
                                               on='ADM3_EN')
print(iom_adm3_data_max_gb.shape)
print(iom_adm3_data_min_gb.shape)
print(iom_adm3_data_fin.shape)

(215, 8)
(215, 2)
(215, 9)


In [318]:
# add IOM data to full dataset
print(adm3_iraq_data.shape)
adm3_iraq_data = adm3_iraq_data.merge(iom_adm3_data_fin, how='outer',
                                      on='ADM3_EN')
print(adm3_iraq_data.shape)

(77616, 11)
(77616, 19)


In [319]:
# identify number of Adm3 areas with no IOM data
len(adm3_iraq_data[adm3_iraq_data['iom_occupied'].isna()].ADM3_EN.unique())

79

In [320]:
adm3_iraq_data[adm3_iraq_data.duplicated(subset=['ADM3_EN','date'],keep=False)]

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT,pop_count,pop_density,disputed_area,iom_attacked,iom_occupied,isil_ingroup_prior,isil_ingroup_curr,retaken_year,retaken_post_2016,iom_no_isil_action


In [321]:
# add in GeoEPR data
adm3_sunni_eth = adm3_sunni_eth.drop(columns=['ADM3_PCODE']) 
adm3_iraq_data = adm3_iraq_data.merge(adm3_sunni_eth, on=['ADM3_EN'],how='left')
print(adm3_iraq_data.shape)
adm3_iraq_data[adm3_iraq_data.duplicated(subset=['ADM3_EN','date'],keep=False)]

(77616, 22)


Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT,pop_count,...,iom_attacked,iom_occupied,isil_ingroup_prior,isil_ingroup_curr,retaken_year,retaken_post_2016,iom_no_isil_action,sunni_dom,sunni_mix,no_sunni


In [322]:
adm3_iraq_data.head(2)

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT,pop_count,...,iom_attacked,iom_occupied,isil_ingroup_prior,isil_ingroup_curr,retaken_year,retaken_post_2016,iom_no_isil_action,sunni_dom,sunni_mix,no_sunni
0,Abi Gharaq,2001-01-31,2886.37349,0.288637,5416.0,0.5416,0.0,0.0,0.0,137199.66,...,0.0,0.0,0.0,0.0,,,1.0,0,0,1
1,Abi Gharaq,2001-02-28,3094.206464,0.309421,6291.0,0.6291,0.0,0.0,0.0,137199.66,...,0.0,0.0,0.0,0.0,,,1.0,0,0,1


In [323]:
# attach more precise occupation dates
print(adm3_iraq_data.shape)
adm3_iraq_data = adm3_iraq_data.merge(terr_control_full.drop(columns=['iom_occupied']),
                                           left_on='ADM3_EN',right_on='adm3_en',how='left')
print(adm3_iraq_data.shape)

(77616, 22)
(77616, 26)


In [324]:
adm3_iraq_data.head()

Unnamed: 0,ADM3_EN,date,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT,pop_count,...,retaken_year,retaken_post_2016,iom_no_isil_action,sunni_dom,sunni_mix,no_sunni,adm3_en,occ_start_month,occ_end_month,occ_length_mon
0,Abi Gharaq,2001-01-31,2886.37349,0.288637,5416.0,0.5416,0.0,0.0,0.0,137199.66,...,,,1.0,0,0,1,,,,
1,Abi Gharaq,2001-02-28,3094.206464,0.309421,6291.0,0.6291,0.0,0.0,0.0,137199.66,...,,,1.0,0,0,1,,,,
2,Abi Gharaq,2001-03-31,3275.809481,0.327581,6513.0,0.6513,0.0,0.0,0.0,137199.66,...,,,1.0,0,0,1,,,,
3,Abi Gharaq,2001-04-30,2596.503366,0.25965,5332.0,0.5332,0.0,0.0,0.0,137199.66,...,,,1.0,0,0,1,,,,
4,Abi Gharaq,2001-05-31,2161.217474,0.216122,5147.0,0.5147,0.0,0.0,0.0,137199.66,...,,,1.0,0,0,1,,,,


In [326]:
ucdp_conflict[(ucdp_conflict['ADM3_EN']=='Abu Dalf')&(ucdp_conflict['month']<='2012-06-30')]

Unnamed: 0,ADM3_EN,where_prec,isis_conflict,month,num_events
4,Abu Dalf,1,1,2008-09-30,1
9,Abu Dalf,2,1,2008-02-29,1
10,Abu Dalf,2,1,2011-02-28,1
12,Abu Dalf,4,1,2006-05-31,1
13,Abu Dalf,4,1,2007-07-31,1
14,Abu Dalf,4,1,2007-10-31,2
15,Abu Dalf,4,1,2008-01-31,3
16,Abu Dalf,4,1,2008-06-30,1
17,Abu Dalf,4,1,2008-07-31,1
18,Abu Dalf,4,1,2008-09-30,1


In [327]:
# sum events at the admin 3 level of precision or better (where_prec == 1 or 2)
print(ucdp_conflict.shape)
ucdp_conflict_precadm3 = ucdp_conflict[ucdp_conflict['where_prec'].isin([1,2])]
print(ucdp_conflict_precadm3.shape)
ucdp_conflict_precadm3 = ucdp_conflict_precadm3.groupby(['ADM3_EN','isis_conflict','month'])['num_events'].sum().reset_index()
ucdp_conflict_precadm3 = ucdp_conflict_precadm3.pivot(index=['ADM3_EN','month'],columns='isis_conflict',values='num_events').reset_index()
ucdp_conflict_precadm3 = ucdp_conflict_precadm3.rename(columns={1:'ucdp_isis_events_adm3prec',
                                                                0:'ucdp_nonisis_events_adm3prec'})
ucdp_conflict_precadm3 = ucdp_conflict_precadm3.fillna(value={'ucdp_isis_events_adm3prec':0,
                                                               'ucdp_nonisis_events_adm3prec':0})
ucdp_conflict_precadm3['ucdp_all_events_adm3prec'] = ucdp_conflict_precadm3['ucdp_isis_events_adm3prec'] + ucdp_conflict_precadm3['ucdp_nonisis_events_adm3prec']
ucdp_conflict_precadm3.head()

(11347, 5)
(3238, 5)


isis_conflict,ADM3_EN,month,ucdp_nonisis_events_adm3prec,ucdp_isis_events_adm3prec,ucdp_all_events_adm3prec
0,Abu Dalf,2008-02-29,0.0,1.0,1.0
1,Abu Dalf,2008-09-30,0.0,1.0,1.0
2,Abu Dalf,2011-02-28,0.0,1.0,1.0
3,Abu Dalf,2013-06-30,0.0,1.0,1.0
4,Abu Dalf,2015-01-31,0.0,1.0,1.0


In [328]:
# sum events at the admin 2 level of precision or better (where_prec == 1, 2, or 3)
print(ucdp_conflict.shape)
ucdp_conflict_precadm2 = ucdp_conflict[ucdp_conflict['where_prec'].isin([1,2,3])]
print(ucdp_conflict_precadm2.shape)
ucdp_conflict_precadm2 = ucdp_conflict_precadm2.groupby(['ADM3_EN','isis_conflict','month'])['num_events'].sum().reset_index()
ucdp_conflict_precadm2 = ucdp_conflict_precadm2.pivot(index=['ADM3_EN','month'],columns='isis_conflict',values='num_events').reset_index()
ucdp_conflict_precadm2 = ucdp_conflict_precadm2.rename(columns={1:'ucdp_isis_events_adm2prec',
                                                                0:'ucdp_nonisis_events_adm2prec'})
ucdp_conflict_precadm2 = ucdp_conflict_precadm2.fillna(value={'ucdp_isis_events_adm2prec':0,
                                                               'ucdp_nonisis_events_adm2prec':0})
ucdp_conflict_precadm2['ucdp_all_events_adm2prec'] = ucdp_conflict_precadm2['ucdp_isis_events_adm2prec'] + ucdp_conflict_precadm2['ucdp_nonisis_events_adm2prec']
ucdp_conflict_precadm2.head()

(11347, 5)
(4035, 5)


isis_conflict,ADM3_EN,month,ucdp_nonisis_events_adm2prec,ucdp_isis_events_adm2prec,ucdp_all_events_adm2prec
0,Abu Dalf,2008-02-29,0.0,1.0,1.0
1,Abu Dalf,2008-09-30,0.0,1.0,1.0
2,Abu Dalf,2011-02-28,0.0,1.0,1.0
3,Abu Dalf,2013-06-30,0.0,1.0,1.0
4,Abu Dalf,2014-06-30,0.0,1.0,1.0


In [329]:
print(ucdp_conflict.shape)
ucdp_conflict_precadm1 = ucdp_conflict[ucdp_conflict['where_prec'].isin([1,2,3,4])]
print(ucdp_conflict_precadm1.shape)
ucdp_conflict_precadm1 = ucdp_conflict_precadm1.groupby(['ADM3_EN','isis_conflict','month'])['num_events'].sum().reset_index()

ucdp_conflict_precadm1 = ucdp_conflict_precadm1.pivot(index=['ADM3_EN','month'],columns='isis_conflict',values='num_events').reset_index()
ucdp_conflict_precadm1 = ucdp_conflict_precadm1.rename(columns={1:'ucdp_isis_events_adm1prec',
                                                                0:'ucdp_nonisis_events_adm1prec'})
ucdp_conflict_precadm1 = ucdp_conflict_precadm1.fillna(value={'ucdp_isis_events_adm1prec':0,
                                                               'ucdp_nonisis_events_adm1prec':0})
ucdp_conflict_precadm1['ucdp_all_events_adm1prec'] = ucdp_conflict_precadm1['ucdp_isis_events_adm1prec'] + ucdp_conflict_precadm1['ucdp_nonisis_events_adm1prec']
print(ucdp_conflict_precadm1.shape)

ucdp_conflict_precadm1.head()

(11347, 5)
(11347, 5)
(9696, 5)


isis_conflict,ADM3_EN,month,ucdp_nonisis_events_adm1prec,ucdp_isis_events_adm1prec,ucdp_all_events_adm1prec
0,Abi Gharaq,2013-06-30,0.0,2.0,2.0
1,Abi Gharaq,2013-11-30,0.0,1.0,1.0
2,Abi Gharaq,2014-06-30,0.0,1.0,1.0
3,Abi Gharaq,2014-07-31,0.0,1.0,1.0
4,Abu Dalf,2006-05-31,0.0,1.0,1.0


In [330]:
ucdp_conflict_full = pd.merge(ucdp_conflict_precadm1,ucdp_conflict_precadm2,on=['ADM3_EN','month'],
                              how='outer')
ucdp_conflict_full = pd.merge(ucdp_conflict_full,ucdp_conflict_precadm3,on=['ADM3_EN','month'],
                              how='outer')
ucdp_conflict_full = ucdp_conflict_full.fillna(0)
print(ucdp_conflict_full.shape)

(9696, 11)


In [331]:
# test that logic holds, no more adm3-precision events than adm 2, than admin 1
ucdp_conflict_full[ucdp_conflict_full['ucdp_all_events_adm2prec'] < ucdp_conflict_full['ucdp_all_events_adm3prec'] ]

isis_conflict,ADM3_EN,month,ucdp_nonisis_events_adm1prec,ucdp_isis_events_adm1prec,ucdp_all_events_adm1prec,ucdp_nonisis_events_adm2prec,ucdp_isis_events_adm2prec,ucdp_all_events_adm2prec,ucdp_nonisis_events_adm3prec,ucdp_isis_events_adm3prec,ucdp_all_events_adm3prec


In [332]:
# test that logic holds, fewer isis events less than overall events
ucdp_conflict_full[ucdp_conflict_full['ucdp_isis_events_adm3prec'] > ucdp_conflict_full['ucdp_all_events_adm3prec'] ]

isis_conflict,ADM3_EN,month,ucdp_nonisis_events_adm1prec,ucdp_isis_events_adm1prec,ucdp_all_events_adm1prec,ucdp_nonisis_events_adm2prec,ucdp_isis_events_adm2prec,ucdp_all_events_adm2prec,ucdp_nonisis_events_adm3prec,ucdp_isis_events_adm3prec,ucdp_all_events_adm3prec


In [333]:
# store UCDP conflict data at month-adm3 level
ucdp_conflict_full_file = source_dir + '/data/conflict/ucdp_conflict_month_adm3_level.csv'

ucdp_conflict_full.to_csv(ucdp_conflict_full_file,index=False)

In [334]:
# add in UCDP conflict data
print(adm3_iraq_data.shape)
ucdp_conflict_full['month'] = pd.to_datetime(ucdp_conflict_full['month'])
adm3_iraq_data = adm3_iraq_data.merge(ucdp_conflict_full,
                                            left_on=['ADM3_EN','date'],
                                           right_on=['ADM3_EN','month'],
                                           how='left')
adm3_iraq_data = adm3_iraq_data.drop(columns=['month','adm3_en'])
print(adm3_iraq_data.shape)

(77616, 26)
(77616, 34)


In [335]:
# attach Adm3 geometries and parent areas to final dataset
iraq_shp_adm3_filt = iraq_shp_adm3.loc[:,('Shape_Area', 'ADM3_EN', 'ADM3_AR', 'ADM3_PCODE',
       'ADM2_EN', 'ADM2_AR', 'ADM2_PCODE', 'ADM1_EN', 'ADM1_AR', 'ADM1_PCODE',
       'geometry')]
print(adm3_iraq_data.shape)
adm3_iraq_data_fin = adm3_iraq_data.merge(iraq_shp_adm3_filt, how='left',
                                          on='ADM3_EN')
adm3_iraq_data_fin = adm3_iraq_data_fin.rename(columns={'date':'month'})
print(adm3_iraq_data_fin.shape)

(77616, 34)
(77616, 44)


In [336]:
adm3_iraq_data_fin[adm3_iraq_data_fin.duplicated(subset=['month','ADM3_EN'],keep=False)]

Unnamed: 0,ADM3_EN,month,mean_evi,mean_evi_scaled,max_evi,max_evi_scaled,ACTION_IND,ACTION_DIR,ACTION_PRT,pop_count,...,Shape_Area,ADM3_AR,ADM3_PCODE,ADM2_EN,ADM2_AR,ADM2_PCODE,ADM1_EN,ADM1_AR,ADM1_PCODE,geometry


In [337]:
adm3_iraq_data_fin.columns

Index(['ADM3_EN', 'month', 'mean_evi', 'mean_evi_scaled', 'max_evi',
       'max_evi_scaled', 'ACTION_IND', 'ACTION_DIR', 'ACTION_PRT', 'pop_count',
       'pop_density', 'disputed_area', 'iom_attacked', 'iom_occupied',
       'isil_ingroup_prior', 'isil_ingroup_curr', 'retaken_year',
       'retaken_post_2016', 'iom_no_isil_action', 'sunni_dom', 'sunni_mix',
       'no_sunni', 'occ_start_month', 'occ_end_month', 'occ_length_mon',
       'ucdp_nonisis_events_adm1prec', 'ucdp_isis_events_adm1prec',
       'ucdp_all_events_adm1prec', 'ucdp_nonisis_events_adm2prec',
       'ucdp_isis_events_adm2prec', 'ucdp_all_events_adm2prec',
       'ucdp_nonisis_events_adm3prec', 'ucdp_isis_events_adm3prec',
       'ucdp_all_events_adm3prec', 'Shape_Area', 'ADM3_AR', 'ADM3_PCODE',
       'ADM2_EN', 'ADM2_AR', 'ADM2_PCODE', 'ADM1_EN', 'ADM1_AR', 'ADM1_PCODE',
       'geometry'],
      dtype='object')

In [338]:
monthly_adm3_geo_file = source_dir + '/data/combined/monthly_adm3_data.geojson'
adm3_iraq_data_fin_gpd = gpd.GeoDataFrame(adm3_iraq_data_fin,geometry=adm3_iraq_data_fin['geometry'])
adm3_iraq_data_fin_gpd.to_file(monthly_adm3_geo_file,driver='GeoJSON')

In [339]:
monthly_adm3_file = source_dir + '/data/combined/monthly_adm3_data.csv'
adm3_iraq_data_fin.drop(columns=['geometry']).to_csv(monthly_adm3_file,index=False,
                                                     encoding='utf-8-sig')