In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import xarray as xr
import rioxarray as rxr
import rasterio as rio
import rasterstats as rstats
import regionmask
import os
import re

In [3]:
# Project root. Defaults to the original author's checkout, but can be pointed
# elsewhere by setting the IRAQ_PROJECT_DIR environment variable, so the
# notebook runs on other machines without editing this cell.
# Kept as a plain string (no trailing slash) because later cells build paths
# with `source_dir + '/data/...'`.
source_dir = os.environ.get(
    'IRAQ_PROJECT_DIR',
    '/mnt/c/Users/natra/Documents/Research/Iraq-post-conflict-rebel-governance',
).rstrip('/')

# Input files, all relative to the project root.
iom_adm3_mapping = source_dir + '/data/iom/iom_adm3_mapping.csv'
iom_data_file = source_dir + '/data/iom/iom_raw.xlsx'
ag_indices_file = source_dir + '/data/satellite_indices/evi/evi_mask_mean_max_min/evi_stats_Jan2001-Dec2022.csv'

In [4]:
# Read the three tabular inputs configured in the path cell.
# The IOM workbook is read from its "Full Dataset" sheet using row index 1
# (the second row) as the header.
ag_data = pd.read_csv(ag_indices_file)
iom_data = pd.read_excel(
    iom_data_file,
    sheet_name="Full Dataset",
    header=1,
)
iom_adm3_map = pd.read_csv(iom_adm3_mapping)

## UCDP Conflict Events

In [5]:
# Read the UCDP GED event export for Iraq and inspect its layout:
# column names, (rows, columns), then the first two records.
ucdp_iraq_file = f'{source_dir}/data/conflict/gedevents-Iraq-2024-04-12.csv'
ucdp_conflict = pd.read_csv(ucdp_iraq_file)

print(ucdp_conflict.columns)
print(ucdp_conflict.shape)

ucdp_conflict.head(n=2)

Index(['id', 'relid', 'year', 'active_year', 'code_status', 'type_of_violence',
       'conflict_dset_id', 'conflict_new_id', 'conflict_name', 'dyad_dset_id',
       'dyad_new_id', 'dyad_name', 'side_a_dset_id', 'side_a_new_id', 'side_a',
       'side_b_dset_id', 'side_b_new_id', 'side_b', 'number_of_sources',
       'source_article', 'source_office', 'source_date', 'source_headline',
       'source_original', 'where_prec', 'where_coordinates',
       'where_description', 'adm_1', 'adm_2', 'latitude', 'longitude',
       'geom_wkt', 'priogrid_gid', 'country', 'country_id', 'region',
       'event_clarity', 'date_prec', 'date_start', 'date_end', 'deaths_a',
       'deaths_b', 'deaths_civilians', 'deaths_unknown', 'best_est',
       'high_est', 'low_est'],
      dtype='object')
(8713, 47)


Unnamed: 0,id,relid,year,active_year,code_status,type_of_violence,conflict_dset_id,conflict_new_id,conflict_name,dyad_dset_id,...,date_prec,date_start,date_end,deaths_a,deaths_b,deaths_civilians,deaths_unknown,best_est,high_est,low_est
0,121766,IRQ-2003-1-332-21,2003,True,Clear,1,420,420,"Australia, United Kingdom, United States - Iraq",883,...,1,3/24/2003 0:00,3/24/2003 0:00,0,0,0,0,0,62,0
1,121767,IRQ-2003-1-332-23,2003,True,Clear,1,420,420,"Australia, United Kingdom, United States - Iraq",883,...,1,3/25/2003 0:00,3/25/2003 0:00,0,4,0,0,4,4,4


In [20]:
# Columns carried forward for the event-level work: identifiers, conflict and
# actor labels, location fields (including the `where_prec` precision code)
# and timing fields.
ucdp_event_cols = [
    'id', 'relid', 'year', 'type_of_violence',
    'conflict_new_id', 'conflict_name',
    'side_a_new_id', 'side_a',
    'side_b_new_id', 'side_b', 'number_of_sources',
    'where_prec', 'where_coordinates', 'where_description',
    'latitude', 'longitude',
    'event_clarity', 'date_prec', 'date_start', 'date_end',
]

# Keep events from 2000 onward. Columns are selected with a list (the
# documented form for picking several labels; a tuple is treated as a single
# key on MultiIndex columns), and the result is an explicit copy so that
# columns added in later cells are written to an independent frame rather
# than to a slice of `ucdp_conflict`.
ucdp_conflict_events = ucdp_conflict.loc[ucdp_conflict['year'] >= 2000, ucdp_event_cols].copy()
ucdp_conflict_events.head(2)

Unnamed: 0,id,relid,year,type_of_violence,conflict_new_id,conflict_name,side_a_new_id,side_a,side_b_new_id,side_b,number_of_sources,where_prec,where_coordinates,where_description,latitude,longitude,event_clarity,date_prec,date_start,date_end
0,121766,IRQ-2003-1-332-21,2003,1,420,"Australia, United Kingdom, United States - Iraq",3822,"Government of Australia, Government of United ...",116,Government of Iraq,-1,6,Iraq,Iraq,33.0,44.0,1,1,3/24/2003 0:00,3/24/2003 0:00
1,121767,IRQ-2003-1-332-23,2003,1,420,"Australia, United Kingdom, United States - Iraq",3822,"Government of Australia, Government of United ...",116,Government of Iraq,-1,5,Euphrates river,Euphrates River,33.083333,43.483333,1,1,3/25/2003 0:00,3/25/2003 0:00


In [25]:
# Flag events in which 'IS' appears as either side (1 = yes, 0 = no).
is_isis_event = ucdp_conflict_events['side_a'].eq('IS') | ucdp_conflict_events['side_b'].eq('IS')

# Add the flag and parse both event dates in one step. `.assign` returns a
# new frame, so nothing is written into a filtered slice.
ucdp_conflict_events = ucdp_conflict_events.assign(
    isis_conflict=np.where(is_isis_event, 1, 0),
    date_start=pd.to_datetime(ucdp_conflict_events['date_start']),
    date_end=pd.to_datetime(ucdp_conflict_events['date_end']),
)

# Turn the events into point geometries (longitude = x, latitude = y) in EPSG:4326.
ucdp_conflict_events = gpd.GeoDataFrame(
    ucdp_conflict_events,
    geometry=gpd.points_from_xy(ucdp_conflict_events['longitude'],
                                ucdp_conflict_events['latitude']),
    crs='epsg:4326',
)

# Take the ISIS subset only AFTER the flag, parsed dates and geometry exist.
# Previously the subset was sliced first, so on a fresh kernel it lacked those
# columns; they only showed up in the displayed output when the cell was run twice.
ucdp_isis_events = ucdp_conflict_events[ucdp_conflict_events['isis_conflict'] == 1]
print(ucdp_isis_events.shape)
ucdp_isis_events.head(2)

(6813, 22)


Unnamed: 0,id,relid,year,type_of_violence,conflict_new_id,conflict_name,side_a_new_id,side_a,side_b_new_id,side_b,...,where_coordinates,where_description,latitude,longitude,event_clarity,date_prec,date_start,date_end,isis_conflict,geometry
9,131191,IRQ-2006-1-448-233,2006,1,259,Iraq: Government,116,Government of Iraq,234,IS,...,Mosul town,Mosul town,36.335,43.118889,1,1,2006-08-17,2006-08-17,1,POINT (43.11889 36.33500)
10,131192,IRQ-2006-1-448-396,2006,1,259,Iraq: Government,116,Government of Iraq,234,IS,...,Al AnbÄr province,Anbar province,33.0,41.75,1,1,2006-12-24,2006-12-24,1,POINT (41.75000 33.00000)


In [26]:
# Load the ADM3 boundary shapefile that UCDP events will be joined to, keeping
# only the English names of the three administrative levels plus geometry.
iraq_shp_adm3_loc = f'{source_dir}/data/geo_boundaries/irq-administrative-divisions-shapefiles/irq_admbnda_adm3_cso_20190603.shp'
iraq_shp_adm3 = gpd.read_file(iraq_shp_adm3_loc)
iraq_adm3_filt = iraq_shp_adm3[['ADM1_EN', 'ADM2_EN', 'ADM3_EN', 'geometry']]


In [27]:
# Attach the ADM1/ADM2/ADM3 names of the polygon each event point intersects.
# A left join keeps every event, including those outside all polygons; the two
# printed shapes let the reader check that no event was duplicated.
print(ucdp_conflict_events.shape)
ucdp_adm3_geoms = ucdp_conflict_events.sjoin(
    iraq_adm3_filt,
    how='left',
    predicate='intersects',
)
print(ucdp_adm3_geoms.shape)

(8260, 22)
(8260, 26)


In [40]:
# Split events by their location-precision code (`where_prec`) so that less
# precise tiers can be handled, or later excluded, separately:
#   1-2 -> the ADM3 found by the point join is used directly
#   3   -> expanded to all ADM3 units of the ADM2 unit in a later cell
#   4   -> expanded to all ADM3 units of the ADM1 unit in a later cell
ucdp_adm3_geoms_wh12 = (
    ucdp_adm3_geoms
    .loc[ucdp_adm3_geoms['where_prec'].isin([1, 2])]
    .drop(columns=['ADM1_EN', 'ADM2_EN', 'index_right'])
)
print(ucdp_adm3_geoms_wh12.shape)

ucdp_adm3_geoms_wh3 = ucdp_adm3_geoms.loc[ucdp_adm3_geoms['where_prec'].eq(3)]
print(ucdp_adm3_geoms_wh3.shape)

ucdp_adm3_geoms_wh4 = ucdp_adm3_geoms.loc[ucdp_adm3_geoms['where_prec'].eq(4)]
print(ucdp_adm3_geoms_wh4.shape)


(6867, 23)
(278, 26)
(593, 26)


In [42]:
# Events located only to ADM2 level (where_prec 3): attribute each event to
# every ADM3 unit belonging to the ADM2 unit its point fell in.

# Name lookup ADM2 -> ADM3. De-duplicated so that a repeated (ADM2, ADM3) pair
# in the boundary table cannot multiply events in the merge below.
iraq_adm3_filt_adm2 = iraq_adm3_filt.loc[:, ['ADM2_EN', 'ADM3_EN']].drop_duplicates()

# errors='ignore' keeps the cell re-runnable: this line rebinds
# `ucdp_adm3_geoms_wh3`, so on a second run the columns are already gone and a
# strict drop would raise KeyError.
ucdp_adm3_geoms_wh3 = ucdp_adm3_geoms_wh3.drop(columns=['index_right', 'ADM1_EN', 'ADM3_EN'],
                                               errors='ignore')
print(ucdp_adm3_geoms_wh3.shape)

# One output row per (event, ADM3 in the same ADM2); the ADM2 name is only a
# join key and is dropped afterwards.
ucdp_adm3_geoms_wh3_adm3 = ucdp_adm3_geoms_wh3.merge(iraq_adm3_filt_adm2, how='left', on='ADM2_EN')
ucdp_adm3_geoms_wh3_adm3 = ucdp_adm3_geoms_wh3_adm3.drop(columns=['ADM2_EN'])
print(ucdp_adm3_geoms_wh3_adm3.shape)

(278, 23)
(992, 23)


In [43]:
# Events located only to ADM1 level (where_prec 4): attribute each event to
# every ADM3 unit belonging to the ADM1 unit its point fell in.

# Name lookup ADM1 -> ADM3. De-duplicated so that a repeated (ADM1, ADM3) pair
# in the boundary table cannot multiply events in the merge below.
iraq_adm3_filt_adm1 = iraq_adm3_filt.loc[:, ['ADM1_EN', 'ADM3_EN']].drop_duplicates()

# errors='ignore' keeps the cell re-runnable: this line rebinds
# `ucdp_adm3_geoms_wh4`, so on a second run the columns are already gone and a
# strict drop would raise KeyError.
ucdp_adm3_geoms_wh4 = ucdp_adm3_geoms_wh4.drop(columns=['index_right', 'ADM2_EN', 'ADM3_EN'],
                                               errors='ignore')
print(ucdp_adm3_geoms_wh4.shape)

# One output row per (event, ADM3 in the same ADM1); the ADM1 name is only a
# join key and is dropped afterwards.
ucdp_adm3_geoms_wh4_adm3 = ucdp_adm3_geoms_wh4.merge(iraq_adm3_filt_adm1, how='left', on='ADM1_EN')
ucdp_adm3_geoms_wh4_adm3 = ucdp_adm3_geoms_wh4_adm3.drop(columns=['ADM1_EN'])
print(ucdp_adm3_geoms_wh4_adm3.shape)

(593, 23)
(11973, 23)


In [45]:
# Stack the three precision tiers (where_prec 1-2, 3 and 4), i.e. every event
# that could be placed in at least an ADM1 unit, then discard the coordinate,
# geometry and start-date columns that the monthly counts do not use.
ucdp_conflict_locknown = pd.concat(
    [
        ucdp_adm3_geoms_wh12,
        ucdp_adm3_geoms_wh3_adm3,
        ucdp_adm3_geoms_wh4_adm3,
    ]
).drop(
    columns=['geometry', 'latitude', 'longitude', 'where_coordinates', 'date_start']
)



In [53]:
# Monthly event counts per ADM3 name, location-precision code and ISIS flag.
# Events are binned by month-end of `date_end`; resampling inside each group
# also produces empty months between a group's first and last event, which
# are removed by the final filter.
# NOTE(review): groups are keyed on ADM3_EN only — if sub-district names are
# not unique across districts their counts would be pooled; confirm.
ucdp_conflict_loc_monthly = (
    ucdp_conflict_locknown
    .set_index('date_end')
    .groupby(['ADM3_EN', 'where_prec', 'isis_conflict'])[['relid']]
    .resample('ME')
    .count()
    .reset_index()
    .rename(columns={'date_end': 'month', 'relid': 'num_events'})
)
ucdp_conflict_loc_monthly = ucdp_conflict_loc_monthly.loc[
    ucdp_conflict_loc_monthly['num_events'] > 0
]
ucdp_conflict_loc_monthly.head()

Unnamed: 0,ADM3_EN,where_prec,isis_conflict,month,num_events
0,Abi Gharaq,4,1,2013-06-30,2
5,Abi Gharaq,4,1,2013-11-30,1
12,Abi Gharaq,4,1,2014-06-30,1
13,Abi Gharaq,4,1,2014-07-31,1
14,Abu Dalf,1,1,2008-09-30,1


In [57]:
# Save the monthly ADM3 event counts; the row index is not written.
ucdp_monthly_file = f'{source_dir}/data/conflict/ucdp_conflict_monthly.csv'
ucdp_conflict_loc_monthly.to_csv(path_or_buf=ucdp_monthly_file, index=False)

In [9]:
# Distinct actors recorded as side A, in order of first appearance.
pd.unique(ucdp_conflict_events['side_a'])

array(['Government of Australia, Government of United Kingdom, Government of United States of America',
       'Ansar al-Islam', 'Government of Iraq', 'IS',
       'Government of Turkey', '1920 Revolution Brigades',
       'al-Mahdi Army', 'Government of Iran', 'Jaysh al-Mukhtar', 'KDP',
       'PKK', 'Government of Syria', 'AAH', 'Jamaat Jund al-Sahaba'],
      dtype=object)

In [10]:
# Distinct actors recorded as side B, in order of first appearance.
pd.unique(ucdp_conflict_events['side_b'])

array(['Government of Iraq', 'PUK', 'al-Mahdi Army', 'IS', 'Civilians',
       'PKK', 'Ansar al-Islam', 'RJF', 'KDPI', 'SCIRI', 'PJAK', 'MEK',
       'JRTN', 'Government of Israel', 'Syrian insurgents'], dtype=object)

## xSub Data - Monthly Conflict Events

In [21]:
# Read the xSub event-level file for Iraq, then show its column names and the
# first two records.
xsub_conflict_file = f'{source_dir}/data/conflict/xSub_MELTT1km2dB_IRQ_event.csv'
xsub_conflict = pd.read_csv(xsub_conflict_file)

print(xsub_conflict.columns)

xsub_conflict.head(n=2)

Index(['SOURCE', 'CONFLICT', 'COWN', 'COWC', 'ISO3', 'DATE', 'LAT', 'LONG',
       'GEOPRECISION', 'TIMEPRECISION', 'INITIATOR_SIDEA', 'INITIATOR_SIDEB',
       'INITIATOR_SIDEC', 'INITIATOR_SIDED', 'TARGET_SIDEA', 'TARGET_SIDEB',
       'TARGET_SIDEC', 'TARGET_SIDED', 'DYAD_A_A', 'DYAD_A_B', 'DYAD_A_C',
       'DYAD_A_D', 'DYAD_B_A', 'DYAD_B_B', 'DYAD_B_C', 'DYAD_B_D', 'DYAD_C_A',
       'DYAD_C_B', 'DYAD_C_C', 'DYAD_C_D', 'DYAD_D_A', 'DYAD_D_B', 'DYAD_D_C',
       'DYAD_D_D', 'ACTION_ANY', 'ACTION_IND', 'ACTION_DIR', 'ACTION_PRT',
       'SIDEA_ANY', 'SIDEA_IND', 'SIDEA_DIR', 'SIDEA_PRT', 'SIDEB_ANY',
       'SIDEB_IND', 'SIDEB_DIR', 'SIDEB_PRT', 'SIDEC_ANY', 'SIDEC_IND',
       'SIDEC_DIR', 'SIDEC_PRT', 'SIDED_ANY', 'SIDED_IND', 'SIDED_DIR',
       'SIDED_PRT', 'ACTION_AIRSTRIKE', 'ACTION_AMBUSH', 'ACTION_ARMOR',
       'ACTION_ARREST', 'ACTION_ARTILLERY', 'ACTION_BOMB', 'ACTION_CHEMICAL',
       'ACTION_CIV_ABUSE', 'ACTION_DISPLACE', 'ACTION_FIREFIGHT',
       'ACTION_KIDNAP', 'ACT

Unnamed: 0,SOURCE,CONFLICT,COWN,COWC,ISO3,DATE,LAT,LONG,GEOPRECISION,TIMEPRECISION,...,ACTION_RAID,ACTION_RIOT,ACTION_RIOTCONTROL,ACTION_ROBBERY,ACTION_ROCKET,ACTION_SIEGE,ACTION_STORM,ACTION_SUICIDE,ACTION_TERROR,ACTION_UNKNOWN
0,MELTT1km2dB,Iraq,645,IRQ,IRQ,19910113,33.340582,44.400876,settlement,day,...,0,0,0,0,0,0,0,0,0,0
1,MELTT1km2dB,Iraq,645,IRQ,IRQ,19910117,33.340582,44.400876,settlement,day,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# Show the sixteen DYAD_<X>_<Y> indicator columns (X, Y in A-D), ordered
# DYAD_A_A, DYAD_A_B, ..., DYAD_D_D.
dyad_cols = [f'DYAD_{first}_{second}' for first in 'ABCD' for second in 'ABCD']
xsub_conflict.loc[:, dyad_cols]

Unnamed: 0,DYAD_A_A,DYAD_A_B,DYAD_A_C,DYAD_A_D,DYAD_B_A,DYAD_B_B,DYAD_B_C,DYAD_B_D,DYAD_C_A,DYAD_C_B,DYAD_C_C,DYAD_C_D,DYAD_D_A,DYAD_D_B,DYAD_D_C,DYAD_D_D
0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
2,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
3,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
4,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51862,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
51863,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
51864,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
51865,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [4]:
# Quick look at the period covered by xSub: parse the integer YYYYMMDD `DATE`
# field and print the latest, then the earliest, event date.
xsub_tojoin = xsub_conflict.loc[
    :, ['DATE', 'LAT', 'LONG', 'ACTION_IND', 'ACTION_DIR', 'ACTION_PRT']
]
xsub_tojoin['date_dt'] = pd.to_datetime(xsub_tojoin['DATE'], format='%Y%m%d')

event_dates = xsub_tojoin['date_dt']
print(event_dates.max())
print(event_dates.min())

2019-12-07 00:00:00
1991-01-13 00:00:00


In [20]:
# Build the point layer of xSub events that will be joined to ADM3 polygons.

# Keep the date, the coordinates and the three ACTION_* indicator columns.
xsub_tojoin = xsub_conflict.loc[
    :, ['DATE', 'LAT', 'LONG', 'ACTION_IND', 'ACTION_DIR', 'ACTION_PRT']
]

# `DATE` is stored as YYYYMMDD; parse it and keep events from 2001 onward.
xsub_tojoin = xsub_tojoin.assign(
    date_dt=pd.to_datetime(xsub_tojoin['DATE'], format='%Y%m%d')
)
xsub_tojoin = xsub_tojoin.loc[xsub_tojoin['date_dt'] >= '2001-01-01']

# Build the GeoDataFrame straight from the filtered rows (LONG = x, LAT = y)
# instead of assigning a geometry column onto the filtered frame, which is the
# chained-assignment pattern pandas warns about. The raw coordinate and date
# columns are no longer needed once geometry and `date_dt` exist.
xsub_tojoin = gpd.GeoDataFrame(
    xsub_tojoin.drop(columns=['LAT', 'LONG', 'DATE']),
    geometry=gpd.points_from_xy(xsub_tojoin['LONG'], xsub_tojoin['LAT']),
    crs='EPSG:4326',
)

xsub_tojoin.head()

Unnamed: 0,ACTION_IND,ACTION_DIR,ACTION_PRT,date_dt,geometry
175,0,0,0,2001-04-18,POINT (45.17786 34.27725)
176,0,0,0,2001-06-07,POINT (47.79747 30.53302)
177,0,0,0,2001-09-16,POINT (47.14399 31.83588)
178,1,0,0,2001-09-16,POINT (47.17528 31.84528)
179,0,0,0,2001-09-23,POINT (46.01103 35.26622)


In [21]:
# Locate each xSub event in an ADM3 polygon.
# NOTE: this rebinds `iraq_adm3_filt` with ADM3 name, ADM3 p-code and
# geometry; the UCDP cells earlier in the notebook expect the same name to
# carry ADM1_EN/ADM2_EN instead, so they cannot be re-run after this cell
# without first re-running their own boundary-loading cell.
iraq_shp_adm3_loc = f'{source_dir}/data/geo_boundaries/irq-administrative-divisions-shapefiles/irq_admbnda_adm3_cso_20190603.shp'
iraq_shp_adm3 = gpd.read_file(iraq_shp_adm3_loc)
iraq_adm3_filt = iraq_shp_adm3[['ADM3_EN', 'ADM3_PCODE', 'geometry']]

# Left join keeps events that intersect no polygon (their ADM3 fields are NaN).
adm3_xsub = gpd.sjoin(xsub_tojoin, iraq_adm3_filt, how='left', predicate='intersects')

# Row counts before and after the join, then the number of unmatched events.
print(xsub_tojoin.shape)
print(adm3_xsub.shape)
print(adm3_xsub.loc[adm3_xsub['ADM3_PCODE'].isna()].shape)
adm3_xsub.head(2)

(51692, 5)
(51692, 8)
(166, 8)


Unnamed: 0,ACTION_IND,ACTION_DIR,ACTION_PRT,date_dt,geometry,index_right,ADM3_EN,ADM3_PCODE
175,0,0,0,2001-04-18,POINT (45.17786 34.27725),149.0,Jalawla,IQG10Q05N04
176,0,0,0,2001-06-07,POINT (47.79747 30.53302),170.0,Markaz Al-Basrah,IQG02Q02N02


In [23]:
# Inspecting the events that matched no ADM3 polygon shows three situations:
#   * coordinates that look mis-recorded (e.g. a longitude of about 4.3);
#   * points on or very close to the Iraqi border, which should be kept;
#   * points far outside Iraq (in Iran or elsewhere), which should not.
# A maximum distance is therefore applied when snapping to the nearest polygon.
adm3_xsub.loc[adm3_xsub['ADM3_PCODE'].isna()].head(8)

Unnamed: 0,ACTION_IND,ACTION_DIR,ACTION_PRT,date_dt,geometry,index_right,ADM3_EN,ADM3_PCODE
23906,1,0,0,2009-03-08,POINT (4.39389 33.33861),,,
24054,1,0,0,2009-03-26,POINT (4.39389 33.33861),,,
24210,1,1,0,2009-04-16,POINT (46.58972 33.38444),,,
24694,1,0,0,2009-06-22,POINT (4.18500 33.30833),,,
24764,1,0,0,2009-07-01,POINT (4.39222 35.46806),,,
32116,1,0,0,2016-01-01,POINT (18.46667 33.42056),,,
32193,0,1,0,2016-01-04,POINT (20.76667 32.88722),,,
32239,0,1,0,2016-01-05,POINT (30.00000 36.08333),,,


In [24]:
# Second pass for events that intersected no ADM3 polygon: snap each one to
# the nearest polygon, but only within `max_distance` = 10000 (treated as
# 10 km in the note below). Both layers are moved to EPSG:3893 for the
# distance search and the result is returned to EPSG:4326.
xsub_unjoined = (
    adm3_xsub
    .loc[adm3_xsub['ADM3_PCODE'].isna()]
    .drop(columns=['index_right', 'ADM3_EN', 'ADM3_PCODE'])
    .to_crs('EPSG:3893')
)

iraq_adm3_newcrs = iraq_adm3_filt.to_crs('EPSG:3893')

adm3_xsub_nn = gpd.sjoin_nearest(
    xsub_unjoined,
    iraq_adm3_newcrs,
    how='left',
    max_distance=10000,
).to_crs('EPSG:4326')

print(xsub_unjoined.shape)
print(adm3_xsub_nn.shape)
# still 132 records which did not join within 10km of the Iraq shapefile edges, so will not include these
print(adm3_xsub_nn.loc[adm3_xsub_nn['ADM3_PCODE'].isna()].shape)
adm3_xsub_nn.head(2)

(166, 5)
(166, 8)
(132, 8)


Unnamed: 0,ACTION_IND,ACTION_DIR,ACTION_PRT,date_dt,geometry,index_right,ADM3_EN,ADM3_PCODE
23906,1,0,0,2009-03-08,POINT (4.39389 33.33861),,,
24054,1,0,0,2009-03-26,POINT (4.39389 33.33861),,,


In [26]:
# Final event table: events matched by the polygon join, followed by the
# events recovered by the nearest-polygon pass. Events with no ADM3 p-code in
# either pass are left out; the join bookkeeping and geometry are dropped.
adm3_xsub_nn_notna = adm3_xsub_nn.loc[adm3_xsub_nn['ADM3_PCODE'].notna()]
adm3_xsub_notna = adm3_xsub.loc[adm3_xsub['ADM3_PCODE'].notna()]

adm3_xsub_fin = pd.concat(
    [adm3_xsub_notna, adm3_xsub_nn_notna]
).drop(columns=['index_right', 'geometry'])
adm3_xsub_fin.head(3)

Unnamed: 0,ACTION_IND,ACTION_DIR,ACTION_PRT,date_dt,ADM3_EN,ADM3_PCODE
175,0,0,0,2001-04-18,Jalawla,IQG10Q05N04
176,0,0,0,2001-06-07,Markaz Al-Basrah,IQG02Q02N02
177,0,0,0,2001-09-16,Al-Msharah,IQG14Q03N02


In [37]:
# Monthly (month-end) sums of the three ACTION_* indicators per ADM3 unit.
# Resampling inside each group fills the months between a unit's first and
# last event with zeros; months outside that span are not created.
adm3_xsub_monthly = (
    adm3_xsub_fin
    .set_index('date_dt')
    .groupby(['ADM3_EN', 'ADM3_PCODE'])[['ACTION_IND', 'ACTION_DIR', 'ACTION_PRT']]
    .resample('ME')
    .sum()
    .reset_index()
    .rename(columns={'date_dt': 'month'})
)
adm3_xsub_monthly.head()

Unnamed: 0,ADM3_EN,ADM3_PCODE,month,ACTION_IND,ACTION_DIR,ACTION_PRT
0,Abi Gharaq,IQG07Q02N02,2017-12-31,1,1,0
1,Abi Gharaq,IQG07Q02N02,2018-01-31,0,0,0
2,Abi Gharaq,IQG07Q02N02,2018-02-28,0,0,0
3,Abi Gharaq,IQG07Q02N02,2018-03-31,0,0,0
4,Abi Gharaq,IQG07Q02N02,2018-04-30,0,0,0


In [38]:
# Save the monthly ADM3 xSub aggregates; the row index is not written.
adm3_xsub_file = f'{source_dir}/data/conflict/adm3_xsub_monthly.csv'
adm3_xsub_monthly.to_csv(path_or_buf=adm3_xsub_file, index=False)

## GeoEPR

In [5]:
# Read the GeoEPR 2021 group polygons.
geoepr_file = f'{source_dir}/data/EPR/GeoEPR-2021.geojson'
geoepr_df = gpd.read_file(filename=geoepr_file)

In [6]:
# Keep only the group polygons whose state name is Iraq and display them.
geoepr_iraq = geoepr_df.loc[geoepr_df['statename'].eq('Iraq')]
geoepr_iraq

Unnamed: 0,gwid,statename,from,to,group,groupid,gwgroupid,umbrella,sqkm,type,geometry
1086,645,Iraq,1946,2021,Shi'a Arabs,2000,64502000,,108150,Regional & urban,"MULTIPOLYGON (((45.38094 33.41709, 45.33382 33..."
1087,645,Iraq,1946,2021,Sunni Arabs,3000,64503000,,96757,Regional & urban,"MULTIPOLYGON (((47.09444 29.90708, 47.04776 29..."
1088,645,Iraq,1946,2021,Kurds,1000,64501000,,72420,Regional & urban,"MULTIPOLYGON (((43.91298 37.22659, 43.91298 37..."
1089,645,Iraq,1980,2021,Assyrians,4000,64504000,,1000,Regionally based,"MULTIPOLYGON (((43.16741 37.12906, 43.20782 37..."
1090,645,Iraq,1992,2021,Turkmen,5000,64505000,,2995,Regionally based,"MULTIPOLYGON (((44.85596 34.78855, 44.86303 34..."


In [7]:
# Reload the ADM3 boundaries for the GeoEPR overlay, keeping the ADM3 name,
# ADM3 p-code and geometry.
iraq_shp_adm3_loc = f'{source_dir}/data/geo_boundaries/irq-administrative-divisions-shapefiles/irq_admbnda_adm3_cso_20190603.shp'
iraq_shp_adm3 = gpd.read_file(iraq_shp_adm3_loc)
iraq_adm3_filt = iraq_shp_adm3[['ADM3_EN', 'ADM3_PCODE', 'geometry']]


In [20]:
# Two overlay layers: the Sunni Arab polygon on its own, and every other
# Iraqi group dissolved into a single geometry relabelled 'Non-Sunni Arabs'.
epr_sunni = geoepr_iraq.loc[geoepr_iraq['group'] == 'Sunni Arabs', ['group', 'geometry']]
epr_non_sunni = (
    geoepr_iraq
    .loc[geoepr_iraq['group'] != 'Sunni Arabs', ['group', 'geometry']]
    .dissolve()
    .assign(group='Non-Sunni Arabs')
)

In [36]:
# Classify each ADM3 unit by which GeoEPR layers it intersects.
# After the two left joins, `group_left` is 'Sunni Arabs' when the unit
# intersects the Sunni polygon (NaN otherwise) and `group_right` is
# 'Non-Sunni Arabs' when it intersects the dissolved other-groups polygon.
#   sunni_dom : Sunni polygon only
#   sunni_mix : both polygons
#   no_sunni  : other-groups polygon only
# A unit intersecting neither layer gets 0 in all three flags.
iraq_adm3_filt_eth = (
    iraq_adm3_filt
    .sjoin(epr_sunni, predicate='intersects', how='left')
    .drop(columns=['index_right'])
    .sjoin(epr_non_sunni, predicate='intersects', how='left')
    .assign(
        sunni_dom=lambda adm3: np.where(
            adm3['group_left'].eq('Sunni Arabs') & adm3['group_right'].isna(), 1, 0),
        sunni_mix=lambda adm3: np.where(
            adm3['group_left'].eq('Sunni Arabs') & adm3['group_right'].eq('Non-Sunni Arabs'), 1, 0),
        no_sunni=lambda adm3: np.where(
            adm3['group_left'].isna() & adm3['group_right'].eq('Non-Sunni Arabs'), 1, 0),
    )
    .drop(columns=['index_right', 'group_left', 'group_right', 'geometry'])
)
iraq_adm3_filt_eth.head()

Unnamed: 0,ADM3_EN,ADM3_PCODE,sunni_dom,sunni_mix,no_sunni
0,Abi Gharaq,IQG01Q05N01,0,0,1
1,Abu Dalf,IQG16Q01N02,1,0,0
2,Abu Ghraib,IQG08Q02N02,0,1,0
3,Agjalare,IQG06Q02N01,0,0,1
4,Akaika,IQG17Q05N04,0,0,1


In [32]:
# Sanity check: no ADM3 unit is flagged both Sunni-dominant and mixed
# (an empty table is the expected result).
iraq_adm3_filt_eth.query('sunni_dom == 1 and sunni_mix == 1')

Unnamed: 0,ADM3_EN,ADM3_PCODE,sunni_dom,sunni_mix,no_sunni


In [33]:
# Sanity check: no ADM3 unit is flagged both mixed and non-Sunni
# (an empty table is the expected result).
iraq_adm3_filt_eth.query('sunni_mix == 1 and no_sunni == 1')

Unnamed: 0,ADM3_EN,ADM3_PCODE,sunni_dom,sunni_mix,no_sunni


In [34]:
# Sanity check: no ADM3 unit is flagged both Sunni-dominant and non-Sunni
# (an empty table is the expected result).
iraq_adm3_filt_eth.query('sunni_dom == 1 and no_sunni == 1')

Unnamed: 0,ADM3_EN,ADM3_PCODE,sunni_dom,sunni_mix,no_sunni


In [35]:
# Save the ADM3-level Sunni classification flags; the row index is not written.
iraq_adm3_filt_eth_file = f'{source_dir}/data/EPR/adm3_sunni_epr.csv'
iraq_adm3_filt_eth.to_csv(path_or_buf=iraq_adm3_filt_eth_file, index=False)