In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np

In [3]:
source_dir = '/mnt/c/Users/natra/Documents/Research/Iraq-post-conflict-rebel-governance'

In [3]:
iom_2022_file = source_dir + '/data/iom/2022_collapsed.csv'
iom_2022 = pd.read_csv(iom_2022_file)

In [4]:
iom_2022.head()

Unnamed: 0,LocationID,Governorate,District,Subdistrict,LocationType,PlaceType,LocationEn,isgov
0,2101002,Anbar,Al-Ka'im,Al-Obiadi,Peri-urban,Neighborhood,Al Aubaidi Al Qadima,1
1,2101033,Anbar,Al-Ka'im,Al-Obiadi,Peri-urban,Neighborhood,Tiwaan,1
2,2101032,Anbar,Al-Ka'im,Al-Obiadi,Rural,Village,Al Masharea,1
3,2101010,Anbar,Al-Ka'im,Al-Obiadi,Rural,Village,Al Zalla,1
4,2101034,Anbar,Al-Ka'im,Al-Obiadi,Rural,Village,Dghima village,1


In [5]:
print(len(iom_2022.Subdistrict.unique()))
print(len(iom_2022.District.unique()))
print(len(iom_2022.Governorate.unique()))

264
94
18


In [4]:
iraq_shp_adm3_loc = source_dir + '/data/geo_boundaries/irq-administrative-divisions-shapefiles/irq_admbnda_adm3_cso_20190603.shp'
iraq_shp_adm3 = gpd.read_file(iraq_shp_adm3_loc)

In [5]:
adm3_ar_places = iraq_shp_adm3.loc[:,('ADM3_AR')]

In [7]:
print(len(iraq_shp_adm3.ADM3_EN.unique()))
print(len(iraq_shp_adm3.ADM2_EN.unique()))
print(len(iraq_shp_adm3.ADM1_EN.unique()))

294
101
18


Governorates correspond to Adm 1 locations - 18 total

Districts - there are 105 in the overall IOM DTM data, but only 101 in Admin 2.

Subdistricts - correspond to Adm 3 locations.  

Location - according to IOM's definition, corresponds to the 4th official administrative division (https://iraqdtm.iom.int/MasterList#Methodology). However, the shapefiles for these adm 4 locations do not seem to be available (as described in the Metadata page for the Iraq Populated Places database OCHA created: https://data.humdata.org/dataset/iraq-populated-places-2021, where they say "If, in the future, administrative level 4 boundaries are obtained, these P-codes may need to be regenerated.")

## Spatial Joins

### Joining IOM Location IDs to Adm 3 Areas

In [5]:
iom_int_locs_file = source_dir + '/data/iom/iom_raw.xlsx'

iom_int_locs = pd.read_excel(iom_int_locs_file, sheet_name="Full Dataset",header=1)
iom_all_locs = iom_int_locs.loc[:,('LocationID','PlaceID','Sub-district','LocationType','PlaceType','Latitude','Longitude')]
# confirmed that these were unique
print(iom_all_locs.shape)
iom_all_locs.head(2)

(3717, 7)


Unnamed: 0,LocationID,PlaceID,Sub-district,LocationType,PlaceType,Latitude,Longitude
0,2101002,23647,Al-Obiadi,Peri-urban,Neighborhood,34.420203,41.201494
1,2101037,33778,Al-Obiadi,Urban,Neighborhood,34.433043,41.234143


In [6]:
# convert IOM lat/lon to point geometry
iom_all_locs['geometry'] = gpd.points_from_xy(iom_all_locs['Longitude'],iom_all_locs['Latitude'])
iom_all_locs = iom_all_locs.drop(columns=['Latitude','Longitude'])
iom_all_locs = gpd.GeoDataFrame(iom_all_locs, geometry='geometry',crs='EPSG:4326')
iom_all_locs.head()

Unnamed: 0,LocationID,PlaceID,Sub-district,LocationType,PlaceType,geometry
0,2101002,23647,Al-Obiadi,Peri-urban,Neighborhood,POINT (41.20149 34.42020)
1,2101037,33778,Al-Obiadi,Urban,Neighborhood,POINT (41.23414 34.43304)
2,2101006,90,Al-Obiadi,Urban,Neighborhood,POINT (41.22633 34.42921)
3,2101007,23797,Al-Obiadi,Urban,Neighborhood,POINT (41.22601 34.43465)
4,2101008,164,Al-Obiadi,Urban,Neighborhood,POINT (41.23290 34.43660)


In [8]:
# spatial join with adm3 areas
adm3_locs_join = iraq_shp_adm3.loc[:,('ADM3_PCODE','ADM3_EN','geometry')]

iom_adm3_df = adm3_locs_join.sjoin(iom_all_locs,how='left',predicate='intersects')
print(adm3_locs_join.shape)
print(iom_all_locs.shape)
print(iom_adm3_df.shape)
iom_adm3_df.head()

(294, 3)
(3717, 6)
(3793, 9)


Unnamed: 0,ADM3_PCODE,ADM3_EN,geometry,index_right,LocationID,PlaceID,Sub-district,LocationType,PlaceType
0,IQG01Q05N01,Abi Gharaq,"POLYGON ((44.36654 32.56190, 44.36466 32.55802...",392.0,2204143.0,23950.0,Markaz Al-Hilla,Urban,Neighborhood
0,IQG01Q05N01,Abi Gharaq,"POLYGON ((44.36654 32.56190, 44.36466 32.55802...",390.0,2204158.0,34275.0,Abu Gharaq,Rural,Village
1,IQG16Q01N02,Abu Dalf,"POLYGON ((44.19124 34.77808, 44.21397 34.75600...",2935.0,2801015.0,26116.0,Markaz Al-Daur,Urban,Neighborhood
1,IQG16Q01N02,Abu Dalf,"POLYGON ((44.19124 34.77808, 44.21397 34.75600...",2941.0,2801021.0,26077.0,Markaz Al-Daur,Urban,Neighborhood
1,IQG16Q01N02,Abu Dalf,"POLYGON ((44.19124 34.77808, 44.21397 34.75600...",2938.0,2801018.0,26078.0,Markaz Al-Daur,Urban,Neighborhood


In [9]:
print(iom_adm3_df[iom_adm3_df['LocationID'].isna()].shape)
print(iom_adm3_df[~iom_adm3_df['LocationID'].isna()].shape)

(79, 9)
(3714, 9)


In [10]:
# check for IOM locations that were missed
missed_iom_locs = iom_all_locs.LocationID.unique()[~np.isin(iom_all_locs.LocationID.unique(),iom_adm3_df['LocationID'].unique())]
iom_all_locs[iom_all_locs['LocationID'].isin(missed_iom_locs)]

Unnamed: 0,LocationID,PlaceID,Sub-district,LocationType,PlaceType,geometry
10,2101036,33466,Al-Rummaneh,Rural,Village,POINT (41.00614 34.42984)
891,1104016,23620,Rizgari,Rural,Village,POINT (42.37722 37.06755)
2718,2708032,25688,Rubiya,Peri-urban,Neighborhood,POINT (42.08371 36.80816)


These locations fall just outside of the bounds of the Adm 3 layers, from manual examination. Will use nearest neighbor matching to join them with Adm3 areas.

In [11]:
iom_nearest_neighbor = iom_all_locs[iom_all_locs['LocationID'].isin(missed_iom_locs)]
# reproject for accurate distance measuring
iom_nearest_neighbor = iom_nearest_neighbor.to_crs('EPSG:3893')
adm3_locs_join_newcrs = adm3_locs_join.to_crs('EPSG:3893')

iom_adm3_nn = adm3_locs_join_newcrs.sjoin_nearest(iom_nearest_neighbor,how='right')
iom_adm3_nn = iom_adm3_nn.to_crs('EPSG:4326')
print(adm3_locs_join.shape)
print(iom_nearest_neighbor.shape)
print(iom_adm3_nn.shape)
iom_adm3_nn.head()

(294, 3)
(3, 6)
(3, 9)


Unnamed: 0,index_left,ADM3_PCODE,ADM3_EN,LocationID,PlaceID,Sub-district,LocationType,PlaceType,geometry
10,81,IQG01Q02N03,Al-Rummaneh,2101036,33466,Al-Rummaneh,Rural,Village,POINT (41.00614 34.42984)
891,243,IQG09Q04N01,Markaz Zakho,1104016,23620,Rizgari,Rural,Village,POINT (42.37722 37.06755)
2718,263,IQG15Q08N04,Rabia,2708032,25688,Rubiya,Peri-urban,Neighborhood,POINT (42.08371 36.80816)


In [12]:
iom_adm3_df = pd.concat([iom_adm3_df,iom_adm3_nn])
print(iom_adm3_df.shape)
# confirm no more IOM locs missing
print(iom_all_locs.LocationID.unique()[~np.isin(iom_all_locs.LocationID.unique(),iom_adm3_df['LocationID'].unique())].shape)
# confirm 3717 adm3 locs
print(iom_adm3_df[~iom_adm3_df['LocationID'].isna()].shape)
iom_adm3_df.head(2)

(3796, 10)
(0,)
(3717, 10)


Unnamed: 0,ADM3_PCODE,ADM3_EN,geometry,index_right,LocationID,PlaceID,Sub-district,LocationType,PlaceType,index_left
0,IQG01Q05N01,Abi Gharaq,"POLYGON ((44.36654 32.56190, 44.36466 32.55802...",392.0,2204143.0,23950.0,Markaz Al-Hilla,Urban,Neighborhood,
0,IQG01Q05N01,Abi Gharaq,"POLYGON ((44.36654 32.56190, 44.36466 32.55802...",390.0,2204158.0,34275.0,Abu Gharaq,Rural,Village,


In [14]:
# see if any IOM locations assigned to multiple Adm 3 areas
mult_iom_locs = iom_adm3_df.loc[:,('ADM3_EN','LocationID')].groupby('LocationID').nunique().reset_index()
mult_iom_locs[mult_iom_locs['ADM3_EN']>1]

Unnamed: 0,LocationID,ADM3_EN


In [15]:
# see whether there are multiple places per locations - there aren't
test_loc_places = iom_adm3_df.loc[:,('LocationID','PlaceID')].groupby('LocationID').nunique().reset_index()
test_loc_places[test_loc_places['PlaceID']>1]

Unnamed: 0,LocationID,PlaceID


In [16]:
# see whether there are multiple locations per placeid - there aren't
test_loc_places = iom_adm3_df.loc[:,('LocationID','PlaceID')].groupby('PlaceID').nunique().reset_index()
test_loc_places[test_loc_places['LocationID']>1]

Unnamed: 0,PlaceID,LocationID


In [17]:
# create final mapping document from LocationIDs to Adm 3 areas
iom_adm3_mapping = source_dir + '/data/iom/iom_adm3_mapping.csv'
iom_adm3_df.loc[:,('LocationID','PlaceID','ADM3_PCODE','ADM3_EN')].to_csv(iom_adm3_mapping,index=False)

### Join to Adm 2 Areas

In [7]:
iraq_adm2_filt = iraq_shp_adm3.loc[:,('ADM2_EN','ADM2_PCODE','geometry')]
iraq_adm2_filt = iraq_adm2_filt.dissolve(by=['ADM2_EN','ADM2_PCODE'])
iraq_adm2_filt = iraq_adm2_filt.reset_index()
iraq_adm2_filt.shape

(101, 3)

In [8]:
iom_adm2_df = iraq_adm2_filt.sjoin(iom_all_locs,how='left',predicate='intersects')
print(iraq_adm2_filt.shape)
print(iom_all_locs.shape)
print(iom_adm2_df.shape)
iom_adm2_df.head()

(101, 3)
(3717, 6)
(3726, 9)


Unnamed: 0,ADM2_EN,ADM2_PCODE,geometry,index_right,LocationID,PlaceID,Sub-district,LocationType,PlaceType
0,Abu Al-Khaseeb,IQG02Q01,"POLYGON ((48.15221 30.43647, 48.16310 30.42845...",691.0,3101023.0,505.0,Markaz Abu al Khaseeb,Urban,Neighborhood
0,Abu Al-Khaseeb,IQG02Q01,"POLYGON ((48.15221 30.43647, 48.16310 30.42845...",683.0,3101003.0,25540.0,Markaz Abu al Khaseeb,Urban,Neighborhood
0,Abu Al-Khaseeb,IQG02Q01,"POLYGON ((48.15221 30.43647, 48.16310 30.42845...",687.0,3101015.0,809.0,Markaz Abu al Khaseeb,Peri-Urban,Neighborhood
0,Abu Al-Khaseeb,IQG02Q01,"POLYGON ((48.15221 30.43647, 48.16310 30.42845...",692.0,3101024.0,24158.0,Markaz Abu al Khaseeb,Urban,Neighborhood
0,Abu Al-Khaseeb,IQG02Q01,"POLYGON ((48.15221 30.43647, 48.16310 30.42845...",689.0,3101017.0,811.0,Markaz Abu al Khaseeb,Peri-Urban,Neighborhood


In [9]:
print(iom_adm2_df[iom_adm2_df['LocationID'].isna()].shape)
print(iom_adm2_df[~iom_adm2_df['LocationID'].isna()].shape)

(12, 9)
(3714, 9)


In [15]:
iom_relig = pd.read_excel(iom_int_locs_file, sheet_name="Q3.6.3 IDP EthnoReligious Group",header=1)
iom_relig = iom_relig.loc[:,('LocationID','PlaceID','District',"% of Arab Sunni Muslim","% of Arab Shia Muslim")]
iom_relig = iom_relig.rename(columns={"% of Arab Sunni Muslim": 'PercArabSunni',"% of Arab Shia Muslim":'PercArabShia'})
iom_relig = iom_relig.fillna(value=0)
print(iom_relig.shape)
iom_relig.head(2)

(1935, 5)


Unnamed: 0,LocationID,PlaceID,District,PercArabSunni,PercArabShia
0,2101008,164,Al-Ka'im,100.0,0.0
1,2101006,90,Al-Ka'im,100.0,0.0


In [21]:
iom_adm2_relig = iom_adm2_df.merge(iom_relig,on=['LocationID','PlaceID'])
iom_adm2_relig = iom_adm2_relig.drop(columns=['geometry','index_right','Sub-district'])
print(iom_adm2_relig.shape)

(1933, 9)


In [19]:
iom_adm2_relig.head(2)

Unnamed: 0,ADM2_EN,ADM2_PCODE,LocationID,PlaceID,LocationType,PlaceType,District,PercArabSunni,PercArabShia
0,Abu Al-Khaseeb,IQG02Q01,3101023.0,505.0,Urban,Neighborhood,Abu Al-Khaseeb,30.0,70.0
1,Abu Al-Khaseeb,IQG02Q01,3101003.0,25540.0,Urban,Neighborhood,Abu Al-Khaseeb,100.0,0.0


In [22]:
iom_adm2_relig_file = source_dir + '/data/iom/iom_adm2_relig.csv'

iom_adm2_relig.to_csv(iom_adm2_relig_file,index=False)

### Joining IOM Subdistricts to Adm 3 Areas - INCOMPLETE, not exact overlap

In [45]:
# another IOM file with Arabic names
#iom_int_locs_file = source_dir + '/data/iom/2021816711501_iom_DTM_Integrated_Location_Assessment_VI.xlsx'
iom_int_locs_file = source_dir + '/data/iom/iom_raw.xlsx'

iom_int_locs = pd.read_excel(iom_int_locs_file, sheet_name="Full Dataset",header=1)
# take only 2 lat/lons from each sub-district for the join
iom_locs_geo = iom_int_locs.loc[:,('Sub-district','Latitude','Longitude')].groupby('Sub-district').head(2)
print(iom_locs_geo.shape)
iom_locs_geo.head(2)

(490, 3)


Unnamed: 0,Sub-district,Latitude,Longitude
0,Al-Obiadi,34.420203,41.201494
1,Al-Obiadi,34.433043,41.234143


In [28]:
# confirm all subdistricts from the condensed data are in the iom_raw data
test_locids = iom_2022.merge(pd.DataFrame({'iom_subd':iom_locs_ar['Sub-district'].unique(),'locids':'Test'}), 
                    left_on='Subdistrict',right_on='iom_subd',how='left')
test_locids[test_locids['locids'].isna()]

Unnamed: 0,LocationID,Governorate,District,Subdistrict,LocationType,PlaceType,LocationEn,isgov,iom_subd,locids


In [46]:
# convert IOM lat/lon to point geometry
iom_locs_geo['geometry'] = gpd.points_from_xy(iom_locs_geo['Longitude'],iom_locs_geo['Latitude'])
iom_locs_geo = iom_locs_geo.drop(columns=['Latitude','Longitude'])
iom_locs_geo = gpd.GeoDataFrame(iom_locs_geo, geometry='geometry',crs='EPSG:4326')
iom_locs_geo.head()

Unnamed: 0,Sub-district,geometry
0,Al-Obiadi,POINT (41.20149 34.42020)
1,Al-Obiadi,POINT (41.23414 34.43304)
9,Al-Rummaneh,POINT (41.28825 34.44625)
10,Al-Rummaneh,POINT (41.00614 34.42984)
16,Markaz Al-Ka'im,POINT (40.97593 34.38771)


In [74]:
# spatial join with adm3 areas
adm3_locs_join = iraq_shp_adm3.loc[:,('ADM3_PCODE','geometry')]

subd_adm3_df = adm3_locs_join.sjoin(iom_locs_geo,how='left',predicate='intersects')
print(adm3_locs_join.shape)
print(iom_locs_geo.shape)
print(subd_adm3_df.shape)
subd_adm3_df.head()

(294, 2)
(490, 2)
(577, 4)


Unnamed: 0,ADM3_PCODE,geometry,index_right,Sub-district
0,IQG07Q02N02,"POLYGON ((44.36654 32.56190, 44.36466 32.55802...",392.0,Markaz Al-Hilla
0,IQG07Q02N02,"POLYGON ((44.36654 32.56190, 44.36466 32.55802...",390.0,Abu Gharaq
1,IQG16Q01N02,"POLYGON ((44.19124 34.77808, 44.21397 34.75600...",2933.0,Markaz Al-Daur
1,IQG16Q01N02,"POLYGON ((44.19124 34.77808, 44.21397 34.75600...",2932.0,Markaz Al-Daur
2,IQG08Q02N02,"POLYGON ((44.05011 33.42133, 44.05663 33.42086...",441.0,Markaz Abu Ghraib


In [79]:
adm3_subd_map = subd_adm3_df.loc[:,('ADM3_PCODE','Sub-district')].drop_duplicates()
adm3_subd_map = adm3_subd_map[adm3_subd_map.notna()]
print(adm3_subd_map.shape)
adm3_subd_map.head()

(391, 2)


Unnamed: 0,ADM3_PCODE,Sub-district
0,IQG07Q02N02,Markaz Al-Hilla
0,IQG07Q02N02,Abu Gharaq
1,IQG16Q01N02,Markaz Al-Daur
2,IQG08Q02N02,Markaz Abu Ghraib
2,IQG08Q02N02,Akar koof


In [76]:
# find places where no IOM was matched to ADM3
nomatch_adm3 = subd_adm3_df[subd_adm3_df['Sub-district'].isna()].drop_duplicates()
print(nomatch_adm3.shape)
nomatch_adm3.head()

(90, 4)


Unnamed: 0,ADM3_PCODE,geometry,index_right,Sub-district
12,IQG14Q05N01,"POLYGON ((47.72724 31.46606, 47.71829 31.44591...",,
15,IQG02Q03N01,"POLYGON ((48.46115 29.99500, 48.41589 29.99451...",,
17,IQG17Q04N01,"POLYGON ((45.93125 31.31548, 45.93482 31.31191...",,
20,IQG02Q05N01,"POLYGON ((47.49807 30.97416, 47.50160 30.96785...",,
21,IQG17Q03N03,"POLYGON ((46.60449 31.45911, 46.59415 31.45386...",,


In [77]:
# for unmatched Admin areas, try using all the iom areas to join
iom_locs_geo_full = iom_int_locs.loc[:,('Sub-district','Latitude','Longitude')]
print(iom_locs_geo_full.shape)
# convert IOM lat/lon to point geometry
iom_locs_geo_full['geometry'] = gpd.points_from_xy(iom_locs_geo_full['Longitude'],iom_locs_geo_full['Latitude'])
iom_locs_geo_full = iom_locs_geo_full.drop(columns=['Latitude','Longitude'])
iom_locs_geo_full = gpd.GeoDataFrame(iom_locs_geo_full, geometry='geometry',crs='EPSG:4326')
iom_locs_geo_full.head()

(3717, 3)


Unnamed: 0,Sub-district,geometry
0,Al-Obiadi,POINT (41.20149 34.42020)
1,Al-Obiadi,POINT (41.23414 34.43304)
2,Al-Obiadi,POINT (41.22633 34.42921)
3,Al-Obiadi,POINT (41.22601 34.43465)
4,Al-Obiadi,POINT (41.23290 34.43660)


In [81]:
# spatial join of remaining unmatched adm3 areas
adm3_locs_join_rd2 = nomatch_adm3.loc[:,('ADM3_PCODE','geometry')]

subd_adm3_df_rd2 = adm3_locs_join_rd2.sjoin(iom_locs_geo_full,how='left',predicate='intersects')
print(adm3_locs_join_rd2.shape)
print(iom_locs_geo_full.shape)
adm3_subd_map_rd2 = subd_adm3_df_rd2.loc[:,('ADM3_PCODE','Sub-district')].drop_duplicates()
adm3_subd_map_rd2 = adm3_subd_map_rd2[adm3_subd_map_rd2['Sub-district'].notna()]
print(adm3_subd_map_rd2.shape)
adm3_subd_map_rd2.head()

(90, 2)
(3717, 2)
(15, 2)


Unnamed: 0,ADM3_PCODE,Sub-district
28,IQG08Q01N02,Markaz Al Adhamia
30,IQG05Q01N02,Markaz Al-Diwaniya
45,IQG04Q01N01,Markaz Al-Najaf
64,IQG13Q04N03,Al-Multaqa
64,IQG13Q04N03,Yaychi


In [82]:
# final ADM3 areas that successfully intersected wih an IOM Subdistrict
adm3_joins_iom = pd.concat([adm3_subd_map_rd2,adm3_subd_map])
adm3_joins_iom.shape

(406, 2)

In [84]:
# confirm no IOM subdistricts are missing from adm3_joins_iom
iom_remainder_tomatch = iom_locs_geo.merge(adm3_joins_iom, on='Sub-district',how='left')
iom_remainder_tomatch[iom_remainder_tomatch['ADM3_PCODE'].isna()]

Unnamed: 0,Sub-district,geometry,ADM3_PCODE


In [95]:
# identify double-matched adm3 areas to subdistricts
double_matched_subds = adm3_joins_iom.groupby('Sub-district').nunique().reset_index()
double_matched_subds = double_matched_subds[double_matched_subds['ADM3_PCODE']>1]
print(double_matched_subds.shape)
double_matched_subds = double_matched_subds.merge(adm3_joins_iom, on='Sub-district',how='left')
double_matched_subds = double_matched_subds.drop(columns=['ADM3_PCODE_x'])
double_matched_subds = double_matched_subds.rename(columns={'ADM3_PCODE_y':'ADM3_PCODE'})
print(double_matched_subds.shape)
double_matched_subds = double_matched_subds.merge(iraq_shp_adm3.loc[:,('ADM3_PCODE','ADM3_EN')])
print(double_matched_subds.shape)
double_matched_subds

(49, 2)
(101, 2)
(101, 3)


Unnamed: 0,Sub-district,ADM3_PCODE,ADM3_EN
0,9 Nissan,IQG08Q06N02,Baghdad Al-Jedeeda
1,9 Nissan,IQG08Q07N01,Markaz Al-Thawra
2,Abaiji,IQG16Q04N04,Al-Dujeel
3,Abaiji,IQG08Q02N04,Al-Tarmiya
4,Akar koof,IQG08Q02N02,Abu Ghraib
...,...,...,...
96,Warmawa,IQG06Q01N02,Qaradagh
97,Yaychi,IQG13Q04N03,Al-Multaka
98,Yaychi,IQG13Q04N04,Yaijy
99,Zummar,IQG15Q08N01,Markaz Telafar


In [96]:
# if names match in double-matched areas, use the matched names 
double_matched_subds_namematch = double_matched_subds[double_matched_subds['Sub-district']==double_matched_subds['ADM3_EN']]
print(double_matched_subds_namematch.shape)
double_matched_subds_namematch

(20, 3)


Unnamed: 0,Sub-district,ADM3_PCODE,ADM3_EN
8,Al-Amirya,IQG01Q01N01,Al-Amirya
12,Al-Duloeyah,IQG16Q04N03,Al-Duloeyah
14,Al-Garma,IQG01Q01N02,Al-Garma
16,Al-Habbaniya,IQG01Q03N01,Al-Habbaniya
18,Al-Haqlaniya,IQG01Q06N01,Al-Haqlaniya
25,Al-Qosh,IQG15Q09N02,Al-Qosh
32,Al-Yousifya,IQG08Q05N04,Al-Yousifya
34,Al-Zubaidiya,IQG18Q04N01,Al-Zubaidiya
37,Batifa,IQG09Q04N02,Batifa
53,Markaz Al-Diwaniya,IQG05Q02N04,Markaz Al-Diwaniya


In [102]:
# where names don't match in double-matched areas, prep for manual matching
nomatch_doublematch = double_matched_subds[~double_matched_subds['Sub-district'].isin(double_matched_subds_namematch['Sub-district'].unique())]
print(nomatch_doublematch.shape)

fin_manual_match = source_dir + '/final_manual_match.csv'
nomatch_doublematch.to_csv(fin_manual_match)

(59, 3)


#### Final Matched Dataframe

In [100]:
# iom-adm3 joins that didn't double-match geometries
single_match = adm3_joins_iom.groupby('Sub-district').nunique().reset_index()
single_match = single_match[single_match['ADM3_PCODE']==1]
single_match = single_match.merge(adm3_joins_iom, on='Sub-district',how='left')
single_match = single_match.drop(columns=['ADM3_PCODE_x'])
single_match = single_match.rename(columns={'ADM3_PCODE_y':'ADM3_PCODE'})
single_match = single_match.merge(iraq_shp_adm3.loc[:,('ADM3_PCODE','ADM3_EN')],on='ADM3_PCODE',how='left')

print(single_match.shape)
single_match.head(2)

(215, 3)


Unnamed: 0,Sub-district,ADM3_PCODE,ADM3_EN
0,AL-Hurr,IQG12Q03N01,Markaz Kerbela
1,AL-Khayrat,IQG12Q02N02,Markaz Al-Hindiya


In [None]:
fin_iom_adm3_name_map = pd.concat([single_match,double_matched_subds_namematch])

In [None]:
# testing how easy it is to join IOM with Adm 3 names
iom_subdistricts_ar = pd.DataFrame({'iom_subd':iom_2022.Subdistrict.unique()})
iraq_adm3_ar_names = pd.DataFrame({'adm3':iraq_shp_adm3.ADM3_AR.unique()})

## English Name Joins - Does not work completely, not exact overlap

In [None]:
# testing how easy it is to join IOM with Adm 3 names
iom_subdistricts = pd.DataFrame({'iom_subd':iom_2022.Subdistrict.unique()})
iraq_adm3_names = pd.DataFrame({'adm3':iraq_shp_adm3.ADM3_EN.unique()})

join_names = pd.merge(iom_subdistricts, iraq_adm3_names, left_on='iom_subd', right_on='adm3',how='outer')
print(join_names.shape)
# Adm 3 names that don't immediately match to IOM Subdistrict names
print(join_names[join_names['iom_subd'].isna()].shape)
# IOM Subdistrict names that don't immediately match to Adm 3 names
print(join_names[join_names['adm3'].isna()].shape)

In [None]:
iom_subd_tocorrect = join_names[join_names['adm3'].isna()]['iom_subd']
adm3_tocorrect = join_names[join_names['iom_subd'].isna()]['adm3']

names_tomatch = pd.DataFrame({'iom_subdistict':list(sorted(iom_subd_tocorrect))})
names_tomatch = names_tomatch.join(pd.DataFrame({'adm3_en':list(sorted(adm3_tocorrect))}),how='outer')
print(names_tomatch.shape)
names_tomatch.head()

In [36]:
names_tomatch_file = source_dir + '/adm3_names_tomatch.csv'
names_tomatch.to_csv(names_tomatch_file)