<a href="https://colab.research.google.com/github/natalie-ayers/Iraq-post-conflict-rebel-governance/blob/main/GPW_Population_to_Iraq_Adm3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install rioxarray

Collecting rioxarray
  Downloading rioxarray-0.15.1-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.7/53.7 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting rasterio>=1.3 (from rioxarray)
  Downloading rasterio-1.3.9-cp310-cp310-manylinux2014_x86_64.whl (20.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.6/20.6 MB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
Collecting affine (from rasterio>=1.3->rioxarray)
  Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Collecting snuggs>=1.4.1 (from rasterio>=1.3->rioxarray)
  Downloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Installing collected packages: snuggs, affine, rasterio, rioxarray
Successfully installed affine-2.4.0 rasterio-1.3.9 rioxarray-0.15.1 snuggs-1.4.7


In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import xarray as xr
import rioxarray as rxr
import os
import re

## Citation
Center for International Earth Science Information Network - CIESIN - Columbia University. 2018. Gridded Population of the World, Version 4 (GPWv4): Population Density Adjusted to Match 2015 Revision UN WPP Country Totals, Revision 11. Palisades, New York: NASA Socioeconomic Data and Applications Center (SEDAC). https://doi.org/10.7927/H4F47M65. Accessed 24 March 2024.

In [2]:
iraq_file = 'irq_admbnda_adm0_cso_itos_20190603.shp'
iraq_df = gpd.read_file(iraq_file)
iraq_df = iraq_df.loc[:,('ADM0_EN','geometry')]
print(iraq_df.bounds)
minx, miny, maxx, maxy = iraq_df.bounds.values[0]
print(minx, miny, maxx, maxy)
iraq_df.head()

        minx       miny       maxx       maxy
0  38.794836  29.069527  48.623795  37.377264
38.794836202000056 29.06952693900007 48.62379496600005 37.377264006000075


Unnamed: 0,ADM0_EN,geometry
0,Iraq,"POLYGON ((42.84654 37.34800, 42.85154 37.34657..."


In [13]:
iraq_adm3_file = 'irq_admbnda_adm3_cso_20190603.shp'
iraq_adm3_df = gpd.read_file(iraq_adm3_file)
iraq_adm3_df = iraq_adm3_df.loc[:,('Shape_Area','ADM3_PCODE','geometry')]
print(iraq_adm3_df.shape)

(294, 3)


The below cells were run separately for each year: 2000, 2005, 2010, 2015, and 2020. The only changes made were the input and output file names. Attempts to process all files together as a loop were disregarded due to the high RAM usage for this process and frequent kernel crashes when building functions for mass processing.  

In [38]:
rxr_file = 'gpw_v4_population_count_adjusted_to_2015_unwpp_country_totals_rev11_2020_30_sec.tif'

rxr_year = rxr.open_rasterio(rxr_file)
rxr_year_iraq = rxr_year.rio.clip(iraq_df.geometry,iraq_df.crs)

In [39]:
rxr_year_iraq_df = rxr_year_iraq.squeeze().drop("spatial_ref").drop("band")
rxr_year_iraq_df.name = 'pop_count'
rxr_df = rxr_year_iraq_df.to_dataframe().reset_index()
rxr_df['year'] = re.findall(r'\d\d\d\d',rxr_file)[1]
print(rxr_df.shape)
rxr_df.head()

(1175463, 4)


Unnamed: 0,y,x,pop_count,year
0,37.370833,38.804167,-3.4028230000000003e+38,2020
1,37.370833,38.8125,-3.4028230000000003e+38,2020
2,37.370833,38.820833,-3.4028230000000003e+38,2020
3,37.370833,38.829167,-3.4028230000000003e+38,2020
4,37.370833,38.8375,-3.4028230000000003e+38,2020


In [40]:
rxr_df['geometry'] = gpd.points_from_xy(x=rxr_df.x, y=rxr_df.y, crs='epsg:4326')
rxr_df = gpd.GeoDataFrame(rxr_df, geometry='geometry')
print(rxr_df.shape)
rxr_df.head()

(1175463, 5)


Unnamed: 0,y,x,pop_count,year,geometry
0,37.370833,38.804167,-3.4028230000000003e+38,2020,POINT (38.80417 37.37083)
1,37.370833,38.8125,-3.4028230000000003e+38,2020,POINT (38.81250 37.37083)
2,37.370833,38.820833,-3.4028230000000003e+38,2020,POINT (38.82083 37.37083)
3,37.370833,38.829167,-3.4028230000000003e+38,2020,POINT (38.82917 37.37083)
4,37.370833,38.8375,-3.4028230000000003e+38,2020,POINT (38.83750 37.37083)


In [41]:
rxr_df_adm3 = rxr_df.sjoin(iraq_adm3_df, predicate='intersects',how='right')
print(rxr_df_adm3.shape)
print(rxr_df_adm3[rxr_df_adm3['ADM3_PCODE'].isna()].shape)
rxr_df_adm3.head(2)

(606854, 8)
(0, 8)


Unnamed: 0,index_left,y,x,pop_count,year,Shape_Area,ADM3_PCODE,geometry
0,680949,32.5625,44.354167,414.723419,2020,0.015754,IQG07Q02N02,"POLYGON ((44.36654 32.56190, 44.36466 32.55802..."
0,680950,32.5625,44.3625,414.723328,2020,0.015754,IQG07Q02N02,"POLYGON ((44.36654 32.56190, 44.36466 32.55802..."


In [42]:
pop_adm3_gb = rxr_df_adm3.drop(columns=['index_left','x','y'])
pop_adm3_gb = pop_adm3_gb.groupby(by=['ADM3_PCODE','Shape_Area','year']).sum().reset_index()
pop_adm3_gb['pop_density'] = pop_adm3_gb['pop_count']/pop_adm3_gb['Shape_Area']
print(pop_adm3_gb.shape)
pop_adm3_gb.head(2)

(294, 5)


  pop_adm3_gb = pop_adm3_gb.groupby(by=['ADM3_PCODE','Shape_Area','year']).sum().reset_index()


Unnamed: 0,ADM3_PCODE,Shape_Area,year,pop_count,pop_density
0,IQG01Q01N01,0.252123,2020,378111.5,1499708.0
1,IQG01Q01N02,0.117594,2020,214496.0,1824031.0


In [43]:
pop_adm3_gb[pop_adm3_gb['pop_count'].isna()]

Unnamed: 0,ADM3_PCODE,Shape_Area,year,pop_count,pop_density
4,IQG01Q02N02,0.157582,2020,,
5,IQG01Q02N03,0.219533,2020,,
12,IQG01Q05N01,0.526509,2020,,
13,IQG01Q05N02,0.587026,2020,,
14,IQG01Q06N01,0.151016,2020,,
15,IQG01Q06N02,0.189391,2020,,
24,IQG02Q03N01,0.101365,2020,,
25,IQG02Q03N02,0.038955,2020,,
125,IQG08Q07N01,0.007619,2020,,
127,IQG09Q01N02,0.039303,2020,,


In [44]:
pop_adm3_gb.to_csv('pop_adm3_2020.csv',index=False)