This notebook joins the July 13 and November 23, 2021 outputs to prepare them for Mapbox processing.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import shapely
import geopandas as gpd
import rasterio
import rasterio.features

import wget

import os
import glob
from zipfile import ZipFile

from datetime import datetime

In [13]:
# --- Filename labels: edit these to match the run being processed ---
country_code, adm_code = "mdg", "adm2"

# --- Date labels (strings, so a leading zero can be written where needed) ---
month = "november"
year, month_num, day = "2021", "11", "23"  # month_num and day: two digits expected

In [2]:
# Copy every object matching the mapthegap-mdg-adm* prefix (the July ADM2 and
# ADM3 .gpkg and .mbtiles outputs) from the GCS bucket into ../data.
!gsutil cp gs://tm-ardie/2021-07-13-osm-completeness-madagascar/mapthegap-mdg-adm* ../data

Copying gs://tm-ardie/2021-07-13-osm-completeness-madagascar/mapthegap-mdg-adm2-2021-07-12.gpkg...
Copying gs://tm-ardie/2021-07-13-osm-completeness-madagascar/mapthegap-mdg-adm2-2021-07-12.mbtiles...
Copying gs://tm-ardie/2021-07-13-osm-completeness-madagascar/mapthegap-mdg-adm3-2021-07-12.gpkg...
Copying gs://tm-ardie/2021-07-13-osm-completeness-madagascar/mapthegap-mdg-adm3-2021-07-12.mbtiles...
\ [4 files][ 44.8 MiB/ 44.8 MiB]                                                
Operation completed over 4 objects/44.8 MiB.                                     


In [16]:
# Re-download the November 23 ADM3 output from the bucket into ../data.
# NOTE(review): presumably this restores the original November file after the
# ADM3 save cell overwrote the same path - confirm.
!gsutil cp gs://tm-ardie/mapthegap-mdg-adm3-2021-11-23.gpkg ../data

Copying gs://tm-ardie/mapthegap-mdg-adm3-2021-11-23.gpkg...
/ [1 files][ 23.9 MiB/ 23.9 MiB]                                                
Operation completed over 1 objects/23.9 MiB.                                     


ADM2 JOIN

In [5]:
# Read the July ADM2 output (GeoPackage) and show it
adm2_jul_gdf = gpd.read_file(
    "../data/mapthegap-mdg-adm2-2021-07-12.gpkg",
    driver="GPKG",
)
adm2_jul_gdf

Unnamed: 0,ADM2_PCODE,ADM2_EN,ADM2_TYPE,pixels_withbuilding_july2021,pixels_nobuilding_july2021,percentage_completeness_july2021,pixels_withbuilding_jan2020,pixels_nobuilding_jan2020,percentage_completeness_jan2020,geometry
0,MG11101001A,1er Arrondissement,District,3216,990,76.462197,2472,1734,58.773181,"POLYGON ((47.50556 -18.89146, 47.50563 -18.891..."
1,MG11101002A,2e Arrondissement,District,1519,3911,27.974217,1439,3991,26.500921,"POLYGON ((47.55842 -18.91178, 47.55857 -18.911..."
2,MG11101003A,3e Arrondissement,District,1456,1742,45.528455,1424,1774,44.527830,"POLYGON ((47.51365 -18.87834, 47.51775 -18.879..."
3,MG11101004A,4e Arrondissement,District,2789,2304,54.761437,1646,3447,32.318869,"POLYGON ((47.50262 -18.91043, 47.50261 -18.910..."
4,MG11101005A,5e Arrondissement,District,3037,6104,33.223936,2153,6988,23.553222,"POLYGON ((47.53500 -18.85464, 47.53518 -18.854..."
...,...,...,...,...,...,...,...,...,...,...
114,MG71719,Ambanja,District,2715,14104,16.142458,2687,14132,15.975980,"MULTIPOLYGON (((47.82532 -14.04337, 47.82940 -..."
115,MG72710,Antalaha,District,11661,3525,76.787831,2755,12431,18.141709,"POLYGON ((50.03756 -14.51991, 50.04068 -14.520..."
116,MG72711,Sambava,District,12205,10491,53.775996,5898,16798,25.986958,"POLYGON ((50.14061 -13.84548, 50.14055 -13.846..."
117,MG72712,Andapa,District,11310,1970,85.165663,3076,10204,23.162651,"POLYGON ((49.56381 -14.15472, 49.56410 -14.158..."


In [6]:
# Read the November ADM2 output (GeoPackage)
adm2_nov_gdf = gpd.read_file(
    "../data/mapthegap-mdg-adm2-2021-11-23.gpkg",
    driver="GPKG",
)

Unnamed: 0,ADM2_EN,ADM2_PCODE,pixels_withbuilding_november2021,pixels_nobuilding_november2021,percentage_completeness_november2021,geometry
0,1er Arrondissement,MG11101001A,3241,965,77.056586,"POLYGON ((47.50556 -18.89146, 47.50563 -18.891..."
1,2e Arrondissement,MG11101002A,1516,3914,27.918969,"POLYGON ((47.55842 -18.91178, 47.55857 -18.911..."
2,3e Arrondissement,MG11101003A,1458,1740,45.590994,"POLYGON ((47.51365 -18.87834, 47.51775 -18.879..."
3,4e Arrondissement,MG11101004A,2803,2290,55.036324,"POLYGON ((47.50262 -18.91043, 47.50261 -18.910..."
4,5e Arrondissement,MG11101005A,3043,6098,33.289574,"POLYGON ((47.53500 -18.85464, 47.53518 -18.854..."
...,...,...,...,...,...,...
114,Ambanja,MG71719,2717,14102,16.154349,"MULTIPOLYGON (((47.82532 -14.04337, 47.82940 -..."
115,Antalaha,MG72710,12285,2901,80.896879,"POLYGON ((50.03756 -14.51991, 50.04068 -14.520..."
116,Sambava,MG72711,19597,3099,86.345612,"POLYGON ((50.14061 -13.84548, 50.14055 -13.846..."
117,Andapa,MG72712,11310,1970,85.165663,"POLYGON ((49.56381 -14.15472, 49.56410 -14.158..."


In [11]:
# Bring the November 2021 values onto the July ADM2 table.
# Only the join key and the three November metrics are taken from the November
# output, so names, types and geometry all come from the July side.
subset_cols = [
    'ADM2_PCODE',
    'pixels_withbuilding_november2021',
    'pixels_nobuilding_november2021',
    'percentage_completeness_november2021',
]

# Left join on the PCODE: every July row is kept.
# validate='many_to_one' makes pandas raise MergeError if a PCODE occurs more
# than once on the November side, instead of silently duplicating July rows.
adm2_merged_gdf = pd.merge(
    adm2_jul_gdf,
    adm2_nov_gdf[subset_cols],
    how='left',
    on='ADM2_PCODE',
    validate='many_to_one',
)
adm2_merged_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 119 entries, 0 to 118
Data columns (total 13 columns):
 #   Column                                Non-Null Count  Dtype   
---  ------                                --------------  -----   
 0   ADM2_PCODE                            119 non-null    object  
 1   ADM2_EN                               119 non-null    object  
 2   ADM2_TYPE                             119 non-null    object  
 3   pixels_withbuilding_july2021          119 non-null    int64   
 4   pixels_nobuilding_july2021            119 non-null    int64   
 5   percentage_completeness_july2021      119 non-null    float64 
 6   pixels_withbuilding_jan2020           119 non-null    int64   
 7   pixels_nobuilding_jan2020             119 non-null    int64   
 8   percentage_completeness_jan2020       119 non-null    float64 
 9   geometry                              119 non-null    geometry
 10  pixels_withbuilding_november2021      119 non-null    int64   
 11

In [18]:
# Save the merged ADM2 table as a GeoPackage.
# The path is assembled from the labels defined in the configuration cell, so
# changing the country or date there retargets the output; with the current
# labels it resolves to ../data/mapthegap-mdg-adm2-2021-11-23.gpkg.
# NOTE: this is the same path the November ADM2 input is read from, so the
# November-only file on disk is replaced by the merged one.
filename = "../data/mapthegap-{}-adm2-{}-{}-{}.gpkg".format(
    country_code, year, month_num, day
)
adm2_merged_gdf.to_file(filename, driver='GPKG')

filename

'../data/mapthegap-mdg-adm2-2021-11-23.gpkg'

ADM3 JOIN

In [32]:
# Read the July ADM3 output (GeoPackage) and preview the first two rows
adm3_jul_gdf = gpd.read_file(
    "../data/mapthegap-mdg-adm3-2021-07-12.gpkg",
    driver="GPKG",
)
adm3_jul_gdf.head(2)

Unnamed: 0,ADM3_PCODE,ADM3_EN,ADM3_TYPE,pixels_withbuilding_july2021,pixels_nobuilding_july2021,percentage_completeness_july2021,pixels_withbuilding_jan2020,pixels_nobuilding_jan2020,percentage_completeness_jan2020,geometry
0,MG11101001,1er Arrondissement,Commune,3216,990,76.462197,2472,1734,58.773181,"POLYGON ((47.50556 -18.89146, 47.50563 -18.891..."
1,MG11101002,2e Arrondissement,Commune,1519,3911,27.974217,1439,3991,26.500921,"POLYGON ((47.55842 -18.91178, 47.55857 -18.911..."


In [25]:
# Load the ADM3 boundary shapefile; it carries the ADM2_PCODE of each ADM3 unit,
# which is used to add that column back to the July table.
adm3_info_gdf = gpd.read_file(
    "../download_data/{}_adm_all/mdg_admbnda_adm3_BNGRC_OCHA_20181031.shp".format(
        country_code
    )
)
adm3_info_gdf.head(2)

Unnamed: 0,ADM0_PCODE,ADM0_EN,ADM1_PCODE,ADM1_EN,ADM1_TYPE,ADM2_PCODE,ADM2_EN,ADM2_TYPE,ADM3_PCODE,ADM3_EN,ADM3_TYPE,PROV_CODE_,OLD_PROVIN,PROV_TYPE,NOTES,SOURCE,geometry
0,MG,Madagascar,MG11,Analamanga,Region,MG11101001A,1er Arrondissement,District,MG11101001,1er Arrondissement,Commune,1,Antananarivo,Old Provinces/Faritany dissolved in 2007,Previous district name is Antananarivo Renivoh...,Note that Communes (admin 3) have become the D...,"POLYGON ((47.50556 -18.89146, 47.50563 -18.891..."
1,MG,Madagascar,MG11,Analamanga,Region,MG11101002A,2e Arrondissement,District,MG11101002,2e Arrondissement,Commune,1,Antananarivo,Old Provinces/Faritany dissolved in 2007,Previous district name is Antananarivo Renivoh...,Note that Communes (admin 3) have become the D...,"POLYGON ((47.55842 -18.91178, 47.55857 -18.911..."


In [33]:
# Attach the ADM2_PCODE from the boundary table to each July ADM3 row.
# An ADM2_PCODE column left over from an earlier run of this cell is dropped
# first, so re-running the cell does not produce ADM2_PCODE_x / ADM2_PCODE_y
# suffix columns.
# validate='many_to_one' makes pandas raise MergeError if an ADM3_PCODE occurs
# more than once in the boundary table, instead of silently duplicating rows.
adm3_jul_gdf = pd.merge(
    adm3_jul_gdf.drop(columns='ADM2_PCODE', errors='ignore'),
    adm3_info_gdf[['ADM2_PCODE', 'ADM3_PCODE']],
    how='left',
    on='ADM3_PCODE',
    validate='many_to_one',
)
adm3_jul_gdf.head()

Unnamed: 0,ADM3_PCODE,ADM3_EN,ADM3_TYPE,pixels_withbuilding_july2021,pixels_nobuilding_july2021,percentage_completeness_july2021,pixels_withbuilding_jan2020,pixels_nobuilding_jan2020,percentage_completeness_jan2020,geometry,ADM2_PCODE
0,MG11101001,1er Arrondissement,Commune,3216,990,76.462197,2472,1734,58.773181,"POLYGON ((47.50556 -18.89146, 47.50563 -18.891...",MG11101001A
1,MG11101002,2e Arrondissement,Commune,1519,3911,27.974217,1439,3991,26.500921,"POLYGON ((47.55842 -18.91178, 47.55857 -18.911...",MG11101002A
2,MG11101003,3e Arrondissement,Commune,1456,1742,45.528455,1424,1774,44.52783,"POLYGON ((47.51365 -18.87834, 47.51775 -18.879...",MG11101003A
3,MG11101004,4e Arrondissement,Commune,2789,2304,54.761437,1646,3447,32.318869,"POLYGON ((47.50262 -18.91043, 47.50261 -18.910...",MG11101004A
4,MG11101005,5e Arrondissement,Commune,3037,6104,33.223936,2153,6988,23.553222,"POLYGON ((47.53500 -18.85464, 47.53518 -18.854...",MG11101005A


In [35]:
# Print the column list, dtypes and non-null counts of the July ADM3 table
adm3_jul_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1579 entries, 0 to 1578
Data columns (total 11 columns):
 #   Column                            Non-Null Count  Dtype   
---  ------                            --------------  -----   
 0   ADM3_PCODE                        1579 non-null   object  
 1   ADM3_EN                           1579 non-null   object  
 2   ADM3_TYPE                         1579 non-null   object  
 3   pixels_withbuilding_july2021      1579 non-null   int64   
 4   pixels_nobuilding_july2021        1579 non-null   int64   
 5   percentage_completeness_july2021  1579 non-null   float64 
 6   pixels_withbuilding_jan2020       1579 non-null   int64   
 7   pixels_nobuilding_jan2020         1579 non-null   int64   
 8   percentage_completeness_jan2020   1579 non-null   float64 
 9   geometry                          1579 non-null   geometry
 10  ADM2_PCODE                        1579 non-null   object  
dtypes: float64(2), geometry(1), int64(4), object(4)


In [34]:
# Read the November ADM3 output (GeoPackage) and show it
adm3_nov_gdf = gpd.read_file(
    "../data/mapthegap-mdg-adm3-2021-11-23.gpkg",
    driver="GPKG",
)
adm3_nov_gdf

Unnamed: 0,ADM3_EN,ADM3_PCODE,pixels_withbuilding_november2021,pixels_nobuilding_november2021,percentage_completeness_november2021,geometry
0,1er Arrondissement,MG11101001,3241,965,77.056586,"POLYGON ((47.50556 -18.89146, 47.50563 -18.891..."
1,2e Arrondissement,MG11101002,1516,3914,27.918969,"POLYGON ((47.55842 -18.91178, 47.55857 -18.911..."
2,3e Arrondissement,MG11101003,1458,1740,45.590994,"POLYGON ((47.51365 -18.87834, 47.51775 -18.879..."
3,4e Arrondissement,MG11101004,2803,2290,55.036324,"POLYGON ((47.50262 -18.91043, 47.50261 -18.910..."
4,5e Arrondissement,MG11101005,3043,6098,33.289574,"POLYGON ((47.53500 -18.85464, 47.53518 -18.854..."
...,...,...,...,...,...,...
1574,Amboriala,MG72716270,419,120,77.736549,"POLYGON ((49.43999 -13.77223, 49.44542 -13.773..."
1575,Maromokotra Loky,MG72716290,207,429,32.547170,"POLYGON ((49.62261 -12.80568, 49.62282 -12.806..."
1576,Antsirabe Nord,MG72716310,2036,324,86.271186,"POLYGON ((49.94996 -13.89482, 49.95034 -13.895..."
1577,Ampisikinana,MG72716330,401,220,64.573269,"MULTIPOLYGON (((49.83896 -12.85926, 49.83717 -..."


In [38]:
# Bring the November 2021 values onto the July ADM3 table.
# Only the join key and the three November metrics are taken from the November
# output, so names, types and geometry all come from the July side.
subset_cols = [
    'ADM3_PCODE',
    'pixels_withbuilding_november2021',
    'pixels_nobuilding_november2021',
    'percentage_completeness_november2021',
]

# Left join on the PCODE: every July row is kept.
# validate='many_to_one' makes pandas raise MergeError if a PCODE occurs more
# than once on the November side, instead of silently duplicating July rows.
adm3_merged_gdf = pd.merge(
    adm3_jul_gdf,
    adm3_nov_gdf[subset_cols],
    how='left',
    on='ADM3_PCODE',
    validate='many_to_one',
)

# Two separate statements: writing them as one comma-separated expression
# builds a tuple of their return values and the cell echoes "(None, None)".
adm3_merged_gdf.info()
display(adm3_merged_gdf)

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 1579 entries, 0 to 1578
Data columns (total 14 columns):
 #   Column                                Non-Null Count  Dtype   
---  ------                                --------------  -----   
 0   ADM3_PCODE                            1579 non-null   object  
 1   ADM3_EN                               1579 non-null   object  
 2   ADM3_TYPE                             1579 non-null   object  
 3   pixels_withbuilding_july2021          1579 non-null   int64   
 4   pixels_nobuilding_july2021            1579 non-null   int64   
 5   percentage_completeness_july2021      1579 non-null   float64 
 6   pixels_withbuilding_jan2020           1579 non-null   int64   
 7   pixels_nobuilding_jan2020             1579 non-null   int64   
 8   percentage_completeness_jan2020       1579 non-null   float64 
 9   geometry                              1579 non-null   geometry
 10  ADM2_PCODE                            1579 non-null   object  
 

Unnamed: 0,ADM3_PCODE,ADM3_EN,ADM3_TYPE,pixels_withbuilding_july2021,pixels_nobuilding_july2021,percentage_completeness_july2021,pixels_withbuilding_jan2020,pixels_nobuilding_jan2020,percentage_completeness_jan2020,geometry,ADM2_PCODE,pixels_withbuilding_november2021,pixels_nobuilding_november2021,percentage_completeness_november2021
0,MG11101001,1er Arrondissement,Commune,3216,990,76.462197,2472,1734,58.773181,"POLYGON ((47.50556 -18.89146, 47.50563 -18.891...",MG11101001A,3241,965,77.056586
1,MG11101002,2e Arrondissement,Commune,1519,3911,27.974217,1439,3991,26.500921,"POLYGON ((47.55842 -18.91178, 47.55857 -18.911...",MG11101002A,1516,3914,27.918969
2,MG11101003,3e Arrondissement,Commune,1456,1742,45.528455,1424,1774,44.527830,"POLYGON ((47.51365 -18.87834, 47.51775 -18.879...",MG11101003A,1458,1740,45.590994
3,MG11101004,4e Arrondissement,Commune,2789,2304,54.761437,1646,3447,32.318869,"POLYGON ((47.50262 -18.91043, 47.50261 -18.910...",MG11101004A,2803,2290,55.036324
4,MG11101005,5e Arrondissement,Commune,3037,6104,33.223936,2153,6988,23.553222,"POLYGON ((47.53500 -18.85464, 47.53518 -18.854...",MG11101005A,3043,6098,33.289574
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1574,MG72716270,Amboriala,Commune,419,120,77.736549,0,539,0.000000,"POLYGON ((49.43999 -13.77223, 49.44542 -13.773...",MG72716,419,120,77.736549
1575,MG72716290,Maromokotra Loky,Commune,3,633,0.471698,3,633,0.471698,"POLYGON ((49.62261 -12.80568, 49.62282 -12.806...",MG72716,207,429,32.547170
1576,MG72716310,Antsirabe Nord,Commune,61,2299,2.584746,5,2355,0.211864,"POLYGON ((49.94996 -13.89482, 49.95034 -13.895...",MG72716,2036,324,86.271186
1577,MG72716330,Ampisikinana,Commune,401,220,64.573269,0,621,0.000000,"MULTIPOLYGON (((49.83896 -12.85926, 49.83717 -...",MG72716,401,220,64.573269


(None, None)

In [39]:
# Save the merged ADM3 table as a GeoPackage.
# The path is assembled from the labels defined in the configuration cell, so
# changing the country or date there retargets the output; with the current
# labels it resolves to ../data/mapthegap-mdg-adm3-2021-11-23.gpkg.
# NOTE: this is the same path the November ADM3 input is read from, so the
# November-only file on disk is replaced by the merged one.
filename = "../data/mapthegap-{}-adm3-{}-{}-{}.gpkg".format(
    country_code, year, month_num, day
)
adm3_merged_gdf.to_file(filename, driver='GPKG')

filename

'../data/mapthegap-mdg-adm3-2021-11-23.gpkg'

In [2]:
# Read the saved ADM2 GeoPackage back from disk and list its columns
adm2_test_gdf = gpd.read_file(
    "../data/mapthegap-mdg-adm2-2021-11-23.gpkg",
    driver="GPKG",
)
adm2_test_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 119 entries, 0 to 118
Data columns (total 13 columns):
 #   Column                                Non-Null Count  Dtype   
---  ------                                --------------  -----   
 0   ADM2_PCODE                            119 non-null    object  
 1   ADM2_EN                               119 non-null    object  
 2   ADM2_TYPE                             119 non-null    object  
 3   pixels_withbuilding_july2021          119 non-null    int64   
 4   pixels_nobuilding_july2021            119 non-null    int64   
 5   percentage_completeness_july2021      119 non-null    float64 
 6   pixels_withbuilding_jan2020           119 non-null    int64   
 7   pixels_nobuilding_jan2020             119 non-null    int64   
 8   percentage_completeness_jan2020       119 non-null    float64 
 9   pixels_withbuilding_november2021      119 non-null    int64   
 10  pixels_nobuilding_november2021        119 non-null    int64   
 11

In [16]:
# Difference in percentage completeness: November 2021 minus July 2021
adm2_test_gdf['change(nov - jul)'] = adm2_test_gdf['percentage_completeness_november2021'].sub(
    adm2_test_gdf['percentage_completeness_july2021']
)

# Five rows with the largest difference
(
    adm2_test_gdf[['ADM2_PCODE', 'ADM2_EN', 'change(nov - jul)']]
    .sort_values(by='change(nov - jul)', ascending=False)
    .head(5)
)

Unnamed: 0,ADM2_PCODE,ADM2_EN,change(nov - jul)
48,MG25217,Vondrozo,54.179448
45,MG25213,Farafangana,46.648665
118,MG72716,Vohemar,41.39583
116,MG72711,Sambava,32.569616
38,MG23209,Mananjary,24.312915


In [15]:
# Difference in percentage completeness: November 2021 minus January 2020
adm2_test_gdf['change(nov - jan)'] = adm2_test_gdf['percentage_completeness_november2021'].sub(
    adm2_test_gdf['percentage_completeness_jan2020']
)

# Five rows with the largest difference
(
    adm2_test_gdf[['ADM2_PCODE', 'ADM2_EN', 'change(nov - jan)']]
    .sort_values(by='change(nov - jan)', ascending=False)
    .head(5)
)

Unnamed: 0,ADM2_PCODE,ADM2_EN,change(nov - jan)
118,MG72716,Vohemar,72.671348
115,MG72710,Antalaha,62.755169
104,MG53519,Amboasary-Atsimo,62.438022
117,MG72712,Andapa,62.003012
116,MG72711,Sambava,60.358654


In [14]:
# Five rows with the largest November-minus-July difference, with the
# November-minus-January difference shown alongside.
# Sorts on 'change(nov - jul)': no cell in this notebook creates a column named
# plain 'change', so sorting by that name raises KeyError on a fresh kernel.
adm2_test_gdf.sort_values(by='change(nov - jul)', ascending=False).head(5)[['ADM2_PCODE','ADM2_EN','change(nov - jan)','change(nov - jul)']]

Unnamed: 0,ADM2_PCODE,ADM2_EN,change(nov - jan),change(nov - jul)
48,MG25217,Vondrozo,56.03911,54.179448
45,MG25213,Farafangana,47.452745,46.648665
118,MG72716,Vohemar,72.671348,41.39583
116,MG72711,Sambava,60.358654,32.569616
38,MG23209,Mananjary,51.52925,24.312915


In [41]:
# Read the saved ADM3 GeoPackage back from disk and list its columns
adm3_test_gdf = gpd.read_file(
    "../data/mapthegap-mdg-adm3-2021-11-23.gpkg",
    driver="GPKG",
)
adm3_test_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1579 entries, 0 to 1578
Data columns (total 14 columns):
 #   Column                                Non-Null Count  Dtype   
---  ------                                --------------  -----   
 0   ADM3_PCODE                            1579 non-null   object  
 1   ADM3_EN                               1579 non-null   object  
 2   ADM3_TYPE                             1579 non-null   object  
 3   pixels_withbuilding_july2021          1579 non-null   int64   
 4   pixels_nobuilding_july2021            1579 non-null   int64   
 5   percentage_completeness_july2021      1579 non-null   float64 
 6   pixels_withbuilding_jan2020           1579 non-null   int64   
 7   pixels_nobuilding_jan2020             1579 non-null   int64   
 8   percentage_completeness_jan2020       1579 non-null   float64 
 9   ADM2_PCODE                            1579 non-null   object  
 10  pixels_withbuilding_november2021      1579 non-null   int64   
 