In [None]:
# Basics
import pandas as pd
import numpy as np
import random

# Data management
import csv

# Paths
import re
import os
from pathlib import Path

# Plots
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Geospatial
import geopandas as gpd
import folium

In [None]:
# Read the survey inventory: the 'Master' sheet of the workbook, skipping the
# first spreadsheet row so that the header is taken from the row below it.
tus_df = pd.read_excel(
    'Data Inventory.xlsx',
    sheet_name='Master',
    skiprows=1,
)

In [None]:
# Restrict the inventory to surveys whose harmonization is marked 'Completed'
# and keep only the identifying columns needed for the maps below.
is_completed = tus_df['Harmonization'].eq('Completed')
kept_columns = [
    'ID',
    'Country',
    'Country Code',
    'Year',
    'Harmonization',
    'Source for harmonization',
]
tus_completed_df = tus_df.loc[is_completed, kept_columns]

# Quick visual check on a few random rows.
tus_completed_df.sample(3)

Unnamed: 0,ID,Country,Country Code,Year,Harmonization,Source for harmonization
317,USA1993,United States of America,USA,1993.0,Completed,IPUMS
293,URY2021,Uruguay,URY,2021.0,Completed,National Office
318,USA1985,United States of America,USA,1985.0,Completed,IPUMS


## Basic map
Only shows surveys for which harmonization has been completed.

In [None]:
# Build a world map that highlights every country with at least one
# harmonized survey and save it as a standalone HTML file.

# Load the Natural Earth low-resolution country polygons bundled with geopandas.
# NOTE(review): `gpd.datasets` is deprecated and is removed in geopandas 1.0;
# when the environment is upgraded, point this at a downloaded Natural Earth file.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# This dataset stores iso_a3 == '-99' for a few countries (notably France and
# Norway), which would silently exclude them from the join below. Restore the
# real codes by country name; rows without a known fix keep '-99'.
iso_a3_fixes = {'France': 'FRA', 'Norway': 'NOR'}
missing_iso = world['iso_a3'] == '-99'
world.loc[missing_iso, 'iso_a3'] = (
    world.loc[missing_iso, 'name'].map(iso_a3_fixes).fillna('-99')
)

# The inventory has one row per survey, so a country with several surveys
# would otherwise be duplicated by the merge and drawn several times on top of
# itself. Reduce it to one row per country code first.
surveyed_codes = tus_completed_df[['Country Code']].dropna().drop_duplicates()

# Merge on the 'iso_a3' column of the shapefile and 'Country Code' of the data.
# `validate` guards against the right-hand side ever fanning out rows again.
world = world.merge(
    surveyed_codes,
    left_on='iso_a3',
    right_on='Country Code',
    how='left',
    indicator=True,
    validate='many_to_one',
)

# Flag countries that found a match in the survey inventory
world['survey_available'] = world['_merge'] == 'both'

# Initialize a folium map with default view settings
m = folium.Map()

# Add one polygon layer per country that has a survey
for _, row in world[world['survey_available']].iterrows():
    folium.GeoJson(
        row['geometry'],
        style_function=lambda x: {'fillColor': 'blue', 'color': 'black', 'weight': 0.5},
        tooltip=row['name']
    ).add_to(m)

# Save to an HTML file
m.save('survey_map.html')

  world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


## Map with categories: IPUMS and National Statistical Office

In [None]:
# Build a world map that colours each country by the source used to
# harmonize its surveys, add a legend, and save it as a standalone HTML file.

# Load the Natural Earth low-resolution country polygons bundled with geopandas.
# NOTE(review): `gpd.datasets` is deprecated and is removed in geopandas 1.0;
# when the environment is upgraded, point this at a downloaded Natural Earth file.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# This dataset stores iso_a3 == '-99' for a few countries (notably France and
# Norway), which would silently exclude them from the join below. Restore the
# real codes by country name; rows without a known fix keep '-99'.
iso_a3_fixes = {'France': 'FRA', 'Norway': 'NOR'}
missing_iso = world['iso_a3'] == '-99'
world.loc[missing_iso, 'iso_a3'] = (
    world.loc[missing_iso, 'name'].map(iso_a3_fixes).fillna('-99')
)

# The inventory has one row per survey, so merging it as-is would draw the same
# country/source polygon once per survey. Keep one row per (country, source).
# NOTE(review): a country harmonized by both sources still yields two rows, and
# therefore two overlaid layers; decide whether one source should take priority.
country_sources = (
    tus_completed_df[['Country Code', 'Source for harmonization']]
    .dropna(subset=['Country Code'])
    .drop_duplicates()
)

# Merge on the 'iso_a3' column of the shapefile and 'Country Code' of the data
world = world.merge(
    country_sources,
    left_on='iso_a3',
    right_on='Country Code',
    how='left',
)

# Define colors for each source
color_dict = {
    'IPUMS': 'blue',
    'National Office': 'green'
}

# Initialize a folium map with default view settings
m = folium.Map()

# Add one polygon layer per country that has a harmonization source
for _, row in world.iterrows():
    source = row['Source for harmonization']
    if pd.isna(source):
        continue
    # Sources missing from color_dict fall back to gray
    layer_style = {
        'fillColor': color_dict.get(source, 'gray'),
        'color': 'black',
        'weight': 0.5
    }
    folium.GeoJson(
        row['geometry'],
        # Bind the style through a default argument so each layer keeps its own
        # value instead of the one from the last loop iteration
        style_function=lambda x, style=layer_style: style,
        tooltip=row['name']
    ).add_to(m)

# Add legend; the entries are generated from color_dict so that the legend can
# never drift from the colours actually used on the map
legend_entries = ''.join(
    f'    <i style="background:{color}; width: 18px; height: 18px; '
    f'float: left; margin-right: 8px;"></i> {source}<br>\n'
    for source, color in color_dict.items()
)
legend_html = f'''
<div style="
    position: fixed;
    bottom: 50px; left: 50px; width: 150px; height: 90px;
    background-color: white; z-index:9999; font-size:14px;
    border:2px solid grey;
    padding: 10px;
    ">
    <h4 style="margin-bottom:10px;">Source for Harmonization</h4>
{legend_entries}</div>
'''

m.get_root().html.add_child(folium.Element(legend_html))

# Save to an HTML file
m.save('surveymap_coverage.html')

  world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
