# Exercise

1. ___Early hit countries___
      * For each continent, which are the five countries that are statistically hit earliest by new variants?
2. ___Predictor countries___
      * For these countries, which are the five countries on and the five countries off the respective continent that serve as predictors for incoming variants?



## Importing ETL Framework
The Mini Data Lake framework is imported to make data manipulation easier.

In [None]:
from pathlib import Path
import os
import sys
import pandas as pd
from pathlib import Path
from presentation_support import folium_del_legend
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Enable IPython's autoreload extension in mode 2, so edited modules are
# reloaded automatically before code is executed.
%load_ext autoreload
%autoreload 2
# Packages in src can be accessed via adding the src folder path to pythonpath.
# Here the absolute path of the parent of the current working directory is put
# first on sys.path, so it takes precedence when the project package is imported.
module_path = os.path.abspath(os.path.join("../"))
sys.path.insert(0, module_path)

# Import custom transformations for exercise 1
from bnt_rfp_mini_project.conventions import data_processed_path, data_output_path
from bnt_rfp_mini_project.transformation.cleansed2processed.cleansed2processed_join_data1_data2 import merge_continent_to_countries
from bnt_rfp_mini_project.transformation.processed2output.processed2output_exercise_1 import exercise_1

In [None]:
# Custom transformations
# Run the two project transformation steps in this order. The cells below read
# CSV files from data_output_path -- presumably written by these steps
# (NOTE(review): confirm against the bnt_rfp_mini_project package).
merge_continent_to_countries()
exercise_1()

## 1. Early Hit Countries
For each continent, which are the five countries that are statistically hit earliest by new variants?

In [None]:
# Load the exercise 1 result table from the output folder and render it.
first_hit_csv_path = (
    Path(data_output_path)
    / "analysis_of_covid_variants_country_indicators"
    / "covid_first_hit_countries_by_continent.csv"
)
result_top_5_countries_per_continent_df = pd.read_csv(first_hit_csv_path)
display(result_top_5_countries_per_continent_df)

In [None]:
import geopandas as gpd
import folium
from folium.plugins import Fullscreen, minimap
from presentation_support import folium_del_legend

# Read the world country polygons and keep only the countries that also appear
# in the exercise 1 result table (inner join on the shared "location" column).
world_geojson = "countries_custom.geojson"
world_gpd = gpd.read_file(world_geojson)
world_q1_gpd = world_gpd.merge(
    result_top_5_countries_per_continent_df,
    how="inner",
    on="location",
)

# Corner points in [latitude, longitude] order for the later fit_bounds call.
# total_bounds is (min x, min y, max x, max y). The southern latitude is pinned
# to -50.9 instead of being taken from the data -- presumably to keep the view
# from stretching too far south (NOTE(review): confirm the intent).
q1_bounds = world_q1_gpd.total_bounds
q1_south_west = [-50.9, q1_bounds[0]]
q1_north_east = [q1_bounds[3], q1_bounds[2]]
q1_bounds_lat_long_list = [q1_south_west, q1_north_east]

In [None]:
# Interactive map for exercise 1: one choropleth layer per continent, coloured
# by the "days" column of the first-hit result table.
m = folium.Map(location=[45.5236, -122.6750], overlay=False, tiles=None)

# The Mapbox access token is read from the environment instead of being
# committed with the notebook. Set MAPBOX_ACCESS_TOKEN before starting Jupyter
# to get the two Mapbox base maps; without it only OpenStreetMap is offered.
mapbox_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if not mapbox_token:
    print("MAPBOX_ACCESS_TOKEN is not set; the Mapbox base maps are skipped.")

# (layer name, Mapbox style id or None for the built-in OpenStreetMap tiles,
# attribution text). The order matches the order the layers are added in.
base_layers = [
    ("bnt_map_dark", "cldzs3i35000501p8exy6biz1", "Mapbox BioNTech dark"),
    ("OpenStreetMap", None, None),
    ("bnt_map", "cldycm30b002601pdhzs6z99y", "Mapbox BioNTech colors"),
]
for layer_name, style_id, attribution in base_layers:
    if style_id is None:
        folium.TileLayer(name=layer_name, tiles="OpenStreetMap").add_to(m)
    elif mapbox_token:
        folium.TileLayer(
            name=layer_name,
            # {z}/{x}/{y} stay literal: they are placeholders filled in by the map client.
            tiles=(
                f"https://api.mapbox.com/styles/v1/petruf/{style_id}"
                "/tiles/256/{z}/{x}/{y}@2x"
                f"?access_token={mapbox_token}"
            ),
            attr=attribution,
        ).add_to(m)

for idx, continent in enumerate(world_q1_gpd["continent"].dropna().unique()):
    # Geometry of the current continent only; the colour data is the full
    # result table, so every layer shares the same value range.
    q1_gpd = world_q1_gpd[world_q1_gpd["continent"] == continent]
    # folium_del_legend comes from presentation_support (not shown here);
    # presumably it removes the choropleth's legend -- confirm in that module.
    cp = folium_del_legend(
        folium.Choropleth(
            geo_data=q1_gpd,
            key_on="feature.properties.location",
            data=result_top_5_countries_per_continent_df,
            columns=["location", "days"],
            name=f"q1_{continent}",
            nan_fill_opacity=0.0,
            fill_color="OrRd_r",
            fill_opacity=0.9,
            line_opacity=0.2,
            zoom_start=5,
            highlight=True,
            use_jenks=True,
        ),
        idx,
    ).add_to(m)
    # Clicking a country shows its continent, name and "days" value.
    folium.GeoJsonPopup(["continent", "location", "days"], localize=True).add_to(
        cp.geojson
    )
folium.LayerControl(collapsed=False).add_to(m)
Fullscreen().add_to(m)
m.fit_bounds(q1_bounds_lat_long_list)
m

## 2. Predictor Countries
For these countries, which are the five countries on and the five countries off the respective continent that serve as predictors for incoming variants?

In [None]:
# Load the pairwise country comparison matrix; the "location_from" column
# becomes the row index, the remaining columns become the heatmap columns.
pairwise_df = pd.read_csv(
    Path(
        data_output_path,
        "analysis_of_covid_variants_country_indicators",
        "covid_countries_similarity.csv",
    ),
    index_col="location_from",
)

# Draw on an explicit Axes so the figure size applies to this plot only, and
# use seaborn's documented `linewidths` argument for the lines between cells.
fig, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(pairwise_df, cmap="OrRd_r", linewidths=1, ax=ax)
# The trailing semicolon keeps the Text repr out of the cell output.
ax.set_title("Country-to-country similarity (covid_countries_similarity.csv)");

In [None]:
# Load the table of the most similar countries per country.
countries_covid_similarity_df = pd.read_csv(
    Path(
        data_output_path,
        "analysis_of_covid_variants_country_indicators",
        "top_5_similar_countries.csv",
    )
)
# Raise the row limit for this one display only; option_context restores the
# previous setting afterwards, so later cells are not affected by it.
with pd.option_context("display.max_rows", 300):
    display(countries_covid_similarity_df)

In [None]:
# Attach a country polygon to every row of the similarity table by matching the
# polygon's "location" to the row's "location_to", then order by "location_from".
world_q2_gpd = world_gpd.merge(
    countries_covid_similarity_df,
    how="inner",
    left_on="location",
    right_on="location_to",
).sort_values(by="location_from")

# Corner points in [latitude, longitude] order for the later fit_bounds call.
# total_bounds is (min x, min y, max x, max y). The southern latitude is pinned
# to -50.9 instead of being taken from the data -- presumably to keep the view
# from stretching too far south (NOTE(review): confirm the intent).
q2_bounds = world_q2_gpd.total_bounds
q2_south_west = [-50.9, q2_bounds[0]]
q2_north_east = [q2_bounds[3], q2_bounds[2]]
q2_bounds_lat_long_list = [q2_south_west, q2_north_east]

In [None]:
# Interactive map for exercise 2: one initially hidden choropleth layer per
# "location_from" country, colouring its "location_to" countries by "distance".
m2 = folium.Map(location=[45.5236, -122.6750], overlay=False, tiles=None)

# The Mapbox access token is read from the environment instead of being
# committed with the notebook. Set MAPBOX_ACCESS_TOKEN before starting Jupyter
# to get the two Mapbox base maps; without it only OpenStreetMap is offered.
mapbox_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if not mapbox_token:
    print("MAPBOX_ACCESS_TOKEN is not set; the Mapbox base maps are skipped.")

# (layer name, Mapbox style id or None for the built-in OpenStreetMap tiles,
# attribution text). The order matches the order the layers are added in.
base_layers = [
    ("bnt_map_dark", "cldzs3i35000501p8exy6biz1", "Mapbox BioNTech dark"),
    ("OpenStreetMap", None, None),
    ("bnt_map", "cldycm30b002601pdhzs6z99y", "Mapbox BioNTech colors"),
]
for layer_name, style_id, attribution in base_layers:
    if style_id is None:
        folium.TileLayer(name=layer_name, tiles="OpenStreetMap").add_to(m2)
    elif mapbox_token:
        folium.TileLayer(
            name=layer_name,
            # {z}/{x}/{y} stay literal: they are placeholders filled in by the map client.
            tiles=(
                f"https://api.mapbox.com/styles/v1/petruf/{style_id}"
                "/tiles/256/{z}/{x}/{y}@2x"
                f"?access_token={mapbox_token}"
            ),
            attr=attribution,
        ).add_to(m2)

for idx, location_from in enumerate(world_q2_gpd["location_from"].dropna().unique()):
    q2_gpd = world_q2_gpd[world_q2_gpd["location_from"] == location_from]

    # folium_del_legend comes from presentation_support (not shown here);
    # presumably it removes the choropleth's legend -- confirm in that module.
    cp2 = folium_del_legend(
        folium.Choropleth(
            geo_data=q2_gpd,
            key_on="feature.properties.location_to",
            # Colour with this layer's rows only. In the full table a
            # "location_to" can occur once per "location_from", and the
            # key -> value lookup built from it keeps a single distance per
            # key, so layers would show another pair's distance. As a
            # consequence the colour bins are computed per layer.
            data=q2_gpd,
            columns=["location_to", "distance"],
            name=f"q2_{location_from}",
            nan_fill_opacity=0.0,
            fill_color="OrRd_r",
            fill_opacity=0.9,
            line_opacity=0.2,
            zoom_start=5,
            legend_name="Distance",
            show=False,
            highlight=True,
        ),
        idx,
    ).add_to(m2)
    # Each field is listed once; "continent" used to be repeated in the popup.
    folium.GeoJsonPopup(["continent", "location_to", "distance"], localize=True).add_to(
        cp2.geojson
    )
folium.LayerControl(collapsed=False).add_to(m2)
Fullscreen().add_to(m2)
m2.fit_bounds(q2_bounds_lat_long_list)
m2