In [None]:
from autumn.core.inputs.database import get_input_db
import pycountry
import pandas as pd


In [None]:

# Handle to the input database; later cells call .query() and .table_names() on it.
input_db = get_input_db()

In [None]:
# Hand-curated overrides: country name (as spelled in the source data) -> ISO 3166-1
# alpha-3 code. get_iso3 consults this table before falling back to pycountry.
# Presumably these are names pycountry cannot resolve unambiguously - verify when adding entries.
manual_map = {
    "Democratic Republic of the Congo": "COD",
    "Laos": "LAO",
    "U.S. Virgin Islands": "VIR",
    "Bonaire": "BES",  # Bonaire, Sint Eustatius and Saba ("BOS" is not an assigned code)
    "Curacao": "CUW",
    "Iran": "IRN",
    "Reunion": "REU",
    "Sint Maarten": "SXM",  # Dutch part; "MAF" is Saint Martin (French part)
    "Taiwan": "TWN",
    "USA": "USA",
}

def get_iso3(country):
    """Return the ISO 3166-1 alpha-3 code for a country name.

    Resolution order:
      1. ``manual_map`` (hand-curated overrides).
      2. ``pycountry`` fuzzy search, accepted only when it yields exactly one candidate.
      3. ``pycountry`` exact-name lookup (the name is printed first so it can be reviewed).

    Args:
        country: Country name as it appears in the source data.

    Returns:
        The three-letter ISO code as a string.

    Raises:
        LookupError: If none of the steps above resolves the name.
    """
    if country in manual_map:
        return manual_map[country]

    # search_fuzzy raises LookupError when nothing matches; treat that as
    # "no candidates" so the exact-name fallback below still gets a chance.
    try:
        country_objects = pycountry.countries.search_fuzzy(country)
    except LookupError:
        country_objects = []

    if len(country_objects) == 1:
        return country_objects[0].alpha_3

    # Zero or several fuzzy candidates: surface the name for manual review.
    print(country)
    exact_match = pycountry.countries.get(name=country)
    if exact_match is None:
        raise LookupError(
            f"Could not resolve an ISO3 code for {country!r}; "
            "add it to manual_map or to the excluded countries."
        )
    return exact_match.alpha_3

### Excluded countries

In [None]:
# Names removed from the GISAID country list before any ISO3 lookup is attempted.
# Presumably because they have no ISO 3166-1 code of their own - TODO confirm.
excluded_countries = ["Canary Islands", "Crimea"]

## GISAID (n=206)

In [None]:

# Distinct country names found in the GISAID table, minus the excluded ones,
# then translated to ISO3 codes (same order as gisaid_countries).
gisaid_countries = [
    name
    for name in input_db.query(
        table_name='gisaid',
        # conditions= {"iso_code": "AUS"}
    ).Country.unique()
    if name not in excluded_countries
]
gisaid_iso3s = list(map(get_iso3, gisaid_countries))

## UNESCO (n=210)

In [None]:
# Exploratory: display the names of the tables available in the input database.
input_db.table_names()

In [None]:
# Distinct country identifiers present in the school-closure table.
school_closure_ids = input_db.query(
    table_name='school_closure',
    # conditions= {"iso_code": "AUS"}
).country_id
unesco_iso3s = list(school_closure_ids.unique())


## SeroTracker

In [None]:
# Load the SeroTracker export; the path is relative to the notebook's working directory.
sero_data = pd.read_csv("SeroTracker_ Serosurveys Reporting Prevalence.csv")

In [None]:
# Boolean mask: True for rows whose estimate scope is graded "National".
national_filter = sero_data["Grade of Estimate Scope"].eq("National")

In [None]:
# Inclusion decision per sample-frame label. Only labels marked "Yes" are kept below;
# "No" and "Check" are both left out of included_subset.
categories_included = {
    "Health care workers and caregivers": "No",
    "Household and community samples": "Yes",
    "Residual sera": "Yes",
    "Patients seeking care for non-COVID-19 reasons": "Yes",
    "Blood donors": "Yes",
    "Non-essential workers and unemployed persons": "Yes",
    "Essential non-healthcare workers": "No",
    "Multiple populations": "Check",
    "Assisted living and long-term care facilities": "No",
    "Persons who are incarcerated": "No",
    "Pregnant or parturient women": "No",
    "Contacts of COVID patients": "No",
    "Students and Daycares": "No",
    "Persons experiencing homelessness": "No",
    "Multiple general populations": "Yes",
    "Hospital visitors": "Yes",
    "Perinatal": "No",
    "Family of essential workers": "No",
    "Representative patient population": "Yes",
    "Persons living in slums": "No",
    "Tissue donor": "Yes",
}
included_subset = [
    sample_frame
    for sample_frame, decision in categories_included.items()
    if decision == "Yes"
]
# Boolean mask: True where the row's sample frame is one of the labels in included_subset.
category_filter = sero_data["Sample Frame (groups of interest)"].isin(included_subset)

In [None]:
# Keep only the rows that pass every filter. One combined mask avoids the
# "Boolean Series key will be reindexed" warning that chained indexing triggers,
# and .copy() yields an independent frame so later column assignments do not
# raise SettingWithCopyWarning.
# NOTE(review): size_filter is not defined in any visible cell - on a fresh
# Restart & Run All this line raises NameError. Restore its defining cell
# (assumed to be a boolean Series indexed like sero_data - TODO confirm).
filtered_sero_data = sero_data.loc[national_filter & category_filter & size_filter].copy()

In [None]:
# Prevalence columns arrive as text such as "12.3%"; convert them to floats.
perc_columns = ["Serum positive prevalence", "Serum pos prevalence, 95pct CI Lower", "Serum pos prevalence, 95pct CI Upper"]

# Columns that are already numeric are skipped, so re-running this cell is safe
# (the .str accessor would otherwise fail on a float column). assign() builds a
# new frame instead of writing into what may be a slice of sero_data.
filtered_sero_data = filtered_sero_data.assign(
    **{
        col: filtered_sero_data[col].str.replace("%", "", regex=False).astype(float)
        for col in perc_columns
        if not pd.api.types.is_numeric_dtype(filtered_sero_data[col])
    }
)

In [None]:
# Distinct ISO3 codes that remain after the SeroTracker filters.
sero_alpha3_column = filtered_sero_data['Alpha3 Code']
SeroTracker_iso3s = list(sero_alpha3_column.unique())


# Intersection

In [None]:
# Countries present in all three sources, kept in GISAID order.
# A set makes each membership test O(1) instead of scanning two lists per code,
# and dict.fromkeys drops repeated codes (two GISAID names can resolve to the same ISO3).
codes_in_other_sources = set(unesco_iso3s) & set(SeroTracker_iso3s)
included_iso3s = [
    iso3 for iso3 in dict.fromkeys(gisaid_iso3s) if iso3 in codes_in_other_sources
]

In [None]:
# pycountry's name for each retained ISO3 code, in the same order as included_iso3s.
included_countries = [
    pycountry.countries.get(alpha_3=code).name
    for code in included_iso3s
]


In [None]:
import plotly.express as px

# One row per retained country; the constant value only serves to shade
# the included countries on the map.
df = pd.DataFrame(
    {
        'country': included_iso3s,
        'value': [1.0 for _ in included_iso3s],
    }
)

# "country" holds the ISO3 codes used to locate each shape; "value" drives the colour.
fig = px.choropleth(df, locations="country", color="value")
fig.show()

In [None]:
# ISO3 code -> country name for every retained country.
output_dict = dict(zip(included_iso3s, included_countries))

In [None]:
import yaml

# Write the ISO3 -> name mapping as block-style YAML into the working directory.
with open('included_countries.yml', 'w') as outfile:
    outfile.write(yaml.dump(output_dict, default_flow_style=False))