In [1]:
import altair as alt
import pandas as pd
from altair_saver import save

from gisaid_utils import *

In [2]:
gisaid_metadata_filename = 'metadata_2022-08-20_00-26.tsv.gz'
extract_africa_metadata(gisaid_metadata_filename)
africa_metadata = get_africa_metadata()

In [3]:
countries_df = get_countries()

In [4]:
income_groups = get_income_groups()

In [5]:
def date_greater_filter(date, cutoff):
    date_parts = date.split('-')
    date_day = int(date_parts[2]) if len(date_parts) == 3 else 0
    date_month = int(date_parts[1]) if len(date_parts) >= 2 else 0
    date_year = int(date_parts[0])
    return [date_year, date_month, date_day] >= cutoff

def date_lesser_filter(date, cutoff):
    return not date_greater_filter(date, cutoff)

days_to_submit_df_2020 = africa_metadata[africa_metadata.apply(
    lambda r:(date_lesser_filter(r['date_submitted'], [2021,1,1]) & date_greater_filter(r['date_submitted'], [2019,12,31])), axis=1)
                                   ].groupby('country').mean().reset_index().sort_values(by='days_to_submit')
days_to_submit_df_2020 = insert_income_groups(days_to_submit_df_2020, countries_df, income_groups, 'country')

days_to_submit_df_2021 = africa_metadata[africa_metadata.apply(
    lambda r:(date_lesser_filter(r['date_submitted'], [2022,1,1]) & date_greater_filter(r['date_submitted'], [2020,12,31])), axis=1)
                                   ].groupby('country').mean().reset_index().sort_values(by='days_to_submit')
days_to_submit_df_2021 = insert_income_groups(days_to_submit_df_2021, countries_df, income_groups, 'country')

days_to_submit_df_2022 = africa_metadata[africa_metadata.apply(
    lambda r:(date_lesser_filter(r['date_submitted'], [2023,1,1]) & date_greater_filter(r['date_submitted'], [2021,12,31])), axis=1)
                                   ].groupby('country').mean().reset_index().sort_values(by='days_to_submit')
days_to_submit_df_2022 = insert_income_groups(days_to_submit_df_2022, countries_df, income_groups, 'country')



In [17]:
def make_chart(df, year):
    chart = alt.Chart(df).mark_bar().encode(x=alt.X("country", title="Country"),
                                            y=alt.Y("days_to_submit", title=f"Days to submit sample to GISAID - {year}"),
                                            color=alt.Color('Income group', sort=['Low income', 'Lower middle income', 'Upper middle income', 'High income']))
    save(chart, f"images/days_to_submit_to_gisaid_{year}.png")
    save(chart, f"images/days_to_submit_to_gisaid_{year}.svg")
    return chart

In [18]:
make_chart(days_to_submit_df_2020, 2020)

In [19]:
make_chart(days_to_submit_df_2021, 2021)

In [21]:
make_chart(days_to_submit_df_2022, 2022)

In [11]:
def print_dts_stats(df, year, cutoff):
    print(f"median days to submit in {year}:", round(df.days_to_submit.median()), "standard deviation:", df.days_to_submit.median())
    print(f"number of countries submitting in under {cutoff} days:", len(df[df.days_to_submit < cutoff].sort_values(by='country')))
    print("number of countries submitting:", len(df))    

In [12]:
print_dts_stats(days_to_submit_df_2020, '2020', 100)

median days to submit in 2020: 116 standard deviation: 116.05125391849529
number of countries submitting in under 100 days: 12
number of countries submitting: 28


In [13]:
print_dts_stats(days_to_submit_df_2021, '2021', 100)

median days to submit in 2021: 105 standard deviation: 104.88906497622821
number of countries submitting in under 100 days: 23
number of countries submitting: 49


In [14]:
print_dts_stats(days_to_submit_df_2022, '2022', 100)

median days to submit in 2022: 128 standard deviation: 127.93392070484582
number of countries submitting in under 100 days: 18
number of countries submitting: 51
