In [1]:
import altair as alt
import pandas as pd

from gisaid_utils import *

In [2]:
gisaid_metadata_filename = 'metadata_2022-08-20_00-26.tsv.gz'
extract_africa_metadata(gisaid_metadata_filename)
africa_metadata = get_africa_metadata()

In [15]:
def date_greater_filter(date, cutoff):
    date_parts = date.split('-')
    date_day = int(date_parts[2]) if len(date_parts) == 3 else 0
    date_month = int(date_parts[1]) if len(date_parts) >= 2 else 0
    date_year = int(date_parts[0])
    return [date_year, date_month, date_day] >= cutoff

def date_lesser_filter(date, cutoff):
    return not date_greater_filter(date, cutoff)

days_to_submit_df_2020 = africa_metadata[africa_metadata.apply(
    lambda r:(date_lesser_filter(r['date_submitted'], [2021,1,1]) & date_greater_filter(r['date_submitted'], [2019,12,31])), axis=1)
                                   ].groupby('country').mean().reset_index().sort_values(by='days_to_submit')

days_to_submit_df_2021 = africa_metadata[africa_metadata.apply(
    lambda r:(date_lesser_filter(r['date_submitted'], [2022,1,1]) & date_greater_filter(r['date_submitted'], [2020,12,31])), axis=1)
                                   ].groupby('country').mean().reset_index().sort_values(by='days_to_submit')

days_to_submit_df_2022 = africa_metadata[africa_metadata.apply(
    lambda r:(date_lesser_filter(r['date_submitted'], [2023,1,1]) & date_greater_filter(r['date_submitted'], [2021,12,31])), axis=1)
                                   ].groupby('country').mean().reset_index().sort_values(by='days_to_submit')



In [16]:
alt.Chart(days_to_submit_df_2020).mark_bar().encode(x=alt.X("country", title="Country"), y=alt.Y("days_to_submit", title="Days to submit sample to GISAID"))

In [17]:
alt.Chart(days_to_submit_df_2021).mark_bar().encode(x=alt.X("country", title="Country"), y=alt.Y("days_to_submit", title="Days to submit sample to GISAID"))

In [18]:
alt.Chart(days_to_submit_df_2022).mark_bar().encode(x=alt.X("country", title="Country"), y=alt.Y("days_to_submit", title="Days to submit sample to GISAID"))

In [45]:
print_dts_stats(days_to_submit_df_2020, '2020', 100)

median days to submit in 2020: 116 standard deviation: 116.05125391849529
number of countries submitting in under 100 days: 12
number of countries submitting: 28


In [46]:
print_dts_stats(days_to_submit_df_2021, '2021', 100)

median days to submit in 2021: 105 standard deviation: 104.88906497622821
number of countries submitting in under 100 days: 23
number of countries submitting: 49


In [47]:
print_dts_stats(days_to_submit_df_2022, '2022', 100)

median days to submit in 2022: 128 standard deviation: 127.93392070484582
number of countries submitting in under 100 days: 18
number of countries submitting: 51


In [44]:
def print_dts_stats(df, year, cutoff):
    print(f"median days to submit in {year}:", round(df.days_to_submit.median()), "standard deviation:", df.days_to_submit.median())
    print(f"number of countries submitting in under {cutoff} days:", len(df[df.days_to_submit < cutoff].sort_values(by='country')))
    print("number of countries submitting:", len(df))    

In [7]:
days_to_submit_df

Unnamed: 0,country,length,purpose_of_sequencing,days_to_submit
40,Sierra Leone,29764.5,,28.0
3,Botswana,29414.857239,,33.34733
51,Zimbabwe,29748.708029,,48.39781
27,Mali,29809.5,,56.0
30,Morocco,29744.153558,,62.685393
8,Central African Republic,29693.851064,,64.468085
12,Djibouti,29742.280488,,65.72439
42,South Africa,29682.437628,,69.852152
39,Seychelles,29701.909012,,72.051127
35,Republic of the Congo,29664.869258,,77.24735


In [8]:
countries_df = get_countries()

In [9]:
days_to_submit_df.insert(0, 'iso3', days_to_submit_df['country'].apply(lambda c: country_name_to_iso3(c, countries_df)))

In [10]:
days_to_submit_df

Unnamed: 0,iso3,country,length,purpose_of_sequencing,days_to_submit
40,SLE,Sierra Leone,29764.5,,28.0
3,BWA,Botswana,29414.857239,,33.34733
51,ZWE,Zimbabwe,29748.708029,,48.39781
27,MLI,Mali,29809.5,,56.0
30,MAR,Morocco,29744.153558,,62.685393
8,CAF,Central African Republic,29693.851064,,64.468085
12,DJI,Djibouti,29742.280488,,65.72439
42,ZAF,South Africa,29682.437628,,69.852152
39,SYC,Seychelles,29701.909012,,72.051127
35,COG,Republic of the Congo,29664.869258,,77.24735
