In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
import srsly
import pathlib 

In [None]:
# Data from WHO covid dashboard: https://data.who.int/dashboards/covid19/data
# NOTE(review): the path below is absolute and user-specific, so this cell only runs
# on the original author's machine. A later cell builds a relative path to a file with
# the same name under ../data/target-data/ — confirm whether that copy can be used here.

case_df = pd.read_csv(r'C:\Users\ehug0006\Downloads\WHO-COVID-19-global-data.csv')

In [None]:
# Preview the first rows of the loaded WHO case table.
case_df.head()

In [None]:
# Display the WHO case table itself as this cell's output.
case_df

In [None]:
# Countries to analyse, spelled as they appear in the 'Country' column of the
# WHO case table (note 'Viet Nam', two words).
countries = ['Malaysia', 'Philippines', 'Viet Nam']

# Keep only the rows of the case table that belong to the selected countries.
analysis_df = case_df[case_df['Country'].isin(countries)]

In [None]:
# Scatter plot of reported new cases over time, one colour per selected country.
# A title and readable axis labels are set so the figure stands on its own
# instead of showing only the raw column names.
fig = px.scatter(
    analysis_df,
    x='Date_reported',
    y='New_cases',
    color='Country',
    log_y=False,  # linear y-axis; set to True for a logarithmic scale
    title='New COVID-19 cases reported to WHO, by country',
    labels={'Date_reported': 'Date reported', 'New_cases': 'New cases'},
)

fig.show()

In [None]:
# Load variant proportions from the CoVariants GitHub repository.
# Data source: CoVariants cluster tables, https://github.com/hodcroftlab/covariants/tree/master/cluster_tables (covariants.org)

In [None]:
# Read each CoVariants JSON file into a dictionary, one variable per variant.
# The directory is named once so that relocating the data means editing a single
# line rather than nine path literals.
# NOTE(review): this is still an absolute, user-specific location; consider pointing
# it at a directory inside the repository so the notebook runs on other machines.
VARIANT_DATA_DIR = pathlib.Path(r'C:\Users\ehug0006\emu\variant_data_covariants')

Beta_20H = srsly.read_json(VARIANT_DATA_DIR / '20H.Beta.V2_data.json')
Alpha_20I = srsly.read_json(VARIANT_DATA_DIR / '20I.Alpha.V1_data.json')
Delta_21A = srsly.read_json(VARIANT_DATA_DIR / '21A.Delta_data.json')
Delta_21I = srsly.read_json(VARIANT_DATA_DIR / '21I.Delta_data.json')
Delta_21J = srsly.read_json(VARIANT_DATA_DIR / '21J.Delta_data.json')
Omicron_21K_BA1 = srsly.read_json(VARIANT_DATA_DIR / '21K.Omicron_data.json')
Omicron_21L_BA2 = srsly.read_json(VARIANT_DATA_DIR / '21L.Omicron_data.json')
Omicron_22A_BA4 = srsly.read_json(VARIANT_DATA_DIR / '22A.Omicron_data.json')
Omicron_22B_BA5 = srsly.read_json(VARIANT_DATA_DIR / '22B.Omicron_data.json')

In [None]:
# Gather the per-variant dictionaries under a single mapping keyed by variant
# label, giving one nested dictionary (variant label -> that variant's data).
variant_data_dict = dict(
    Beta_20H=Beta_20H,
    Alpha_20I=Alpha_20I,
    Delta_21A=Delta_21A,
    Delta_21I=Delta_21I,
    Delta_21J=Delta_21J,
    Omicron_21K_BA1=Omicron_21K_BA1,
    Omicron_21L_BA2=Omicron_21L_BA2,
    Omicron_22A_BA4=Omicron_22A_BA4,
    Omicron_22B_BA5=Omicron_22B_BA5,
)

In [None]:
# Build one long-format dataframe from the nested variant dictionary.
# Each inner entry (one country within one variant) becomes its own frame, tagged
# with the 'country' and 'variant' it came from. All frames are collected first and
# concatenated once, because calling pd.concat inside the loop re-copies the
# accumulated frame on every iteration.
# Row labels are left as produced by each per-country frame (they repeat across
# frames), matching what the incremental concat produced.
variant_frames = [
    pd.DataFrame(records).assign(country=country_name, variant=variant_name)
    for variant_name, per_country in variant_data_dict.items()
    for country_name, records in per_country.items()
]
df = pd.concat(variant_frames)

# Proportion of sequences that belong to the variant (cluster) in each row.
df['variant_prop'] = df['cluster_sequences'] / df['total_sequences']

In [None]:
# Countries to keep from the variant dataframe.
# NOTE(review): this rebinds `countries`, and the spelling here is 'Vietnam' whereas
# the earlier WHO case-data selection uses 'Viet Nam'. Re-running that earlier cell
# after this one would filter the case data with this list instead.
countries = ['Malaysia', 'Philippines', 'Vietnam']

# Keep only the variant rows for the selected countries.
df_analysis = df[df['country'].isin(countries)]

In [None]:
# Find when each variant's proportion first exceeds a threshold in each country.
# Rows above the threshold are kept, then the first two of them are taken for
# every (country, variant) pair.
# NOTE(review): `.head(2)` keeps two rows per group rather than only the first
# crossing, and "first" means first in the existing row order — confirm the rows
# are in week order and that two rows per group is intended.
threshold = 0.01

emergence_df = (
    df_analysis[df_analysis['variant_prop'] > threshold]
    .groupby(['country', 'variant'])
    .head(2)
)

In [None]:
# Display the rows selected as variant emergence points.
emergence_df

In [None]:
# Relative path (resolved against the current working directory) to the WHO
# global case data CSV under ../data/target-data/.
data_path = pathlib.Path('../data/target-data/WHO-COVID-19-global-data.csv')

In [None]:
# Read the CSV at data_path, using its first column as the row index.
# NOTE(review): the name `mys_data` suggests Malaysia-only data, but data_path points
# at the global WHO file and no country filter is applied here — confirm the intent.
mys_data = pd.read_csv(data_path, index_col=0)

In [None]:
# Display the table read from data_path.
mys_data