## GISAID Metadata preparation

In [1]:
import pandas as pd
from datetime import  datetime

In [2]:
dt = datetime.today().strftime(format='%d-%m-%Y')

### Load submission form

In [3]:
parent_dir = '/home/douso/SarsGenomics/Gisaid'
df_gisaid = pd.read_excel(f'{parent_dir}/20210222_EpiCoV.xls', 'Submissions')

In [4]:
df_gisaid.head()

Unnamed: 0,submitter,fn,covv_virus_name,covv_type,covv_passage,covv_collection_date,covv_location,covv_add_location,covv_host,covv_add_host_info,...,covv_coverage,covv_orig_lab,covv_orig_lab_addr,covv_provider_sample_id,covv_subm_lab,covv_subm_lab_addr,covv_subm_sample_id,covv_authors,covv_comment,comment_type
0,Submitter,FASTA filename,Virus name,Type,Passage details/history,Collection date,Location,Additional location information,Host,Additional host information,...,Coverage,Originating lab,Address,Sample ID given by originating laboratory,Submitting lab,Address,Sample ID given by the submitting laboratory,Authors,Comment,Comment Icon
1,GISAID username,all_sequences.fasta,hCoV-19/Country/Identifier/2020,betacoronavirus,"e.g. Original, Vero",2020-03-02,e.g. Continent / Country / Region,"e.g. Cruise Ship, Convention, Live animal market","e.g. Human, Animal, Environment, Laboratory de...",e.g. Patient infected while traveling in ….,...,"e.g. 70x, 1,000x, 10,000x (average)",Where the clinical specimen or virus isolate w...,,,Where sequence data have been generated and su...,,,"e.g. Jane Doe, John Doe",,


In [5]:
df_gisaid.columns

Index(['submitter', 'fn', 'covv_virus_name', 'covv_type', 'covv_passage',
       'covv_collection_date', 'covv_location', 'covv_add_location',
       'covv_host', 'covv_add_host_info', 'covv_sampling_strategy',
       'covv_gender', 'covv_patient_age', 'covv_patient_status',
       'covv_specimen', 'covv_outbreak', 'covv_last_vaccinated',
       'covv_treatment', 'covv_seq_technology', 'covv_assembly_method',
       'covv_coverage', 'covv_orig_lab', 'covv_orig_lab_addr',
       'covv_provider_sample_id', 'covv_subm_lab', 'covv_subm_lab_addr',
       'covv_subm_sample_id', 'covv_authors', 'covv_comment', 'comment_type'],
      dtype='object')

In [6]:
gisaid_header = ['submitter', 'fn', 'covv_virus_name', 'covv_type', 'covv_passage', 'DT_SAM_COLL', 
 'DT_SAM_RECEP', 'covv_location', 'covv_add_location','covv_host', 'covv_add_host_info', 'covv_sampling_strategy',
 'GEND', 'AGE_YRS', 'covv_patient_status', 'covv_specimen', 'covv_outbreak', 
 'covv_last_vaccinated', 'covv_treatment', 'covv_seq_technology', 'covv_assembly_method', 'covv_coverage', 
 'covv_orig_lab', 'covv_orig_lab_addr', 'S_NUM', 'covv_subm_lab', 'covv_subm_lab_addr', 
 'covv_subm_sample_id', 'covv_authors', 'covv_comment', 'comment_type']

In [7]:
len(gisaid_header)

31

In [8]:
df_metadata = pd.read_excel('/home/douso/Documents/TrendData/Results/ResultsMerged/COVID19-results-merged-cln-pos.xlsx')

In [9]:
df_headers = df_metadata[['S_NUM', 'AGE_YRS', 'GEND', 'NAT', 
           'COUNT_RES',  
           'DT_SAM_COLL', 
           'DT_SAM_RECEP']]

In [10]:
df_seqd = pd.read_excel('/home/douso/Documents/TrendData/Results/ResultsMerged/all-sequenced-samples-IDs_30-08-2021.xlsx')[['SAMPLE']]

In [11]:
df_seqd_int = df_seqd[df_seqd['SAMPLE'].str.contains('KEM') == False]#.head()

In [12]:
seqd_list = list(df_seqd_int['SAMPLE'])

In [13]:
df_headers_int = df_headers[df_headers['S_NUM'].isin(seqd_list)]

In [14]:
df_headers_int_loc = df_headers_int.assign(covv_location = df_headers_int['COUNT_RES'].apply(lambda x: f'Africa/Kenya/{x}'))

In [15]:
df_headers_int_loc.head()

Unnamed: 0,S_NUM,AGE_YRS,GEND,NAT,COUNT_RES,DT_SAM_COLL,DT_SAM_RECEP,covv_location
9,COVC00854,23.0,F,Kenya,Nairobi,2020-06-11,NaT,Africa/Kenya/Nairobi
11,COVC00867,32.0,F,Kenya,Nairobi,2020-06-10,NaT,Africa/Kenya/Nairobi
12,COVC00893,31.0,M,Kenya,Nairobi,2020-06-10,NaT,Africa/Kenya/Nairobi
13,COVC00915,22.0,M,Kenya,Nairobi,2020-06-10,NaT,Africa/Kenya/Nairobi
14,COVC00962,31.0,M,Kenya,Nairobi,2020-06-08,2020-06-14,Africa/Kenya/Nairobi


In [16]:
df_subf = df_headers_int_loc

In [17]:
df_subf.head()

Unnamed: 0,S_NUM,AGE_YRS,GEND,NAT,COUNT_RES,DT_SAM_COLL,DT_SAM_RECEP,covv_location
9,COVC00854,23.0,F,Kenya,Nairobi,2020-06-11,NaT,Africa/Kenya/Nairobi
11,COVC00867,32.0,F,Kenya,Nairobi,2020-06-10,NaT,Africa/Kenya/Nairobi
12,COVC00893,31.0,M,Kenya,Nairobi,2020-06-10,NaT,Africa/Kenya/Nairobi
13,COVC00915,22.0,M,Kenya,Nairobi,2020-06-10,NaT,Africa/Kenya/Nairobi
14,COVC00962,31.0,M,Kenya,Nairobi,2020-06-08,2020-06-14,Africa/Kenya/Nairobi


In [18]:
submitter = ''
fn = ''
cvn = 'hCoV-19'
vt = 'betacoronavirus'
cp = 'Original'
cal = 'Unkown'
ch = 'Human'
cahi = 'Unkown'
css = 'Surveillance'
cps = ''
cps = 'Unkown'
cs = 'NP Swab'
co = 'Unkown'
clv = 'Unkown'
ct = 'Unkown'
cst = 'Illumina'
cam = pd.NA
cc = ''
col = 'International Livestock Research Institute'
cola = 'Uthiru, Naivasha road, Nairobi-Kenya'
# cpsi = 'ILRI'
csl = 'International Livestock Research Institute'
csla = 'Uthiru, Naivasha road, Nairobi-Kenya'
cssi = list(df_headers_int['S_NUM'])
ca = ''
ccomm = ''
ct = ''

In [19]:
df_subf['submitter'] = submitter
df_subf['fn'] = fn
df_subf['covv_virus_name'] = cvn
df_subf['covv_type'] = vt
df_subf['covv_passage'] = cp
df_subf['covv_add_location'] = cal
df_subf['covv_host'] = ch
df_subf['covv_add_host_info'] = cahi
df_subf['covv_sampling_strategy'] = css
df_subf['covv_patient_status'] = cps
df_subf['covv_specimen'] = cs
df_subf['covv_outbreak'] = co
df_subf['covv_last_vaccinated'] = clv
df_subf['covv_treatment'] = ct
df_subf['covv_seq_technology'] = cst
df_subf['covv_assembly_method'] = cam
df_subf['covv_coverage'] = cc
df_subf['covv_orig_lab'] = col
df_subf['covv_orig_lab_addr'] = cola
# df_subf['covv_provider_sample_id'] = cpsi
df_subf['covv_subm_lab'] = csl
df_subf['covv_subm_lab_addr'] = csla
df_subf['covv_subm_sample_id'] = cssi
df_subf['covv_authors'] = ca
df_subf['covv_comment'] = ccomm
df_subf['comment_type'] = ct
df_subf.shape

(858, 33)

In [20]:
df_subf.columns

Index(['S_NUM', 'AGE_YRS', 'GEND', 'NAT', 'COUNT_RES', 'DT_SAM_COLL',
       'DT_SAM_RECEP', 'covv_location', 'submitter', 'fn', 'covv_virus_name',
       'covv_type', 'covv_passage', 'covv_add_location', 'covv_host',
       'covv_add_host_info', 'covv_sampling_strategy', 'covv_patient_status',
       'covv_specimen', 'covv_outbreak', 'covv_last_vaccinated',
       'covv_treatment', 'covv_seq_technology', 'covv_assembly_method',
       'covv_coverage', 'covv_orig_lab', 'covv_orig_lab_addr', 'covv_subm_lab',
       'covv_subm_lab_addr', 'covv_subm_sample_id', 'covv_authors',
       'covv_comment', 'comment_type'],
      dtype='object')

In [21]:
# df_subf.assign(submitter = submitter)
# df_subf.assign(fn = fn)
# df_subf.assign(covv_virus_name = cvn)
# df_subf.assign(covv_type = cp)
# df_subf.assign(covv_host = ch)
# df_subf.assign(covv_add_host_info = cahi)
# df_subf.assign(covv_sampling_strategy = css)
# df_subf.assign(covv_patient_status = cps)
# df_subf.assign(covv_specimen = cs)
# df_subf.assign(covv_outbreak = co)
# df_subf.assign(covv_last_vaccinated = clv)
# df_subf.assign(covv_treatment = ct)
# df_subf.assign(covv_seq_technology = cst)
# df_subf.assign(covv_assembly_method = cam)
# df_subf.assign(covv_coverage = cc)
# df_subf.assign(covv_orig_lab = col)
# df_subf.assign(covv_orig_lab_addr = cola)
# df_subf.assign(covv_subm_lab = csl)
# df_subf.assign(covv_subm_lab_addr = csla)
# df_subf.assign(covv_subm_sample_id = cssi)
# df_subf.assign(covv_authors = ca)
# df_subf.assign(covv_comment = ccomm)
# df_subf.assign(comment_type = ct)
# df_subf.shape

In [22]:
df_subf1 = df_subf.drop(['NAT', 'COUNT_RES'], axis=1)

In [23]:
df_subf1.head()

Unnamed: 0,S_NUM,AGE_YRS,GEND,DT_SAM_COLL,DT_SAM_RECEP,covv_location,submitter,fn,covv_virus_name,covv_type,...,covv_assembly_method,covv_coverage,covv_orig_lab,covv_orig_lab_addr,covv_subm_lab,covv_subm_lab_addr,covv_subm_sample_id,covv_authors,covv_comment,comment_type
9,COVC00854,23.0,F,2020-06-11,NaT,Africa/Kenya/Nairobi,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00854,,,
11,COVC00867,32.0,F,2020-06-10,NaT,Africa/Kenya/Nairobi,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00867,,,
12,COVC00893,31.0,M,2020-06-10,NaT,Africa/Kenya/Nairobi,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00893,,,
13,COVC00915,22.0,M,2020-06-10,NaT,Africa/Kenya/Nairobi,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00915,,,
14,COVC00962,31.0,M,2020-06-08,2020-06-14,Africa/Kenya/Nairobi,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00962,,,


In [24]:
df_subf1[df_subf1['DT_SAM_COLL'].isna() == True]

Unnamed: 0,S_NUM,AGE_YRS,GEND,DT_SAM_COLL,DT_SAM_RECEP,covv_location,submitter,fn,covv_virus_name,covv_type,...,covv_assembly_method,covv_coverage,covv_orig_lab,covv_orig_lab_addr,covv_subm_lab,covv_subm_lab_addr,covv_subm_sample_id,covv_authors,covv_comment,comment_type
18,COVC01056,,M,NaT,2020-06-15,Africa/Kenya/nan,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC01056,,,
19,COVC01065,25.0,M,NaT,2020-06-15,Africa/Kenya/Nairobi,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC01065,,,
20,COVC01067,43.0,M,NaT,2020-06-15,Africa/Kenya/Nairobi,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC01067,,,
21,COVC01069,26.0,M,NaT,2020-06-15,Africa/Kenya/Kajiado,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC01069,,,
1069,COVC12481,39.0,M,NaT,2020-10-06,Africa/Kenya/Kiambu,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC12481,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3809,NPHL-12780,,,NaT,2021-04-07,Africa/Kenya/nan,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",NPHL-12780,,,
3810,NPHL1,,,NaT,2021-04-07,Africa/Kenya/nan,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",NPHL1,,,
3811,NPHL2,,,NaT,2021-04-07,Africa/Kenya/nan,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",NPHL2,,,
3812,NPHL3,,,NaT,2021-04-07,Africa/Kenya/nan,,,hCoV-19,betacoronavirus,...,,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",NPHL3,,,


In [25]:
df_subf1.columns

Index(['S_NUM', 'AGE_YRS', 'GEND', 'DT_SAM_COLL', 'DT_SAM_RECEP',
       'covv_location', 'submitter', 'fn', 'covv_virus_name', 'covv_type',
       'covv_passage', 'covv_add_location', 'covv_host', 'covv_add_host_info',
       'covv_sampling_strategy', 'covv_patient_status', 'covv_specimen',
       'covv_outbreak', 'covv_last_vaccinated', 'covv_treatment',
       'covv_seq_technology', 'covv_assembly_method', 'covv_coverage',
       'covv_orig_lab', 'covv_orig_lab_addr', 'covv_subm_lab',
       'covv_subm_lab_addr', 'covv_subm_sample_id', 'covv_authors',
       'covv_comment', 'comment_type'],
      dtype='object')

In [26]:
df_gisaid_sub = df_subf1[gisaid_header]#.drop('DT_SAM_COLL', axis=1)

In [27]:
df_gisaid_sub.head()

Unnamed: 0,submitter,fn,covv_virus_name,covv_type,covv_passage,DT_SAM_COLL,DT_SAM_RECEP,covv_location,covv_add_location,covv_host,...,covv_coverage,covv_orig_lab,covv_orig_lab_addr,S_NUM,covv_subm_lab,covv_subm_lab_addr,covv_subm_sample_id,covv_authors,covv_comment,comment_type
9,,,hCoV-19,betacoronavirus,Original,2020-06-11,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00854,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00854,,,
11,,,hCoV-19,betacoronavirus,Original,2020-06-10,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00867,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00867,,,
12,,,hCoV-19,betacoronavirus,Original,2020-06-10,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00893,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00893,,,
13,,,hCoV-19,betacoronavirus,Original,2020-06-10,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00915,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00915,,,
14,,,hCoV-19,betacoronavirus,Original,2020-06-08,2020-06-14,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00962,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00962,,,


In [28]:
# df_gisaid_sub_srt = df_gisaid_sub.assign(DT_SAM_COLL=df_gisaid_sub.sort_values('DT_SAM_COLL')['DT_SAM_COLL'].map(lambda x: x.strftime('%Y-%m-%d')))

In [29]:
df_gisaid_sub.head()

Unnamed: 0,submitter,fn,covv_virus_name,covv_type,covv_passage,DT_SAM_COLL,DT_SAM_RECEP,covv_location,covv_add_location,covv_host,...,covv_coverage,covv_orig_lab,covv_orig_lab_addr,S_NUM,covv_subm_lab,covv_subm_lab_addr,covv_subm_sample_id,covv_authors,covv_comment,comment_type
9,,,hCoV-19,betacoronavirus,Original,2020-06-11,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00854,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00854,,,
11,,,hCoV-19,betacoronavirus,Original,2020-06-10,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00867,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00867,,,
12,,,hCoV-19,betacoronavirus,Original,2020-06-10,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00893,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00893,,,
13,,,hCoV-19,betacoronavirus,Original,2020-06-10,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00915,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00915,,,
14,,,hCoV-19,betacoronavirus,Original,2020-06-08,2020-06-14,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00962,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00962,,,


In [30]:
df_gisaid_sub.shape

(858, 31)

In [31]:
df_brief_ids = pd.read_excel('/home/douso/Documents/TrendData/Results/ResultsMerged/all-brief-samples-IDs_30-08-2021.xlsx')[['SAMPLE']]

In [32]:
brief_id_list = list(df_brief_ids['SAMPLE'])

In [33]:
#DT_SAM_RECEP
gisaid_cols = ['submitter', 'fn', 'covv_virus_name', 'covv_type', 'covv_passage',
       'covv_collection_date', 'DT_SAM_RECEP','covv_location', 'covv_add_location',
       'covv_host', 'covv_add_host_info', 'covv_sampling_strategy',
       'covv_gender', 'covv_patient_age', 'covv_patient_status',
       'covv_specimen', 'covv_outbreak', 'covv_last_vaccinated',
       'covv_treatment', 'covv_seq_technology', 'covv_assembly_method',
       'covv_coverage', 'covv_orig_lab', 'covv_orig_lab_addr',
       'covv_provider_sample_id', 'covv_subm_lab', 'covv_subm_lab_addr',
       'covv_subm_sample_id', 'covv_authors', 'covv_comment', 'comment_type']


In [34]:
df_gisaid_sub.columns = gisaid_cols

In [35]:
df_gisaid_fin = df_gisaid_sub[df_gisaid_sub['covv_subm_sample_id'].isin(brief_id_list) == True] 

In [36]:
df_gisaid_fin.shape

(758, 31)

In [37]:
df_gisaid_fin.head()

Unnamed: 0,submitter,fn,covv_virus_name,covv_type,covv_passage,covv_collection_date,DT_SAM_RECEP,covv_location,covv_add_location,covv_host,...,covv_coverage,covv_orig_lab,covv_orig_lab_addr,covv_provider_sample_id,covv_subm_lab,covv_subm_lab_addr,covv_subm_sample_id,covv_authors,covv_comment,comment_type
9,,,hCoV-19,betacoronavirus,Original,2020-06-11,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00854,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00854,,,
11,,,hCoV-19,betacoronavirus,Original,2020-06-10,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00867,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00867,,,
12,,,hCoV-19,betacoronavirus,Original,2020-06-10,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00893,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00893,,,
13,,,hCoV-19,betacoronavirus,Original,2020-06-10,NaT,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00915,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00915,,,
14,,,hCoV-19,betacoronavirus,Original,2020-06-08,2020-06-14,Africa/Kenya/Nairobi,Unkown,Human,...,,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00962,International Livestock Research Institute,"Uthiru, Naivasha road, Nairobi-Kenya",COVC00962,,,


In [38]:
def my_date_parser(x):
    try:
        if type(x) == pd.Timestamp:
            x = x#.strftime('%d-%m-%Y')
        if type(x) != pd.Timestamp and type(x) != (int, float, pd.NaT):
            x = pd.to_datetime(x, errors='ignore', dayfirst=True)#.strftime('%d-%m-%Y')
        if x in ['None indicated', 'NIL', 'Not indicated on form ',
                    'Nil', 'None indicated', 'Leaked Sample - Empty', 'nan']:
            x = pd.NaT
    except (ValueError, AttributeError):
        x = pd.NaT
    return x

In [39]:
df_gisaid_fin = df_gisaid_fin.assign(covv_collection_date=df_gisaid_fin['covv_collection_date'].apply(lambda x: my_date_parser(x)))

In [40]:
df_gisaid_fin.sort_values(['covv_collection_date']).to_excel(f'/home/douso/Documents/TrendData/Results/ResultsMerged/gisaid_data_{dt}.xlsx', index=False)