In [1]:
import altair as alt
import pandas as pd

In [2]:
# Load the sample metadata and the two mitochondrial mapping tables
# (`a`: read counts, `a2`: covered bases).
d = pd.read_csv('./huanan-market-2024/metadata/Liu_etal_2023_with_sequencing.csv')
a = pd.read_csv('./huanan-market-2024/mitochondrial_mappings/mitochondrial_metazoa_counts_93.tsv', sep="\t")
a2 = pd.read_csv('./huanan-market-2024/mitochondrial_mappings/mitochondrial_metazoa_coveredbases_93.tsv', sep="\t")

# Identifier columns carried through the reshaping below.
id_cols = ['Lab code', 'Run', 'Sample_category', 'Stall_corrected']

# In the raw tables the value columns start at position 5. Keep only those
# whose covered bases, summed over every row of `a2`, reach at least 1000.
coverage_values = a2[a2.columns[5:]]
kept_value_cols = list(coverage_values.loc[:, coverage_values.sum() >= 1000].columns)

# The same selection (derived from covered bases) is applied to both tables.
a = a[id_cols + kept_value_cols]
a2 = a2[id_cols + kept_value_cols]

# Reshape to long format: one output row per input row and value column.
# The value columns are passed by name: after the subsetting above they start
# at position 4, so a positional slice such as `columns[5:]` would silently
# drop the first selected column.
data = a.melt(id_vars=id_cols, value_vars=kept_value_cols, value_name='read_counts')
data2 = a2.melt(id_vars=id_cols, value_vars=kept_value_cols, value_name='covered_bases')



In [3]:
# Join read counts and covered bases on (Run, variable). The identifier
# columns present in both inputs receive the `_x` / `_y` suffixes.
final = pd.merge(data, data2, on=['Run', 'variable'])

# Maps each original sample category to the grouping used downstream.
# Categories mapped to the string "NA" are dropped by the query below.
sample_types = {"market_stall_wildlife":"Wildlife_stalls_Jan12",
"market_west_Jan1": "Non-wildlife_stalls_Jan1",
"market_825_late":"Wildlife_stall_B_825",
"market_west_drain":"Drain",
"market_sewage_west":"NA",
"market_629_Jan12":"Wildlife_stall_A_629",
"market_west_late":"All_stalls_AfterJan12",
"market_629_late":"Wildlife_stall_A_629",
"non_market_sewage":"NA",
"market_825":"Wildlife_stall_B_825",
"market_sewage_east":"NA",
"market_east_drain":"Drain",
"warehouse":"All_stalls_AfterJan12",
"market_east_late":"All_stalls_AfterJan12",
"market_east_Jan1":"Non-wildlife_stalls_Jan1",
"market_stall_wildlife_late":"All_stalls_AfterJan12"}

final['new_category'] = final.Sample_category_x.map(sample_types)
old_final = final  # unfiltered table, referenced by the commented-out check at the end of this cell
final = final.query("new_category != 'NA'") ## Removes the categories mapped to 'NA' above (the three sewage categories)

# Runs flagged as SARS2 amplicon sequencing or as single-end duplicates are excluded.
seq_data = pd.read_csv('./huanan-market-2024/metadata/Sequencing_run_info.tsv', sep="\t")
not_included = seq_data[(seq_data.Sample_Type == 'sars2_amplicon') | (seq_data.Sample_category == 'single_end_duplicate')].Run.to_list()

# .copy() makes the filtered frame independent, so the column assignments
# below do not act on a slice of the previous frame (SettingWithCopyWarning).
final = final[~final.Run.isin(not_included)].copy()

# Ordered categorical so that sorting follows the listed order rather than
# alphabetical order; values outside this list become missing.
final['new_category'] = pd.Categorical(final['new_category'], ["Wildlife_stall_A_629", "Wildlife_stall_B_825", "Wildlife_stalls_Jan12",
                                                        'Non-wildlife_stalls_Jan1', 'All_stalls_AfterJan12', 'Drain'])
final = final.sort_values("new_category")

# Attach the sampling date from the metadata table via the lab code.
sample_id_to_date = dict(zip(d['Lab.code'], d['Sampling.date']))
final['Sampling_date'] = final['Lab code_x'].map(sample_id_to_date)

# print("Removed samples:")
# old_final[~old_final.Run.isin(list(final.Run))].groupby("Run").first()

In [4]:
import matplotlib.pyplot as plt

### SARS2 read information per sequencing run

sars2 = pd.read_csv('./huanan-market-2024/sars2_mappings/sars2_reads_post_trimming.tsv', sep="\t")
sars2.columns = ['Run','SARS2_read_count','SARS2_covered_bases']

# Optional diagnostic plot, left disabled:
# sars2.plot.scatter("SARS2_covered_bases","SARS2_read_count")
# plt.yscale("log")
# plt.show()

# Metadata rows for the runs still present in `final`, restricted to the
# columns needed to link a run to its sample and PCR record.
metadata_for_kept_runs = d[d.Sequencing_Run.isin(final.Run)]
linking_columns = ['Sample.ID', 'Lab.code','Sequencing_Run','SARS.CoV.2.qPCR.result']
sars2_seqs = sars2.merge(
    metadata_for_kept_runs[linking_columns],
    left_on='Run',
    right_on='Sequencing_Run',
)

pcr = pd.read_csv('./huanan-market-2024/metadata/Liu_PCR_results.csv')

# Inner join on the sample identifier, then keep only the per-run columns
# that are carried into `final`.
sars2_seqs = sars2_seqs.merge(pcr, left_on='Sample.ID', right_on='Sample ID')
sars2_seqs = sars2_seqs[['Run','SARS2_read_count','SARS2_covered_bases','PCR','CT']]

# Left join: rows of `final` without a matching run keep missing SARS2 values.
final = final.merge(sars2_seqs, on='Run', how='left')

# A row is 'Positive' when the PCR column is '+' or any SARS2 bases are
# covered; every other row (including rows with missing values) is 'Negative'.
final['SARS2_positive'] = 'Negative'
is_sars2_positive = (final.PCR == '+') | (final.SARS2_covered_bases > 0)
final.loc[is_sars2_positive, 'SARS2_positive'] = 'Positive'

# The contig identifier is the first whitespace-separated token of `variable`.
final['Contig'] = final['variable'].str.split(expand=True)[0]

In [5]:
# Species lookup table keyed by contig: capitalised common name, a combined
# "Common name (Species)" label, and a boolean flag for the 'Mammal' group.
species = (
    pd.read_csv('./huanan-market-2024/mitochondrial_mappings/species_descriptions_with_common_name.csv')
    [['Contig','Species','Common_name', 'Group']]
    .assign(Common_name=lambda table: table['Common_name'].str.capitalize())
    # Built from the already-capitalised common name.
    .assign(Common_and_scientific=lambda table: table['Common_name'] + " (" + table['Species'] + ")")
    .assign(is_mammal=lambda table: table['Group'] == 'Mammal')
)

In [6]:
# Attach the species descriptions to every row via the contig identifier.
final = pd.merge(final, species, on='Contig')

# Relabel columns by name rather than by position, so a change in upstream
# column order cannot silently attach a label to the wrong data. Columns not
# listed here keep their current names. errors='raise' makes a missing
# expected column fail loudly instead of being skipped.
display_labels = {
    'Lab code_x': 'Sample lab code',
    'Run': 'Sequencing run',
    'Sample_category_x': 'Sample category old',
    'Stall_corrected_x': 'Stall',
    'read_counts': 'Read counts',
    'covered_bases': 'MT Covered bases (bp)',
    'new_category': 'Sample category',
    'Sampling_date': 'Sampling date',
    'SARS2_read_count': 'SARS2 reads count',
    'SARS2_covered_bases': 'SARS2 covered bases',
    'PCR': 'SARS2 PCR',
    'SARS2_positive': 'SARS2 positivity',
    'Common_name': 'Common name',
}
final = final.rename(columns=display_labels, errors='raise')

# Keep only rows with at least one mapped read.
final = final.query("`Read counts` > 0")


In [9]:
import numpy as np
alt.data_transformers.enable("vegafusion")

# Order in which the sample-category facets are laid out.
facet_order = [
    "Wildlife_stall_A_629",
    "Wildlife_stall_B_825",
    "Wildlife_stalls_Jan12",
    'Non-wildlife_stalls_Jan1',
    'All_stalls_AfterJan12',
    'Drain',
]

# The log-scaled read count is added to `final` itself (log10 of count + 1);
# the plotted table `source` is then the mammal-only subset.
final['Log(MT Read count)'] = np.log10(final['Read counts'] + 1)
source = final.query("is_mammal == True")


#### Animal selection
# Dropdown over the sorted species labels present in `source`; the extra
# None option is labelled 'All'.
options = list(source['Common_and_scientific'].sort_values().unique())

input_dropdown = alt.binding_select(
    name='Species: ',
    options=options + [None],
    labels=options + ['All'],
)
selection = alt.selection_point(bind=input_dropdown, fields=['Common_and_scientific'])


#### SARS2 selection
input_dropdown2 = alt.binding_radio(
    name='SARS2 detection status: ',
    options=['Positive',None],
    labels=['Positive Only','All'],
)
selection2 = alt.selection_point(bind=input_dropdown2, fields=['SARS2 positivity'])

#### Sampling dates selection
# The date options are taken from `final`, not from the mammal-only `source`.
options2 = list(final['Sampling date'].unique())

input_dropdown3 = alt.binding_radio(
    name='Sampling dates: ',
    options=[None] + options2,
    labels=['All'] + options2,
)
selection3 = alt.selection_point(bind=input_dropdown3, fields=['Sampling date'])

# Fields shown when hovering over a point.
tooltip_fields = [
    'Sample lab code',
    'Sample category',
    'Stall',
    'Sequencing run',
    'Common name',
    'Species',
    'Read counts',
    'MT Covered bases (bp)',
    'SARS2 reads count',
    'SARS2 covered bases',
    'SARS2 PCR',
]

# One circle per row of `source`; each of the three selections is registered
# as a parameter and also applied as a filter.
points = (
    alt.Chart(source)
    .mark_circle(size=80)
    .encode(
        x='MT Covered bases (bp)',
        y='Log(MT Read count)',
        color='Common name',
        tooltip=tooltip_fields,
    )
    .interactive()
    .add_params(selection, selection2, selection3)
    .transform_filter(selection)
    .transform_filter(selection2)
    .transform_filter(selection3)
)

# One panel per sample category, three panels per row, written to HTML.
faceted = points.facet(
    facet=alt.Facet('Sample category', sort=facet_order),
    columns=3,
)
faceted.save("mammals.html")