# Add predicted counts to clade-wise dataframe

## Snakemake inputs and outputs

In [None]:
# `snakemake` is not defined anywhere in this notebook; it is expected to be
# injected by Snakemake when the workflow executes the notebook.
# Inputs: the clade-wise counts CSV read with `pd.read_csv`, and the two files
# handed to `load.load_synonymous_muts`.
counts_df_csv = snakemake.input.counts_df
pre_omicron = snakemake.input.pre_omicron
omicron = snakemake.input.omicron
# Output: path the final dataframe is written to with `to_csv`.
pred_count_csv = snakemake.output.pred_count_csv

## Import packages

In [1]:
import pandas as pd
import sys
import os

In [2]:
# Put the parent of the current working directory on the import path so that
# the `modules` package imported in the next cell can be found (presumably it
# lives one level up -- confirm against the repository layout).
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [3]:
from modules import load
from modules import rates

## Load dataframes

In [4]:
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-dtype columns on a table of
# this size (the saved `tail()` output shows roughly 2.4 million rows).
counts_by_clade = pd.read_csv(counts_df_csv, low_memory=False)

In [5]:
# Preview the first rows to sanity-check the columns that were loaded.
counts_by_clade.head()

Unnamed: 0,clade,nt_site,nt_mutation,exclude,masked_in_usher,expected_count,actual_count,clade_founder_nt,gene,clade_founder_codon,...,mut_type,mut_class,pre_omicron_or_omicron,nt_site_before_boundary,ss_prediction,unpaired,motif,ref_motif,predicted_count,tau_squared
0,20A,1,A1C,True,True,0.96873,0,A,noncoding,noncoding,...,AC,noncoding,pre_omicron,False,unpaired,1,AAT,,1.738421,0.433274
1,20A,1,A1G,True,True,3.6091,0,A,noncoding,noncoding,...,AG,noncoding,pre_omicron,False,unpaired,1,AAT,,6.308562,0.736113
2,20A,1,A1T,True,True,1.2782,0,A,noncoding,noncoding,...,AT,noncoding,pre_omicron,True,unpaired,1,AAT,,0.629081,0.635531
3,20A,2,T2A,True,True,0.90342,0,T,noncoding,noncoding,...,TA,noncoding,pre_omicron,False,unpaired,1,ATT,ATT,0.20519,0.520788
4,20A,2,T2C,True,True,3.5889,0,T,noncoding,noncoding,...,TC,noncoding,pre_omicron,False,unpaired,1,ATT,ATT,6.277356,0.719985


In [6]:
# Load the pre-Omicron input with the project loader. Judging by the function
# name this holds synonymous-mutation counts -- confirm in `modules.load`.
counts_pre_om = load.load_synonymous_muts(pre_omicron)

In [7]:
# Same loader applied to the Omicron input (contents presumably parallel to
# the pre-Omicron table -- confirm in `modules.load`).
counts_om = load.load_synonymous_muts(omicron)

## Populate with predicted counts

In [8]:
# The return value is discarded and `counts_by_clade` is what gets inspected
# and saved afterwards, so this call presumably updates `counts_by_clade` in
# place -- TODO confirm in `modules.rates`. If so, re-running this cell acts on
# an already-modified frame; re-run the load cell first.
rates.add_predicted_count_all_clades(counts_pre_om, counts_om, counts_by_clade)

In [9]:
# Inspect the last rows after the populate step.
counts_by_clade.tail()

Unnamed: 0,clade,nt_site,nt_mutation,exclude,masked_in_usher,expected_count,actual_count,clade_founder_nt,gene,clade_founder_codon,...,mut_type,mut_class,pre_omicron_or_omicron,nt_site_before_boundary,ss_prediction,unpaired,motif,ref_motif,predicted_count,tau_squared
2422138,23I,29902,A29902G,True,True,3.6645,0,A,noncoding,noncoding,...,AG,noncoding,omicron,False,nd,0,AAA,AAA,2.526564,0.279517
2422139,23I,29902,A29902T,True,True,1.2852,0,A,noncoding,noncoding,...,AT,noncoding,omicron,False,nd,0,AAA,AAA,1.42171,0.75194
2422140,23I,29903,A29903C,True,True,0.4188,0,A,noncoding,noncoding,...,AC,noncoding,omicron,False,nd,0,AAA,,0.559207,0.642931
2422141,23I,29903,A29903G,True,True,3.6645,0,A,noncoding,noncoding,...,AG,noncoding,omicron,False,nd,0,AAA,,2.526564,0.279517
2422142,23I,29903,A29903T,True,True,1.2852,0,A,noncoding,noncoding,...,AT,noncoding,omicron,False,nd,0,AAA,,1.42171,0.75194


In [10]:
# Write the table to the Snakemake output path; index=False leaves the
# positional row index out of the CSV.
counts_by_clade.to_csv(pred_count_csv, index=False)