# Notebook for defining predicted counts master tables

## Import packages

In [1]:
import numpy as np
import pandas as pd
import sys
import os

In [2]:
# Make the repository root (the parent of this notebook's folder) importable
# so that the local `modules` package can be found.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [3]:
from modules import rates
from modules import load

## Load training dataframes

In [4]:
# Training counts for the pre-Omicron model, read from the curated CSV
# through the project's `load.load_synonymous_muts` helper.
counts_pre_omicron = load.load_synonymous_muts(
    '../results/curated/curated_mut_counts_pre_omicron.csv'
)

In [5]:
# Training counts for the Omicron model, read from the curated CSV
# through the project's `load.load_synonymous_muts` helper.
counts_omicron = load.load_synonymous_muts(
    '../results/curated/curated_mut_counts_omicron.csv'
)

## Initialize rates objects

In [6]:
# One independent Rates object per training set (pre-Omicron first, then Omicron).
rate_pre_om, rate_om = rates.Rates(), rates.Rates()

## Populate rates and add predicted counts

In [7]:
# Populate each Rates object from its own training counts; the pre-Omicron
# and Omicron objects are handled independently of each other.
rate_pre_om.populate_rates(counts_pre_omicron)
rate_om.populate_rates(counts_omicron)

In [8]:
# Run predicted_counts on each Rates object, passing the same counts frame
# that was used to populate it.
# NOTE(review): the `rates` tables displayed in the following cells contain
# `predicted_count` and `cond_count` columns, presumably added by this call —
# confirm in modules/rates.py.
rate_pre_om.predicted_counts(counts_pre_omicron)
rate_om.predicted_counts(counts_omicron)

In [9]:
# Preview the first five rows of the pre-Omicron rates table.
rate_pre_om.rates.head(n=5)

Unnamed: 0,mut_type,motif,unpaired,nt_site_before_boundary,rate,condition,predicted_count,cond_count
0,AC,AAA,0,False,0.110585,"(AC, AAA, 0, False)",13.247765,0
1,AC,AAA,1,False,0.128033,"(AC, AAA, 1, False)",15.33798,0
2,AC,AAC,0,False,0.09745,"(AC, AAC, 0, False)",11.674235,0
3,AC,AAC,1,False,0.112919,"(AC, AAC, 1, False)",13.527447,0
4,AC,AAG,0,False,0.189517,"(AC, AAG, 0, False)",22.703676,21


In [10]:
# Preview the first five rows of the Omicron rates table.
rate_om.rates.head(n=5)

Unnamed: 0,mut_type,motif,unpaired,nt_site_before_boundary,rate,condition,predicted_count,cond_count
0,AC,AAA,0,False,0.084546,"(AC, AAA, 0, False)",13.947653,0
1,AC,AAA,1,False,0.09878,"(AC, AAA, 1, False)",16.29593,0
2,AC,AAC,0,False,0.072427,"(AC, AAC, 0, False)",11.948311,0
3,AC,AAC,1,False,0.084707,"(AC, AAC, 1, False)",13.974226,0
4,AC,AAG,0,False,0.145828,"(AC, AAG, 0, False)",24.057343,21


## Formatting master tables

### Adding lightswitch boundaries

In [11]:
# Create the integer `nt_site_boundary` column on both tables, initialised to 0
# for every row.
rate_pre_om.rates['nt_site_boundary'] = np.zeros(len(rate_pre_om.rates), dtype=int)
rate_om.rates['nt_site_boundary'] = np.zeros(len(rate_om.rates), dtype=int)

In [12]:
# Pre-Omicron table: CT rows get boundary 13467; AT, GC and CG rows get 21562.
# Rows of any other mutation type keep their current `nt_site_boundary` value.
rate_pre_om.rates.loc[rate_pre_om.rates['mut_type'] == 'CT', 'nt_site_boundary'] = 13467
rate_pre_om.rates.loc[rate_pre_om.rates['mut_type'].isin(['AT', 'GC', 'CG']), 'nt_site_boundary'] = 21562

In [13]:
# Omicron table: CT rows get boundary 13467; AT, GC and CG rows get 21562.
# Rows of any other mutation type keep their current `nt_site_boundary` value.
rate_om.rates.loc[rate_om.rates['mut_type'] == 'CT', 'nt_site_boundary'] = 13467
rate_om.rates.loc[rate_om.rates['mut_type'].isin(['AT', 'GC', 'CG']), 'nt_site_boundary'] = 21562

## Save master tables

In [14]:
# Columns written to the master tables, in export order.
cols = [
    'mut_type',
    'motif',
    'unpaired',
    'nt_site_boundary',
    'nt_site_before_boundary',
    'rate',
    'predicted_count',
]

In [15]:
# Preview the pre-Omicron table restricted to the export columns.
rate_pre_om.rates.loc[:, cols].head(n=5)

Unnamed: 0,mut_type,motif,unpaired,nt_site_boundary,nt_site_before_boundary,rate,predicted_count
0,AC,AAA,0,0,False,0.110585,13.247765
1,AC,AAA,1,0,False,0.128033,15.33798
2,AC,AAC,0,0,False,0.09745,11.674235
3,AC,AAC,1,0,False,0.112919,13.527447
4,AC,AAG,0,0,False,0.189517,22.703676


In [16]:
# Write the pre-Omicron master table (export columns only, no index) to CSV.
# Create the output folder first so the cell also works on a fresh checkout.
os.makedirs('../results/master_tables', exist_ok=True)
# 'condition' is not one of the exported columns. errors='ignore' keeps this
# in-place drop from raising KeyError when the cell is executed again after
# the column has already been removed.
rate_pre_om.rates.drop(columns=['condition'], inplace=True, errors='ignore')
rate_pre_om.rates[cols].to_csv('../results/master_tables/master_table_pre_omicron.csv', index=False)

In [17]:
# Write the Omicron master table (export columns only, no index) to CSV.
# Create the output folder first so the cell also works on a fresh checkout.
os.makedirs('../results/master_tables', exist_ok=True)
# 'condition' is not one of the exported columns. errors='ignore' keeps this
# in-place drop from raising KeyError when the cell is executed again after
# the column has already been removed.
rate_om.rates.drop(columns=['condition'], inplace=True, errors='ignore')
rate_om.rates[cols].to_csv('../results/master_tables/master_table_omicron.csv', index=False)