# Covid Escape Data Analysis

## Imports

In [None]:
import pandas as pd
import plotly.express as px

## Reading data

In [None]:
# Input data files; each path doubles as the key of its table in `dfs`.
files = [
  'data/AZ_cocktail_raw_data.txt',
  'data/Ellebedy_invivo_raw_data.txt',
  'data/MAP_paper_antibodies_raw_data.txt',
  'data/REGN_and_LY-CoV016_raw_data.txt',
  'data/human_sera_raw_data.txt',
]

# Parse every file with pandas' default CSV settings, keyed by its path.
dfs = {path: pd.read_csv(path) for path in files}

## Stats by file

`mut_escape` value stats:

In [None]:
# Print the summary statistics of the `mut_escape` column, one file at a time.
for source_path, table in dfs.items():
  # File path, followed by a blank line.
  print(source_path, end='\n\n')
  summary = table['mut_escape'].describe()
  print(summary)
  # Two blank lines separate consecutive files.
  print('\n')

Corresponding histograms of `mut_escape`, one per file:

In [None]:
# Draw one 100-bin histogram of `mut_escape` per input file. The file path is
# used as the figure title so the otherwise identical-looking plots can be
# told apart.
for df_name, df in dfs.items():
  fig = px.histogram(df, x="mut_escape", nbins=100, title=df_name)
  fig.show()

## Stats by condition

Obtaining a list of conditions:

In [None]:
# Collect the distinct `condition` values found across all loaded tables and
# keep them as a sorted list.
conditions = sorted({
  label
  for table in dfs.values()
  for label in table['condition'].unique()
})

# Report the conditions (one per line, tab-indented) and how many there are.
listing = '\n\t'.join(conditions)
print('Conditions:\n\t{}'.format(listing))
print()
print('Number of conditions: {}'.format(len(conditions)))

Splitting all the data by the condition type:

In [None]:
# Build one DataFrame per condition, holding the matching rows from every
# input file (in file order) under a fresh 0..n-1 index.
#
# `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0,
# so the tables are stacked once with `pd.concat` and then filtered per
# condition. This also avoids re-copying the accumulated rows on every append.
if dfs:
  all_rows = pd.concat(list(dfs.values()), ignore_index=True)
else:
  # `pd.concat` rejects an empty list; fall back to an empty table.
  all_rows = pd.DataFrame(columns=['condition'])

df_by_condition = {
  condition: all_rows[all_rows['condition'] == condition].reset_index(drop=True)
  for condition in conditions
}

Plot a histogram of `mut_escape` for each condition:

In [None]:
# Show a 100-bin `mut_escape` histogram for every condition, titled with the
# condition name.
for label, subset in df_by_condition.items():
  px.histogram(subset, x="mut_escape", nbins=100, title=label).show()