# Load Modules

In [2]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

from plotly.subplots import make_subplots
from multiprocessing import Pool
from tqdm import tqdm
import os

# Use the white plotly template as the default for figures.
pio.templates.default = 'plotly_white'
# Turn off the pandas chained-assignment warning for the whole session.
pd.set_option('mode.chained_assignment', None)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Define Paths

In [35]:
# Cohort metadata (read later as a tab-separated file)
fn_cohort = "../data/meta/cohort_meta/LinkedPrism1Meta.tab"

# Random-effects table (CSV)
fn_re = "../src/immunity/random_effects.csv"

# AVARDA results directory (from RunAVARDA) and the cluster-count CSV
# inside its 26-August-2020_07-51 subfolder
fn_dir = "../results/AVARDA_z10_c8/"
fn_avarda = os.path.join(
    fn_dir,
    "26-August-2020_07-51/AVARDA_nonoverlapped_cluster_counts.csv",
)

# Load Data

## Cohort Meta

In [22]:
# Read the tab-separated cohort metadata, then display it.
cohort_meta = pd.read_csv(fn_cohort, sep="\t")
cohort_meta

Unnamed: 0,Barcode,id,date,age,gender,tsi,inc_yr,patent,malariacat,agecat,siteid,inf_type,positive
0,CK3-8DFD,2424,2014-09-25,2.391781,Female,467,0.594870,0,Blood smear negative / LAMP negative,6 months - < 5 years,Kanungu,malaria,0
1,CK3-ESLU,2466,2016-05-02,3.452055,Female,61,0.974000,0,Blood smear negative / LAMP negative,6 months - < 5 years,Kanungu,malaria,0
2,CK3-92DV,2110,2014-10-28,3.997260,Male,167,0.602723,0,Blood smear negative / LAMP negative,6 months - < 5 years,Kanungu,malaria,0
3,CK3-6EZE,2302,2013-08-14,2.898630,Male,61,2.132402,0,Blood smear negative / LAMP negative,6 months - < 5 years,Kanungu,malaria,0
4,CK3-CB2J,2419,2015-05-11,3.128767,Male,95,1.091928,0,Blood smear negative / LAMP negative,6 months - < 5 years,Kanungu,malaria,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,CT3-55PN,3342,2012-09-10,7.019178,Male,0,1.043571,1,Blood smear positive / no malaria,5 years - < 11 years,Tororo,patent,1
196,CT3-559K,3352,2012-09-10,6.323288,Male,0,0.573391,0,Blood smear negative / LAMP positive,5 years - < 11 years,Tororo,subpatent,1
197,CT3-5646,3355,2012-09-10,27.950686,Female,0,1.518260,0,Blood smear negative / LAMP positive,>= 18 years,Tororo,subpatent,1
198,CT3-54VE,3356,2012-09-10,7.361644,Male,83,3.687203,0,Blood smear negative / LAMP negative,5 years - < 11 years,Tororo,malaria,0


## Antibody (AB) Breadth

In [24]:
# Read the AVARDA counts without a header row; column names are given explicitly.
av_frame = pd.read_csv(
    fn_avarda,
    names=['Barcode', 'NumHits', "Breadth"],
    header=None,
)

## Immunity Random Effects

In [58]:
# Load the random-effects table, rename `uid_f` to `id`, keep only the id,
# the three random-effect columns and `eir_geom3`, and drop duplicate rows.
# Written as a single chained expression (no `inplace=True`) so the cell
# never mutates an already-bound frame and re-runs produce the same result.
re_frame = (
    pd.read_csv(fn_re)
    .rename(columns={'uid_f': 'id'})
    .loc[:, ['id', 'antidisease_re', 'antimalaria_re', 'antiparasite_re', 'eir_geom3']]
    .drop_duplicates()
)
re_frame

Unnamed: 0,id,antidisease_re,antimalaria_re,antiparasite_re,eir_geom3
0,1006,-0.058891,-0.504627,0.075845,1.761222
2,1007,0.051929,0.451152,0.025785,1.761222
5,1011,-0.061241,0.003746,-0.119928,2.023615
8,1015,-0.059114,-0.064430,0.026648,1.881411
10,1019,-0.005443,0.064916,0.156829,3.029007
...,...,...,...,...,...
5632,3448,-0.040425,0.071614,-0.295772,129.909529
5633,3449,0.041075,-0.084333,-0.290134,19.130896
5635,3450,-0.047503,-0.350492,-0.039380,19.130896
5637,3451,-0.002361,-0.580846,-0.326088,19.130896


## Merge DataFrames

In [59]:
# Inner-join the AVARDA counts to the cohort metadata on `Barcode`, then
# inner-join the random effects on `id`.
# The join keys are spelled out: without `on=`, pandas merges on every column
# name the two frames share, so a column added to either table later could
# silently change the join key and drop rows.
frame = (
    av_frame
    .merge(cohort_meta, how='inner', on='Barcode')
    .merge(re_frame, how='inner', on='id')
)
frame

Unnamed: 0,Barcode,NumHits,Breadth,id,date,age,gender,tsi,inc_yr,patent,malariacat,agecat,siteid,inf_type,positive,antidisease_re,antimalaria_re,antiparasite_re,eir_geom3
0,CT3-3MMT,470,66,3030,2012-08-21,10.191781,Male,45,2.289969,0,Blood smear negative / LAMP negative,5 years - < 11 years,Tororo,patent,0,0.161190,-0.291768,-0.189064,49.350493
1,CT3-3MXN,511,94,3033,2012-08-21,3.243836,Male,0,4.328584,1,Symptomatic malaria,6 months - < 5 years,Tororo,malaria,1,-0.144957,-0.013771,-0.223846,49.350493
2,CT3-3NJM,557,114,3035,2012-08-21,9.019178,Male,45,3.652500,0,Blood smear negative / LAMP negative,5 years - < 11 years,Tororo,subpatent,0,-0.061873,-0.016523,-0.171801,15.067760
3,CT3-47EL,186,47,3076,2012-08-22,9.816439,Female,46,4.019257,0,Blood smear negative / LAMP negative,5 years - < 11 years,Tororo,subpatent,0,0.006699,0.666147,-0.081461,31.960381
4,CT3-47N4,348,78,3038,2012-08-22,6.509589,Male,46,2.716390,0,Blood smear negative / LAMP negative,5 years - < 11 years,Tororo,subpatent,0,0.055309,-0.210962,0.006832,36.726134
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,CT3-58H5,579,81,3233,2012-09-04,7.468493,Male,0,0.800987,0,Blood smear negative / LAMP positive,5 years - < 11 years,Tororo,subpatent,1,-0.016399,-0.195580,-0.038694,180.993691
128,CT3-59UV,296,60,3137,2012-08-27,5.241096,Female,0,2.408242,1,Blood smear positive / no malaria,5 years - < 11 years,Tororo,patent,1,-0.078050,-0.320054,-0.234903,92.087564
129,CT3-5A9T,154,39,3138,2012-08-27,2.827397,Female,0,3.210989,0,Blood smear negative / LAMP positive,6 months - < 5 years,Tororo,subpatent,1,0.063335,0.110423,0.127191,92.087564
130,CK3-APTQ,15,14,2359,2015-02-17,4.265753,Male,0,1.216486,0,Blood smear negative / LAMP positive,6 months - < 5 years,Kanungu,subpatent,1,-0.034217,-0.169394,-0.188437,10.012314


# Plot Antibody Breadth

## Correlation with Number of Enriched Peptides

In [60]:
# Breadth against number of hits, coloured by site, with box plots on both margins.
px.scatter(
    frame,
    x='NumHits',
    y="Breadth",
    color="siteid",
    marginal_y="box",
    marginal_x="box",
)

## Correlation with Random Effects

In [65]:
def plot_corr_re(frame, re_name = 'antidisease_re'):
    """Scatter `Breadth` against one column of `frame`.

    Parameters
    ----------
    frame : DataFrame passed to `px.scatter`; it must provide the columns
        `Breadth`, `siteid` and the one named by `re_name`.
    re_name : name of the column placed on the x-axis
        (defaults to 'antidisease_re').

    Returns
    -------
    The figure created by `px.scatter`, with points coloured by `siteid`
    and an OLS trendline requested.
    """
    return px.scatter(
        frame,
        x=re_name,
        y='Breadth',
        color='siteid',
        trendline='ols',
    )

### Antidisease

In [67]:
# Breadth versus the antidisease random effect.
plot_corr_re(frame, re_name='antidisease_re')