In [1]:
import pandas as pd
import os
from Bio import SeqIO

In [2]:
# Read the binding measurements CSV into a DataFrame and display it.
binding_csv = "data/bli_binding_data.csv"
binding_df = pd.read_csv(binding_csv)
binding_df

Unnamed: 0,Include,Index,Color,Sensor Location,Sensor Type,Sensor Info,Replicate Group,Baseline Loc.,Assoc. (Sample) Loc.,Sample ID,...,kobs(1/s),Req,Req/Rmax(%),Full X^2,Full R^2,SSG KD,SSG Rmax,SSG R^2,Unnamed: 32,sample_id
0,x,0,-1264826,A3,SA (Streptavidin),,,A2,A3,C2,...,2.9800,0.1179,97.2,0.0646,0.9686,1.400000e-07,0.1251,0.9993,,IL7Ra_binder_design_30
1,x,1,-1264826,B3,SA (Streptavidin),,,B2,B3,C2,...,0.6626,0.1060,87.4,0.0646,0.9686,1.400000e-07,0.1251,0.9993,,IL7Ra_binder_design_30
2,x,2,-1264826,C3,SA (Streptavidin),,,C2,C3,C2,...,0.1991,0.0706,58.2,0.0646,0.9686,1.400000e-07,0.1251,0.9993,,IL7Ra_binder_design_30
3,x,3,-1264826,D3,SA (Streptavidin),,,D2,D3,C2,...,0.1064,0.0264,21.8,0.0646,0.9686,1.400000e-07,0.1251,0.9993,,IL7Ra_binder_design_30
4,x,4,-363184,E3,SA (Streptavidin),,,E2,E3,D2,...,0.7506,0.1185,69.6,0.1456,0.8841,1.900000e-06,0.1710,0.9927,,IL7Ra_binder_design_1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,x,75,-14513767,D12,SA (Streptavidin),,,D2,D12,D12,...,0.1672,0.0209,15.8,0.0697,0.9701,1.900000e-07,0.1366,0.9962,,IL7Ra_binder_design_88
76,x,76,-3221939,E12,SA (Streptavidin),,,E2,E12,F12,...,2.8690,0.0962,77.2,0.0605,0.9323,1.200000e-06,0.1257,0.9246,,IL7Ra_binder_design_17
77,x,77,-3221939,F12,SA (Streptavidin),,,F2,F12,F12,...,1.0980,0.0503,40.3,0.0605,0.9323,1.200000e-06,0.1257,0.9246,,IL7Ra_binder_design_17
78,x,78,-3221939,G12,SA (Streptavidin),,,G2,G12,F12,...,0.7437,0.0148,11.9,0.0605,0.9323,1.200000e-06,0.1257,0.9246,,IL7Ra_binder_design_17


In [3]:
# Read the screening results table; the first CSV column becomes the row index.
screen_csv = "data/IL7Ra_binders_screening_results.csv"
screen_df = pd.read_csv(screen_csv, index_col=0)
screen_df

Unnamed: 0,RFdiffusion_output,AF2_prediction,Noise_level,Fold_conditioned,FastRelax,Monomeric,Binder
0,IL7Ra_binder_design_1,IL7Ra_binder_AF2_1,0.5,False,False,True,True
1,IL7Ra_binder_design_2,IL7Ra_binder_AF2_2,0.0,True,False,True,True
2,IL7Ra_binder_design_3,IL7Ra_binder_AF2_3,0.0,False,False,True,False
3,IL7Ra_binder_design_4,IL7Ra_binder_AF2_4,0.0,False,False,True,False
4,IL7Ra_binder_design_5,IL7Ra_binder_AF2_5,0.0,False,False,True,False
...,...,...,...,...,...,...,...
90,IL7Ra_binder_design_91,IL7Ra_binder_AF2_91,0.0,False,False,False,False
91,IL7Ra_binder_design_92,IL7Ra_binder_AF2_92,0.0,False,False,True,True
92,IL7Ra_binder_design_93,IL7Ra_binder_AF2_93,0.5,False,False,True,False
93,IL7Ra_binder_design_94,IL7Ra_binder_AF2_94,0.5,True,False,False,False


In [4]:
# Parse the FASTA file and tabulate one (record id, sequence string) row per record.
# The id column is named "AF2_prediction" so it can be joined against screen_df.
records = list(SeqIO.parse("data/IL7Ra_binders_sequences.fasta", "fasta"))
sequence_rows = [(entry.id, str(entry.seq)) for entry in records]
fasta_df = pd.DataFrame(sequence_rows, columns=["AF2_prediction", "sequence"])


In [5]:
# Attach each design's sequence to the screening table (left join on "AF2_prediction",
# so every screening row is kept even when no FASTA record matches).
screen_df = pd.merge(
    screen_df,
    fasta_df,
    how="left",
    on="AF2_prediction",
)

In [6]:
# Read the kinetic measurement summary into a DataFrame and display it.
kinetic_csv = "data/kinetic_measurement_summary.csv"
kinetic_df = pd.read_csv(kinetic_csv)
kinetic_df

Unnamed: 0,Name,Replicate,Loading,KD (M),Kon (1/Ms),Koff (1/s),Binder
0,Blank,1,0.01,,,,none
1,Blank,2,0.0,,,,none
2,Blank,3,0.01,,,,none
3,Blank,4,0.01,,,,none
4,IL7Ra_binder_design_1,1,4.4,2.15e-05,,,weak
5,IL7Ra_binder_design_10,1,3.37,,,,none
6,IL7Ra_binder_design_11,1,13.5,,,,none
7,IL7Ra_binder_design_12,1,5.78,6.40e-06,,,strong
8,IL7Ra_binder_design_13,1,6.47,,,,none
9,IL7Ra_binder_design_14,1,6.04,> 1e-6,,0.00457,weak


In [7]:
# Left-join the kinetic summary onto the screening table, matching
# screen_df["RFdiffusion_output"] against kinetic_df["Name"].
# Both inputs carry a "Binder" column; the suffixes keep them apart as
# "Binder_bool" (from screen_df) and "Binder_class" (from kinetic_df).
data = pd.merge(
    screen_df,
    kinetic_df,
    how="left",
    left_on="RFdiffusion_output",
    right_on="Name",
    suffixes=("_bool", "_class"),
)

In [None]:
# DataFrame.rename returns a new frame rather than modifying `data`, so the
# result must be assigned back; otherwise the "Design" column name never
# reaches the cells below or the exported CSV.
# NOTE(review): this changes the exported column from "RFdiffusion_output" to
# "Design" — confirm that consumers of the CSV expect the new name.
data = data.rename(columns={"RFdiffusion_output": "Design"})

In [8]:
# Show the two binder columns from the merged table side by side.
data.loc[:, ["Binder_bool", "Binder_class"]]

Unnamed: 0,Binder_bool,Binder_class
0,True,weak
1,True,weak
2,False,none
3,False,none
4,False,weak
...,...,...
90,False,
91,True,strong
92,False,
93,False,


In [10]:
# Write the merged table as CSV to the gs:// destination, omitting the row index.
output_uri = 'gs://polaris-public/polaris-recipes/org-AdaptyvBio/raw/IL7Ra_binders_data.csv'
data.to_csv(output_uri, index=False)