In [62]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from pprint import pprint


# Folder holding this experiment's files, and the primer workbook stored in it.
workdir = r"dataset/2024.3.16_TCR&mutation_3_Breast_cancer"
primer_workbook = os.path.join(workdir, "SPRINTSEQ-primer-BZ02TNBC-20240316.xlsx")

# Read the 'for TCR' sheet, using its first column as the row index, then
# order the rows by that index.
info = pd.read_excel(
    primer_workbook,
    sheet_name='for TCR',
    index_col=0,
).sort_index()

In [63]:
def trans(seq):
    """Return the reverse complement of an upper-case A/C/G/T sequence.

    Each base is swapped for its Watson-Crick partner and the result is read
    in the opposite direction. Any symbol other than A, C, G or T raises
    ``KeyError``.
    """
    partner = {"A": "T", "T": "A", "C": "G", "G": "C"}
    complemented = "".join(partner[base] for base in seq)
    # Every complement is a single character, so reversing the joined string
    # is the same as reversing the list of complemented bases.
    return complemented[::-1]

In [68]:
from Bio.SeqUtils import MeltingTemp as mt
# Binding-arm design settings.
ARM_LENGTH = 20  # starting length (nt) of each arm; both arms always total 2 * ARM_LENGTH
TM_MIN = 45      # lowest acceptable arm Tm, compared directly with mt.Tm_NN's return value
TM_MAX = 70      # highest acceptable arm Tm


def _balance_binding_arms(target, center, arm_length=ARM_LENGTH, tm_min=TM_MIN, tm_max=TM_MAX):
    """Find two adjacent binding arms around ``center`` with Tm in [tm_min, tm_max].

    The arms are the reverse complements of ``target[center:center + rbound]``
    (left arm) and ``target[center - lbound:center]`` (right arm). Both start at
    ``arm_length`` bases; while either Tm is out of range one base is moved from
    one arm to the other, so the junction stays at ``center`` and the combined
    length stays ``2 * arm_length``.

    Parameters
    ----------
    target : str
        Sequence the arms are cut from (upper-case A/C/G/T, as required by ``trans``).
    center : int
        Index in ``target`` where the two arms meet.
    arm_length : int
        Initial length of each arm.
    tm_min, tm_max : float
        Inclusive Tm window each arm must fall into.

    Returns
    -------
    tuple
        ``(binding_left, binding_right, Tm_left, Tm_right)``.

    Raises
    ------
    ValueError
        If the window would start before the beginning of ``target``, an arm
        becomes empty, or the search returns to a split it already tried
        (which would otherwise loop forever).
    """
    lbound = rbound = arm_length
    tried = set()
    while True:
        if lbound < 1 or rbound < 1 or center - lbound < 0:
            # A negative slice start would silently wrap around to the end of
            # the sequence, and a zero-length arm has no meaningful Tm.
            raise ValueError(
                f"arm window left the sequence (lbound={lbound}, rbound={rbound}, center={center})"
            )
        if (lbound, rbound) in tried:
            # The search is deterministic in (lbound, rbound): seeing a split
            # twice means the two adjustment branches are undoing each other.
            raise ValueError(
                f"Tm search oscillates at lbound={lbound}, rbound={rbound}; "
                f"no split satisfies {tm_min} <= Tm <= {tm_max}"
            )
        tried.add((lbound, rbound))

        binding_left = trans(target[center: center + rbound])
        binding_right = trans(target[center - lbound: center])
        if not binding_left or not binding_right:
            raise ValueError(
                f"empty binding arm (lbound={lbound}, rbound={rbound}, "
                f"center={center}, sequence length={len(target)})"
            )

        # NOTE(review): R_DNA_NN1 is Biopython's RNA/DNA hybrid table, and the
        # Tm_NN docs ask for the RNA strand as `seq` in that case - confirm that
        # passing the DNA arm here is intended.
        Tm_left = mt.Tm_NN(binding_left, nn_table=mt.R_DNA_NN1)
        Tm_right = mt.Tm_NN(binding_right, nn_table=mt.R_DNA_NN1)

        if Tm_left < tm_min or Tm_right > tm_max:
            # Lengthen the left arm at the expense of the right arm.
            rbound += 1
            lbound -= 1
        elif Tm_right < tm_min or Tm_left > tm_max:
            # Lengthen the right arm at the expense of the left arm.
            lbound += 1
            rbound -= 1
        else:
            return binding_left, binding_right, Tm_left, Tm_right


# One record per row of `info`: the two arms, their Tm values, the concatenated
# binding site and the source sequence.
bds = []
for index, line in info.iterrows():
    # Junction between the arms: length of 'before_CDR3' plus half the CDR3 length.
    # Assumes TRBV_NT starts with before_CDR3 immediately followed by CDR3_NT - TODO confirm.
    bds_median = len(line['before_CDR3']) + len(line['CDR3_NT']) // 2
    try:
        binding_left, binding_right, Tm_left, Tm_right = _balance_binding_arms(
            line['TRBV_NT'], bds_median
        )
    except ValueError as err:
        # Name the offending row so a failed design is easy to locate.
        raise ValueError(f"Cannot design binding arms for {index!r}: {err}") from err

    bds.append({
        'index': index,
        'binding_left': binding_left,
        'binding_right': binding_right,
        'Tm_left': Tm_left,
        'Tm_right': Tm_right,
        'bds': binding_left + binding_right,
        'seq': line['TRBV_NT'],
    })


In [69]:
# Tabulate the per-clonotype records, keyed by the 'index' field of each record.
df = pd.DataFrame(bds).set_index('index')

In [70]:
# Show the binding-site table (rendered as the cell's last expression).
df

Unnamed: 0_level_0,binding_left,binding_right,Tm_left,Tm_right,bds,seq
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
clonotype1_TRB,GCCGAAGTACTGAATGTTTTTGG,CTCCCCCCCTGCTGGCG,48.874727,68.419815,GCCGAAGTACTGAATGTTTTTGGCTCCCCCCCTGCTGGCG,AGAGGCCCCATCTCAGACCCGAGGCTAGCATGGGCTGCAGGCTGCT...
clonotype2_TRB,GTACTGCTCGTAGAGTCCGT,CCCGGGGGACCCCACTGGCA,56.115117,68.787202,GTACTGCTCGTAGAGTCCGTCCCGGGGGACCCCACTGGCA,GGAGGTGCGAATGACTCTGCTCTCTGTCCTGTCTCCTCATCTGCAA...
clonotype3_TRB,AAGAACCCGGGGCCCCCCGC,TAATCCGAGGCTGCTGGCGC,66.908797,57.899482,AAGAACCCGGGGCCCCCCGCTAATCCGAGGCTGCTGGCGC,GGGGAGAGCCCAGCACCTCGCCCAAAGGACTTGAGTCAGAGGCCCC...
clonotype4_TRB,AAGTACTGCTCGTTAGCTGT,CCCGGACATTCTACTGGCAC,50.272346,55.511935,AAGTACTGCTCGTTAGCTGTCCCGGACATTCTACTGGCAC,GGAGGTGCGAATGACTCTGCTCTCTGTCCTGTCTCCTCATCTGCAA...
clonotype5_TRB,AAAATACTGCGTATCTGCCC,CCCTTAAACTGCTGGCACAG,47.113452,51.824163,AAAATACTGCGTATCTGCCCCCCTTAAACTGCTGGCACAG,ACCTGGAGCCCCCAGAACTGGCAGACACCTGCCTGATGCTGCCATG...


In [None]:
# Export the binding-site records to the 'TCR' sheet of the results workbook.
# The frame is rebuilt from `bds` so that 'index' is written as an ordinary column.
df = pd.DataFrame(bds)
output_path = os.path.join(workdir, 'binding_site_revised.xlsx')

if os.path.exists(output_path):
    # Append so the workbook's other sheets are kept, and replace an existing
    # 'TCR' sheet so the cell can be re-run. Append mode is only implemented by
    # the openpyxl engine, and `if_sheet_exists` is only accepted in append mode.
    writer_options = {'mode': 'a', 'engine': 'openpyxl', 'if_sheet_exists': 'replace'}
else:
    # mode='a' raises FileNotFoundError when the workbook is missing, so create it.
    writer_options = {'mode': 'w'}

with pd.ExcelWriter(output_path, **writer_options) as writer:
    df.to_excel(writer, sheet_name='TCR', index=False)