In [1]:
import pandas as pd
import numpy as np
import re
from ast import literal_eval
import itertools

In [2]:
import os
import sys
module_path = os.path.abspath(os.path.join('../scripts/'))
if module_path not in sys.path:
    sys.path.append(module_path) #"/Volumes/tuba/herpov/tcr-pmhc-sc-project/scripts/"
from F_imputations import Impute

In [3]:
def cdr3_lst_converter(x):
    #define format of datetime
    return x.replace("[","").replace("]","").replace("'","").split(" ")

def peptide_hla_converter(x):
    return re.findall("\w+\s{1}\w{1}\d+", x.replace("[","").replace("]","").replace("'",""))

def literal_converter(val):
    try:
        return literal_eval(val)
    except SyntaxError:
        return np.nan
    except ValueError:
        return np.nan

converters={'peptide_HLA_lst': peptide_hla_converter, 'cdr3_lst_TRA': cdr3_lst_converter, 'cdr3_lst_TRB': cdr3_lst_converter, 'umi_count_lst_mhc': literal_eval, 'umi_count_lst_TRA': literal_converter, 'umi_count_lst_TRB': literal_converter} #

In [4]:
def calc_binding_concordance(df, clonotype_fmt):
    #assert df.size > 0, "df empty"
    gems_per_specificity = df.groupby([clonotype_fmt,'peptide']).gem.count().to_dict()
    df['gems_per_specificity'] = df.set_index([clonotype_fmt,'peptide']).index.map(gems_per_specificity)

    gems_per_clonotype = df.groupby([clonotype_fmt]).gem.count().to_dict()
    df['gems_per_clonotype'] = df[clonotype_fmt].map(gems_per_clonotype)
    
    df['binding_concordance'] = df.gems_per_specificity / df.gems_per_clonotype

In [5]:
EXP = "exp3"

In [6]:
library = "/Volumes/tuba/herpov/tcr-pmhc-sc-project/data/exp3_TCR/library/CDR3_beta1_29_20.xlsx"

In [7]:
TCR_BARCODE = "/Volumes/tuba/herpov/tcr-pmhc-sc-project/data/" + EXP + "_CAT_IONTORRENT_KMA_AKB/tables/tcr_barcode.cleaned.csv"
ORIGINAL    = "/Volumes/tuba/herpov/tcr-pmhc-sc-project/data/" + EXP + "_TCR/processed/cellranger_out/TCR_VDJ/outs/all_contig_annotations.csv"
SIM_TRA     = "/Volumes/tuba/herpov/tcr-pmhc-sc-project/kernel_similarity_tra.{}.tab".format(EXP)
SIM_TRB     = "/Volumes/tuba/herpov/tcr-pmhc-sc-project/kernel_similarity_trb.{}.tab".format(EXP)

In [8]:
df = pd.read_csv(TCR_BARCODE, converters=converters)
#calc_binding_concordance(df, 'ct')
#df['chain_count_TRA'] = df.apply(lambda x: len(x.cdr3_lst_TRA) if x.cdr3_lst_TRA[0] != '' else 0, axis=1)
#df['chain_count_TRB'] = df.apply(lambda x: len(x.cdr3_lst_TRB) if x.cdr3_lst_TRB[0] != '' else 0, axis=1)

In [9]:
sim_tra = pd.read_csv(SIM_TRA, index_col=0)
sim_trb = pd.read_csv(SIM_TRB, index_col=0)

In [10]:
df = Impute.prepare_dataframe(df)

In [11]:
inst = Impute(df)

In [12]:
inst.get_reference()

In [13]:
inst.get_hit('TRA_NA')
inst.get_hit('TRB_NA')
inst.get_hit('TRA')
inst.get_hit('TRB')
inst.get_hit('PEP')

In [14]:
inst.modify()

In [15]:
inst.df

Unnamed: 0,gem,peptide,epitope,cdr3_TRA,cdr3_TRB,peptide_HLA,umi_count_TRA,umi_count_TRB,umi_count_mhc,ct
0,AAACCTGAGGTCATCT-1,YSEHPTFTSQY,v9,CAVRSAYSGAGSYQLTF,unknown,YSEHPTFTSQY A0101,1.0,,1.0,42
1,AAACCTGAGTTCGATC-1,YSEHPTFTSQY,v9,CALNTGGFKTIF,CASSPPFLAGSGSSYEQYF,YSEHPTFTSQY A0101,4.0,5.0,2.0,35
2,AAACCTGCAGTAACGG-1,VTEHDTLLY,v15,CAVNIEGQKLLF,unknown,VTEHDTLLY A0101,1.0,,1.0,129
3,AAACCTGGTCTTGTCC-1,IPSINVHHY,v35,CAAKSDSGGGADGLTF,CASSAWTSNRDEQFF,IPSINVHHY B3501,3.0,5.0,1.0,1
4,AAACCTGTCCATGAGT-1,VTEHDTLLY,v15,CAAGGGGNKLTF,CASSWRGSSSYEQYF,VTEHDTLLY A0101,2.0,4.0,1.0,45
...,...,...,...,...,...,...,...,...,...,...
3592,TTTGCGCTCAGCACAT-1,SLAAYIPRL,CLYBL,unknown,CASSPRVYGELFF,SLAAYIPRL A0201,,1.0,3.0,27
3593,TTTGCGCTCATCGGAT-1,QIDVSQFGSY,134 (I-A22)neo26,unknown,CASSLLGTSGTGNEQFF,QIDVSQFGSY A0101,,1.0,1.0,9
3594,TTTGGTTCAGATAATG-1,YSEHPTFTSQY,v9,unknown,CSGIVDYGYTF,YSEHPTFTSQY A0101,,1.0,1.0,163
3595,TTTGGTTGTTCCGGCA-1,RAKFKQLL,v17,unknown,CATSGESGGLKGYNEQFF,RAKFKQLL B0801,,1.0,1.0,571


In [16]:
inst.stats

Unnamed: 0,ref,TRA_NA,TRB_NA,TRA,TRB,PEP
potential,551.0,573.0,145.0,129.0,116.0,717.0
imputations,551.0,61.0,11.0,11.0,12.0,219.0
percent,100.0,10.65,7.59,8.53,10.34,30.54
corrected,0.0,61.0,11.0,7.0,1.0,21.0


In [27]:
def lol():
    print('hello')

In [38]:
class Employee:
    
    num_of_emps = 0
    raise_amount = 1.04
    empls = list()
    
    def __init__(self, first, last, pay):
        self.first = first
        self.last = last
        self.pay = pay
        self.email = '{}.{}@company.com'.format(first, last)
        self.empls.append(first)
        
        Employee.num_of_emps += 1
        
    def fullname(self):
        return '{} {}'.format(self.first, self.last)
    
    def test(self, txt):
        print(txt)
    
    def testme(self):
        return self.test

In [39]:
inst = Employee('Helle', 'Povlsen', 10000000000)

In [40]:
a = inst.testme()

In [41]:
a('lol')

lol


In [83]:
class Developer(Employee):
    pass

In [48]:
class Manager(Employee):

    def __init__(self, first, last, pay, employees=None):
        super().__init__(first, last, pay)
        if employees is None:
            self.employees = []
        else:
            self.employees = employees

    def add_emp(self, emp):
        if emp not in self.employees:
            self.employees.append(emp)
            
    def print_emps(self):
        lol = list()
        for emp in self.employees:
            lol.append(emp)
        return emp

In [84]:
Employee.num_of_emps

0

In [85]:
Employee.empls

[]

In [86]:
Employee('Helle', 'Povlsen', 10000000000)

<__main__.Employee at 0x104b67310>

In [87]:
print(Employee.num_of_emps)

1


In [88]:
Employee.empls

['Helle']

In [89]:
Developer('Lola', 'Povlsen', 10000000000)

<__main__.Developer at 0x104b6b1d0>

In [90]:
print(Employee.num_of_emps)

2


In [93]:
Employee.empls

['Helle', 'Lola', 'Corey', 'Test']

In [92]:
dev_1 = Employee('Corey', 'Schafer', 50000)
dev_2 = Employee('Test', 'Employee', 60000)

mgr_1 = Manager('Sue', 'Smith', 90000, [dev_1])

In [55]:
mgr_1.print_emps()

<__main__.Employee at 0x104c5c110>

In [51]:
mgr_1.fullname()

'Sue Smith'

In [108]:
class Person:
    all_names = []
    num_persons = 0

    def __init__(self, name):
        self.name = name
        Person.all_names.append(name)
        Person.num_persons += 1

joe = Person('Joe')
bob = Person('Bob')
bob = Person('Jan')
Person.peptide = 'al'
print(Person.all_names)
print(Person.num_persons)
print(Person.peptide)
## ['Joe', 'Bob']

['Joe', 'Bob', 'Jan']
3
al


In [117]:
class Tester:
    def __init__(self):
        self.cdr3_TRAs = glob_var1
        self.cdr3_TRBs = glob_var2

    def print_smth(self):
        print('can I print a global variable?', glob_var)

In [118]:
glob_var = 2
glob_var1 = [1,2,3]
glob_var2 = [4,5,6]

In [119]:
job1 = Tester()

In [120]:
job1.print_smth()

can I print a global variable? 2


In [121]:
job1.cdr3_TRAs

[1, 2, 3]

In [101]:
class A:
    class_var1 = set()
    
    def __init__(self):
        self.a = 1
    
    def setb(self, var):
        A.b = var
        
    @classmethod
    def setc(cls, var):
        cls.c = var
        
    @staticmethod
    def setd(var):
        return var*2
    
    @property
    def evar(self):
        A.class_var1.update([2])
        return 3
    
    def setf(self):
        self.f = self.evar + 1

In [102]:
d = A()

In [106]:
A.class_var1

{2}

In [97]:
d.setf()

In [98]:
d.f

4

In [105]:
d.evar

3

In [74]:
d.a

1

In [76]:
d.setb(3)

In [77]:
A.b

3

In [67]:
d.b

3

In [68]:
A.static_elem = 'lol'

In [71]:
A.static_elem

'lol'

In [79]:
A.setc(4)

In [80]:
A.c

4

In [82]:
A.setd(4)

8

In [85]:
A.setb(d, 2)

In [100]:
a = set()
a.update([2])
a

{2}