## Setup (Django)

In [1]:
from helpers.setup import setup_django

In [2]:
# Initialise Django through the project helper before the model import further down.
# NOTE(review): presumably configures settings and calls django.setup() — confirm in helpers.setup.
setup_django()

  """)


## Setup

In [3]:
from pathlib import Path
from helpers.utility import *
from protein.models import ProteinGProtein
from seqsign.sequence_signature import SequenceSignature

## Supporting Datastores

In [4]:
# Lookup data used by the later cells; the getters are presumably provided by
# the `from helpers.utility import *` star import — TODO confirm.
# rec_clas is iterated and rec_segs is passed to SequenceSignature.setup_alignments
# in the signature loop below.
# NOTE(review): gpr_clas and gpr_segs are not referenced in the cells visible here.
rec_clas = get_receptor_classes()
rec_segs = get_receptor_segments()
gpr_clas = get_gprot_classes()
gpr_segs = get_gprot_segments()

In [5]:
# G protein labels: the part of every ProteinGProtein name before its first space.
gpr_clas2 = [
    name.split(' ')[0]
    for name in ProteinGProtein.objects.all().values_list('name', flat=True)
]

## Generating a table of all receptors and their coupling data (Guide To Pharmacology)

In [6]:
# Create the container that the following two cells fill and then process.
coupling_data = prepare_coupling_data_container()

In [7]:
# Fill the container with the coupling data.
# NOTE(review): this rebinds coupling_data to a transform of itself, so re-running
# the cell on its own feeds the already-filled container back in — confirm that is safe.
coupling_data = fill_coupling_data_container(coupling_data)

In [8]:
# Post-process the filled container; the result is tabulated in the next cell.
# NOTE(review): same self-rebinding as the previous step — re-running this cell
# alone processes already-processed data; confirm the helper tolerates that.
coupling_data = process_coupling_data(coupling_data)

In [9]:
# Tabulate the processed coupling records and preview the first rows ordered by receptor key.
df = pd.DataFrame(data=coupling_data)
df.sort_values(by='key').head()

Unnamed: 0,G12/G13,Gi/Go,Gq/G11,Gs,coupling,gprot,key,rec_class,rec_obj
230,False,True,False,False,{'Gi/Go': 'primary'},[Gi/Go],5HT1A,Class A (Rhodopsin),5ht1a_human
209,False,True,False,False,{'Gi/Go': 'primary'},[Gi/Go],5HT1B,Class A (Rhodopsin),5ht1b_human
147,False,True,False,False,{'Gi/Go': 'primary'},[Gi/Go],5HT1D,Class A (Rhodopsin),5ht1d_human
18,False,True,False,False,{'Gi/Go': 'primary'},[Gi/Go],5HT1E,Class A (Rhodopsin),5ht1e_human
206,False,True,False,False,{'Gi/Go': 'primary'},[Gi/Go],5HT1F,Class A (Rhodopsin),5ht1f_human


## Calculating Sequence Signatures for all receptors and their coupling partners

In [10]:
# Directory string handed to pickle_signature and used as the prefix of index.csv.
# NOTE(review): absolute, machine-specific location; kept as a plain str because
# later cells concatenate file names onto it with `+`.
path = '/protwis/sites/protwis/signprot/pickles/'

# Index table layout: one row per receptor class / G protein pair, holding the
# sizes of the "with" and "without" receptor sets and the two pickle file names.
columns = ['rec_class', 'gprot', 'with', 'wo', 'file_with', 'file_wo']
result_files = pd.DataFrame(columns=columns)

In [None]:
def _signature_file_name(rec_class, gprot, suffix):
    """Return the flat pickle file name for one receptor class / G protein set.

    Slashes and spaces are replaced by underscores so that the result is a
    single path component (G protein labels such as 'Gi/Go' contain '/').
    The name is built with plain string operations; routing it through
    ``Path`` first would turn '/' into the platform separator before the
    replacement could see it.
    """
    raw_name = rec_class + '_' + gprot + '_' + suffix + '.p'
    return raw_name.replace('/', '_').replace(' ', '_')


def _pickle_onesided_signature(set_type, suffix, rec_class, gprot, receptors):
    """Compute the one-sided signature of a receptor set and pickle it.

    Parameters:
        set_type:  value stored under the 'type' key ('with' / 'without').
        suffix:    file name suffix ('with' / 'wo').
        rec_class: receptor class label, stored and used in the file name.
        gprot:     G protein label, stored and used in the file name.
        receptors: the 'rec_obj' column of the selected rows of ``df``.

    Returns:
        The file name passed to ``pickle_signature`` together with ``path``.
    """
    signature = SequenceSignature()
    signature.setup_alignments(rec_segs, receptors.tolist())
    signature.calculate_signature_onesided()

    file_name = _signature_file_name(rec_class, gprot, suffix)
    pickle_signature({
        'type': set_type,
        'rec_class': rec_class,
        'gprot': gprot,
        'signature': signature,
    }, path, file_name)
    return file_name


# Rows are collected in a list and the index table is rebuilt from scratch, so
# re-running this cell does not append duplicates to a previously filled table.
index_rows = []

for rc in rec_clas:
    rc = str(rc)
    in_class = df['rec_class'] == rc

    for gp in gpr_clas2:
        print(rc, '+', gp)

        # Split the receptors of this class by whether they couple to `gp`.
        couples = df[gp].astype(bool)
        data_with = df[couples & in_class]
        data_without = df[~couples & in_class]

        file_with = _pickle_onesided_signature(
            'with', 'with', rc, gp, data_with['rec_obj'])
        file_wo = _pickle_onesided_signature(
            'without', 'wo', rc, gp, data_without['rec_obj'])

        index_rows.append([
            rc,
            gp,
            data_with.shape[0],
            data_without.shape[0],
            file_with,
            file_wo,
        ])

# Keep the column layout of the table created in the previous cell.
result_files = pd.DataFrame(index_rows, columns=result_files.columns)
result_files.to_csv(path+'index.csv')

Class A (Rhodopsin) + Gi/Go
Class A (Rhodopsin) + Gq/G11
Class A (Rhodopsin) + Gs


### Data Analysis
## Comparing Receptor Sets

In [12]:
import itertools

# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0; read_csv
# with index_col=0 restores the row labels that to_csv wrote as the first column.
# from_csv's implicit parse_dates=True is left out because that first column is
# the integer row counter of the index table, not a date.
result_file = pd.read_csv(Path(path + 'index.csv'), index_col=0)

  This is separate from the ipykernel package so we can avoid doing imports until


FileNotFoundError: File b'/protwis/sites/protwis/signprot/pickles/index.csv' does not exist

### Class A

#### Difference

Which entries are unique to each of these sets?
In other words: "Which entries are a unique type of interaction for that receptor class + signal protein combination in comparison to that receptor class versus all other possible signal proteins?"

In [18]:
# First comparison set: load the pickled signature, derive its consensus and
# tabulate the aggregated consensus data.
# NOTE(review): the trailing 0, 0 presumably select the index row and the
# with/without set — confirm in load_pickle_signature.
obj = load_pickle_signature(path, result_file, 0, 0)
cons = calc_consensus_from_signature(obj)
df1 = pd.DataFrame(aggregate_consensus_data(cons))

In [19]:
# Second comparison set: same pipeline as for df1, with 1 as the last selector.
# NOTE(review): the trailing 0, 1 presumably select the same index row and the
# other with/without set — confirm in load_pickle_signature.
obj = load_pickle_signature(path, result_file, 0, 1)
cons = calc_consensus_from_signature(obj)
df2 = pd.DataFrame(aggregate_consensus_data(cons))

In [31]:
# Column names handed to compare_sets as its drop_list.
drop_list = ['key', 'score', 'cons', 'gprot', 'rec_class']

# Keep only the rows whose code is not '-' before taking the set difference.
df1 = df1[df1['code'] != '-']
df2 = df2[df2['code'] != '-']
comp = compare_sets(df1, df2, drop_list=drop_list, method=set.difference)

Dataframe description:


Unnamed: 0,code,feature,gn,length
count,337,337,337,337
unique,10,10,337,6
top,HY,Hydrophobic,3.52x52,any
freq,182,182,1,330




Dataframe size:
(337, 4)




Unnamed: 0,code,feature,gn,length
109,Hb,Hydrogen bonding (polar),C.01-C-term-0033,any
112,Hb,Hydrogen bonding (polar),C.01-C-term-0030,any
114,Hb,Hydrogen bonding (polar),C.01-C-term-0028,any
115,Hb,Hydrogen bonding (polar),C.01-C-term-0027,any
116,Hb,Hydrogen bonding (polar),C.01-C-term-0026,any


Dataframe description:


Unnamed: 0,code,feature,gn,length
count,332,332,332,332
unique,7,8,332,6
top,HY,Hydrophobic,3.52x52,any
freq,190,190,1,322




Dataframe size:
(332, 4)




Unnamed: 0,code,feature,gn,length
324,Hb,Hydrogen bonding (polar),C.01-C-term-0029,any
325,Hb,Hydrogen bonding (polar),C.01-C-term-0028,any
326,Hb,Hydrogen bonding (polar),C.01-C-term-0027,any
327,Hb,Hydrogen bonding (polar),C.01-C-term-0026,any
328,Hb,Hydrogen bonding (polar),C.01-C-term-0025,any


In [32]:
# Print the description, size and first rows of the set-difference result `comp`.
summarize_df(comp)

Dataframe description:


Unnamed: 0,code,feature,gn,length
count,60,60,60,60
unique,6,7,60,5
top,HY,Hydrophobic,1.35-36x36,any
freq,34,34,1,52




Dataframe size:
(60, 4)




Unnamed: 0,code,feature,gn,length
0,HY,Hydrophobic,7.32x31,any
1,Hb,Hydrogen bonding (polar),3.50x50,any
2,HY,Hydrophobic,2.66-67x66,any
3,Sm,Small,3.25x25,1-2
4,HY,Hydrophobic,3.37x37,any
