##### Setup

This boilerplate enables the use of Django models in the notebook.

In [1]:
import os
import sys

# Directory used as the project root. The PWD environment variable is preferred;
# when it is unset (e.g. on Windows) fall back to the kernel's current directory
# instead of passing None to os.chdir / sys.path.
# NOTE(review): presumably PWD is the directory Jupyter was launched from and
# contains the `protwis` package -- confirm.
PWD = os.getenv('PWD') or os.getcwd()
os.chdir(PWD)
sys.path.insert(0, PWD)

# Only sets the settings module if the variable is not already defined.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "protwis.settings")
import django
django.setup()

  """)


In [2]:
from tqdm import tqdm
from contactnetwork.models import *
from protein.models import ProteinFamily


class InteractionStatistics:
    '''Query residue-residue interactions for one protein class and summarise them.

    Typical use: pick a class with ``set_class`` (or pass ``prot_class`` to the
    constructor), call ``get_interactions`` to query and parse, then use the
    ``summarize_*`` / ``count_*`` / ``calc_*`` methods on the parsed records.

    Each parsed record is a dict with the keys ``class``, ``int_ty``,
    ``int_ty_spe``, ``int_ty_lev`` and two nested dicts ``res1`` / ``res2``
    holding ``aa`` (amino acid) and ``gn`` (generic number label).
    '''

    # Structure state id used to filter the query (labelled "active" in the query below).
    ACTIVE_STATE_ID = 3
    # In debug mode parsing stops as soon as more than this many records are stored.
    DEBUG_LIMIT = 1000

    def __init__(self, prot_class=None, debug=False):
        '''Store the configuration; no database access happens here.

        prot_class -- object exposing ``id`` and ``name`` (set later by ``set_class``
                      when omitted).
        debug      -- when true, ``_parse_interactions`` stops early (see DEBUG_LIMIT).
        '''
        self.debug = debug
        self.prot_class = prot_class
        self.interactions = None
        self.parsed_interactions = []

    def set_class(self, choice=-1):
        '''Select the protein class by index among the families with ``parent_id=1``.

        With a non-negative ``choice`` the class is selected directly; otherwise
        the available classes are printed and the index is read from stdin.
        '''
        fam_objects = ProteinFamily.objects.filter(parent_id=1)
        prot_classes = [i.name for i in fam_objects]
        if choice < 0:
            for index, name in enumerate(prot_classes):
                print('[{}]: {}'.format(index, name))
            choice = input('Pick a class (number): ')
        # input() returns a string, hence the int() conversion.
        self.prot_class = fam_objects[int(choice)]

    def get_interactions(self):
        '''Query the interactions of the selected class and parse them.

        Requires ``self.prot_class`` to be set. Returns a fixed status string.
        '''
        x = self.prot_class.id
        self.interactions = Interaction.objects.filter(
                interacting_pair__referenced_structure__protein_conformation__protein__parent__family__parent__parent__parent__id=x,
                interacting_pair__referenced_structure__state__id=self.ACTIVE_STATE_ID  #active
            ).select_related('interacting_pair')
        self._parse_interactions()
        return 'Interactions queried and parsed'

    def _parse_interactions(self):
        '''Convert ``self.interactions`` into plain dicts appended to ``parsed_interactions``.

        Records are appended, so repeated calls accumulate. A tqdm progress bar
        is shown while iterating.
        '''
        for a in tqdm(list(self.interactions)):
            pair = a.interacting_pair
            d = {
                'res1': {
                    'aa': pair.res1.amino_acid,
                    'gn': pair.res1.display_generic_number.label,
                },
                'res2': {
                    # Fixed: this previously stored res1's amino acid for res2.
                    'aa': pair.res2.amino_acid,
                    'gn': pair.res2.display_generic_number.label,
                },
                'class': self.prot_class.name,
                'int_ty': a.interaction_type,
                'int_ty_spe': a.specific_type,
                'int_ty_lev': a.interaction_level,
            }
            self.parsed_interactions.append(d)
            if self.debug and len(self.parsed_interactions) > self.DEBUG_LIMIT:
                break

    def _check_parsed(self):
        '''Return True when parsed records exist; otherwise print a hint and return False.'''
        if not self.parsed_interactions:
            print('Interactions have not been parsed yet, run _parse_interactions')
            return False
        return True

    def summarize_interaction_types(self):
        '''Return the total number of occurrences of each interaction type.

        Returns a dict ``{interaction_type: count}``, or None when nothing has
        been parsed yet.
        '''
        if not self._check_parsed():
            return None
        d = {}
        for i in self.parsed_interactions:
            d[i['int_ty']] = d.get(i['int_ty'], 0) + 1
        return d

    def count_interaction_types(self, which_res='res1'):
        '''Count the interaction types per generic number.

        which_res -- ``'res1'`` or ``'res2'``: which residue's generic number is
                     used as the outer key.

        Returns ``{generic_number: {interaction_type: count}}``, or None when
        nothing has been parsed yet.
        '''
        if not self._check_parsed():
            return None
        d = {}
        for i in self.parsed_interactions:
            per_gn = d.setdefault(i[which_res]['gn'], {})
            per_gn[i['int_ty']] = per_gn.get(i['int_ty'], 0) + 1
        return d

    def calc_frequencies(self, which_res='res1'):
        '''Calculate the frequency of each interaction type per generic number.

        Same structure as ``count_interaction_types`` but every count is divided
        by the total number of interactions recorded for that generic number.
        Returns None when nothing has been parsed yet (previously this raised
        TypeError while iterating over None).
        '''
        d = self.count_interaction_types(which_res)
        if d is None:
            return None
        for counts in d.values():
            num_int_ty = sum(counts.values())
            for int_ty in counts:
                counts[int_ty] = counts[int_ty] / num_int_ty
        return d

### Accessing Interactions
## Class A

Proof of concept for when the signal protein residues are added to the database.
For now, this only retrieves the interactions that are used in the contact network.

In [3]:
# Debug mode makes parsing stop early; passing a non-negative index to
# set_class selects the class directly instead of prompting on stdin.
stats_a = InteractionStatistics(debug=True)
stats_a.set_class(choice=0)

In [4]:
# Query and parse the interactions, then compute per-generic-number
# interaction-type frequencies keyed by the first residue of each pair.
stats_a.get_interactions()
frq_a = stats_a.calc_frequencies(which_res='res1')

  3%|▎         | 997/36909 [00:35<05:19, 112.29it/s]

In [5]:
import plotly.offline as py
import cufflinks as cf
import pandas as pd

# `py` is already plotly.offline, so call init_notebook_mode on it directly
# rather than reaching through the internal `offline` submodule.
py.init_notebook_mode(connected=True)
cf.set_config_file(offline=True)

# frq_a maps generic number -> {interaction type: frequency}; transposing puts
# one generic number per row and one interaction type per column.
df = pd.DataFrame(frq_a).T
df.head()

Unnamed: 0,Aromatic,Hydrophobic,Polar,VanDerWaals,h-bond
1.30x30,,0.5,,0.5,
1.31x31,,0.5,,0.5,
1.32x32,,0.4,0.4,0.2,
1.35x35,,1.0,,,
1.36x36,,0.5,0.25,0.25,


In [6]:
# One box per interaction type (column), showing how its frequency is
# distributed across generic numbers (rows).
df.iplot(
    kind='box',
    legend=False,
    title='Interaction type frequencies per generic number',
    xTitle='Interaction type',
    yTitle='Frequency',
)

  3%|▎         | 997/36909 [00:50<05:19, 112.29it/s]