In [1]:
from helpers.setup import setup_django

In [2]:
# Initialise Django through the project helper; this runs before the cell that
# imports the Django models.
# NOTE(review): presumably this configures settings and calls django.setup() -
# confirm in helpers/setup.py.
setup_django()

In [3]:
import time
import datetime
import os
import json
from collections import Counter

import pandas as pd

from django.conf import settings
from django.db import connection
from django.db.models import F, Q, Prefetch
from django.contrib.postgres.aggregates import ArrayAgg
from django.core.exceptions import ObjectDoesNotExist

from structure.models import Structure
from residue.models import Residue
from contactnetwork.models import InteractingResiduePair, Interaction
from signprot.models import SignprotStructure, SignprotBarcode, SignprotInteractions, SignprotComplex
from signprot.views import SequenceSignature, SignatureMatch
from protein.models import Protein, ProteinConformation, ProteinSegment
from residue.models import ResidueGenericNumberEquivalent

from signprot.interactions import get_class_slug, get_generic_numbers, get_signature_features, group_signature_features, prepare_signature_match

In [4]:
# Stand-in for the POST payload of a signature request. Every value is a list,
# and the cells below read it by key.
# NOTE(review): the layout looks like a captured Django QueryDict - confirm.
request = {
    # JSON object, stored as one string. Keys look like generic residue numbers
    # and values like lists of PDB codes terminated by null - TODO confirm.
    'ignore': [
        '{"2x39":["6d9h",null],'
        '"2x40":["6n4b","6osa","6qno",null],'
        '"34x53":["6osa","6d9h",null],'
        '"34x54":["6n4b","6d9h",null],'
        '"34x55":["6n4b","6d9h",null],'
        '"34x50":["6osa",null],'
        '"6x26":["6qno","6d9h",null],'
        '"6x30":["6n4b","6qno","6d9h",null],'
        '"6x32":["6osa","6d9h",null],'
        '"5x69":["6n4b","6osa","6qno",null],'
        '"6x36":["6n4b","6qno","6d9h",null],'
        '"6x40":["6n4b","6osa","6d9h",null],'
        '"6x37":["6n4b","6qno",null],'
        '"5x61":["6n4b","6osa",null],'
        '"5x71":["6n4b","6osa",null],'
        '"5x72":["6osa",null],'
        '"5x75":["6osa",null],'
        '"6x23":["6osa",null],'
        '"8x48":["6n4b","6osa","6d9h",null],'
        '"8x52":["6n4b","6qno","6d9h",null],'
        '"5x65":["6n4b",null],'
        '"7x56":["6n4b","6qno",null],'
        '"8x49":["6n4b","6qno",null]}'
    ],
    # Labels of the positions to include.
    'seg[]': [
        '2x36', '2x39', '2x40',
        '34x50', '34x51', '34x53', '34x54', '34x55',
        '3x50', '3x53', '3x54',
        '5x61', '5x65', '5x68', '5x69', '5x71', '5x72', '5x75',
        '6x22', '6x23', '6x25', '6x26', '6x29', '6x30', '6x32', '6x33',
        '6x36', '6x37', '6x40',
        '7x56',
        '8x47', '8x48', '8x49', '8x52',
    ],
    # The token captured with the original request is redacted: it is a
    # session secret and must not be committed with the notebook.
    'csrfmiddlewaretoken': ['<redacted>'],
    # Identifiers of the selected structures, as strings.
    'pos[]': ['20804', '20981', '21036', '21038'],
    # One receptor class name per selected structure.
    'selectedreceptorclasses[]': [
        'Class A (Rhodopsin)',
        'Class A (Rhodopsin)',
        'Class A (Rhodopsin)',
        'Class A (Rhodopsin)',
    ],
}

In [5]:
# Entry names of the receptors in the selected complexes.
# The "pos[]" values are matched against structure__protein_conformation.
prot_confs = request["pos[]"]
# The relation chain is read with plain attribute access below, so every hop
# is single-valued. select_related loads the whole chain in one JOINed query,
# where prefetch_related issued an additional query per hop.
complex_objs = SignprotComplex.objects.select_related(
    'structure__protein_conformation__protein'
).filter(structure__protein_conformation__in=prot_confs)
entry_names = [
    complex_obj.structure.protein_conformation.protein.entry_name
    for complex_obj in complex_objs
]
pos_set_in = entry_names

# Ignore in Alignment: the payload carries this as a JSON string.
ignore_in_alignment = json.loads(request["ignore"][0])

# Segments: resolve each requested label in the numbering scheme of the most
# frequently selected receptor class.
segments = []
segment_raw = request["seg[]"]
selected_receptor_classes = request["selectedreceptorclasses[]"]
most_common_class = Counter(selected_receptor_classes).most_common(1)
slug_ending = get_class_slug(most_common_class)
scheme_slug = 'gpcrdb' + slug_ending

for s in segment_raw:
    try:
        gen_object = ResidueGenericNumberEquivalent.objects.get(
            label=s, scheme__slug=scheme_slug
        )
    except ObjectDoesNotExist as e:
        # Best effort: report the unknown label and carry on with the rest.
        print("For {} a {} ".format(s, e))
    else:
        segments.append(gen_object)

In [6]:
# Proteins of the positive set, looked up by entry name.
pos_set = (
    Protein.objects
    .select_related('residue_numbering_scheme', 'species')
    .filter(entry_name__in=pos_set_in)
)

# One-sided sequence signature over the selected segments.
signature = SequenceSignature()
signature.setup_alignments_signprot(
    segments,
    pos_set,
    ignore_in_alignment=ignore_in_alignment,
)
signature.calculate_signature_onesided()

# Display payload: used only to derive the feature tables below.
display_data = signature.prepare_display_data_onesided()

# Features and regions
feats = list(display_data['a_pos'].features_combo)

# Generic numbers
generic_numbers = get_generic_numbers(display_data)

# Feature frequencies
signature_features = get_signature_features(display_data, generic_numbers, feats)
grouped_features = group_signature_features(signature_features)

# Session payload: this is what the following cells read as signature_data.
signature_data = signature.prepare_session_data()

In [7]:
# table of frequencies of features across sequences
# pd.DataFrame(signature_data['diff_matrix']['Custom'])

In [8]:
ss_pos = entry_names

# Materialise the positive set once. 'family' is included in select_related
# because protein.family.slug is read for every protein just below; without it
# each access costs a separate query.
pos_set = list(
    Protein.objects.filter(entry_name__in=ss_pos)
    .select_related('residue_numbering_scheme', 'species', 'family')
)
# First three characters of each family slug.
pfam = [protein.family.slug[:3] for protein in pos_set]

# Keep the matcher object under its own name so that `signature_match` below
# refers only to the plain result dict.
signature_matcher = SignatureMatch(
    signature_data['common_positions'],
    signature_data['numbering_schemes'],
    signature_data['common_segments'],
    signature_data['diff_matrix'],
    pos_set,
    cutoff=0,
    signprot=True,
)

# Most frequent family prefix; most_common(1) avoids sorting every entry.
maj_pfam = Counter(pfam).most_common(1)[0][0]
# Optional scoring step, currently disabled:
# signature_matcher.score_protein_class(maj_pfam)

# Result sections collected from the matcher attributes. The *_neg
# counterparts are left out, as they were in the original cell.
signature_match = {
    'scores': signature_matcher.protein_report,
    'scores_pos': signature_matcher.scores_pos,
    'protein_signatures': signature_matcher.protein_signatures,
    'signatures_pos': signature_matcher.signatures_pos,
    'signature_filtered': signature_matcher.signature_consensus,
    'relevant_gn': signature_matcher.relevant_gn,
    'relevant_segments': signature_matcher.relevant_segments,
    'numbering_schemes': signature_matcher.schemes,
}

[('gpcrdba', 'GPCRdb (Class A)', 'BW')]
Time elapsed for 6osa:  0.005104541778564453
Time elapsed for 6n4b:  0.006355762481689453
Time elapsed for 6d9h:  0.00502467155456543
Time elapsed for 6qno:  0.004848003387451172
Total time:  0.0407869815826416


In [9]:
# List the result sections collected in the signature_match dict.
signature_match.keys()

dict_keys(['relevant_gn', 'relevant_segments', 'scores_pos', 'signature_filtered', 'scores', 'numbering_schemes', 'protein_signatures', 'signatures_pos'])

In [10]:
# 'scores_pos' is keyed by ProteinConformation and holds a pair of numbers for
# each entry (see the output below), while 'scores' contains data for the
# proteins in class.
# NOTE(review): presumably the pair is (raw score, normalised percentage) for
# the structures of the input set - confirm against SignatureMatch.
signature_match['scores_pos']

OrderedDict([(<ProteinConformation: 6osa (intermediate)>, (11.25, 75.0)),
             (<ProteinConformation: 6d9h (active)>,
              (10.25, 68.33333333333333)),
             (<ProteinConformation: 6n4b (active)>, (9.75, 65.0)),
             (<ProteinConformation: 6qno (active)>, (9.0, 60.0))])