In [1]:
from helpers.setup import setup_django

In [2]:
# Bootstrap Django through the project helper; this runs before the model
# modules are imported in the next cell.
# NOTE(review): presumably this configures settings and calls django.setup() —
# confirm in helpers.setup.
setup_django()

In [3]:
import pandas as pd
import pypandoc

from django.db.models import F, Q, Prefetch

from signprot.models import SignprotComplex
from protein.models import Protein, ProteinConformation

# LaTeX: Signprot Complexes Overview

In [97]:
# Short aliases for the related-model lookups fetched for every complex.
# Each key becomes a key of the dicts returned by .values().
complex_fields = dict(
    pub_date=F('structure__publication_date'),
    resolution=F('structure__resolution'),
    pdb=F('structure__pdb_code__index'),
    gprot=F('protein__entry_name'),
    receptor=F('structure__protein_conformation__protein__parent__name'),
    rec_class=F('structure__protein_conformation__protein__family__parent__parent__parent__name'),
)

data = SignprotComplex.objects.values(**complex_fields)

In [98]:
# Evaluate the queryset once, then wrap the resulting dicts in a DataFrame.
records = list(data)
df = pd.DataFrame(records)

# Preview the first rows.
df.head()

Unnamed: 0,gprot,pdb,pub_date,rec_class,receptor,resolution
0,gnai1_human,6N4B,2019-01-30,Class A (Rhodopsin),CB<sub>1</sub> receptor,3.0
1,gnas2_human,5VAI,2017-05-24,Class B1 (Secretin),GLP-1 receptor,4.1
2,gnao_human,6G79,2018-06-20,Class A (Rhodopsin),5-HT<sub>1B</sub> receptor,3.78
3,gnai2_human,6D9H,2018-06-20,Class A (Rhodopsin),A<sub>1</sub> receptor,3.6
4,gnas2_human,6NIY,2019-01-23,Class B1 (Secretin),CT receptor,3.34


In [99]:
# Export the query result as CSV before the columns are reformatted for the
# LaTeX table. The DataFrame index is written as the first column because
# index=False is not passed.
df.to_csv('./signprot/thesis_steven/signprot_release.csv')

In [100]:
def _receptor_label(html_name):
    """Convert an HTML-formatted receptor name into the text used in the table."""
    plain = pypandoc.convert_text(html_name, 'plain', format='html')
    # Drop the word 'receptor' and pandoc's line breaks, then double every
    # backslash in the remaining text.
    return plain.replace('receptor', '').replace('\n', '').replace('\\', '\\\\')


# Receptor names arrive with HTML markup (e.g. <sub>…</sub>).
df['receptor'] = df['receptor'].map(_receptor_label)

# Entry names look like 'gnai1_human'; keep the part before '_' in upper case.
df['gprot'] = df['gprot'].map(lambda entry_name: entry_name.split('_')[0].upper())

# Class names look like 'Class A (Rhodopsin)'; keep the second word only.
df['rec_class'] = df['rec_class'].map(lambda class_name: class_name.split(' ')[1])

# One decimal place is enough for the table; built-in round() is used on purpose.
df['resolution'] = df['resolution'].map(lambda value: round(float(value), 1))

In [101]:
# Lower-case PDB codes of the two data subsets labelled in the table.
# NOTE(review): the names suggest class A and class B1 structures — confirm.
a  = ['6qno','6n4b','6g79','6d9h','6osa','6ddf','6oik']
b1 = ['6b3j', '6niy', '6nbf']

# Lookup from PDB code to subset label. The 'a' codes are written last, so a
# code listed in both subsets would be labelled 'D1'.
subset_by_pdb = {code: 'D2' for code in b1}
subset_by_pdb.update({code: 'D1' for code in a})

# Complexes in neither subset get a single blank as placeholder.
dataset = [subset_by_pdb.get(pdb_code.lower(), ' ') for pdb_code in df['pdb']]
df['Dataset'] = dataset

In [102]:
# Order the rows by receptor class, then subset label, then publication date.
# Within a class the unlabelled rows (Dataset == ' ') come first, because a
# blank sorts before 'D1' / 'D2'.
df = df.sort_values(by=['rec_class', 'Dataset', 'pub_date'], ascending=True)

In [103]:
# Header of the resolution column. It is defined once so the rename and the
# column selection below cannot drift apart. '\\AA' is the literal text \AA
# (the LaTeX Ångström macro); the backslash is escaped because '\A' is not a
# valid Python escape sequence and triggers a warning on recent interpreters.
resolution_header = 'Resolution (\\AA)'

# Swap the query aliases for the headers shown in the thesis table.
# index=str turns the row labels into strings.
df = df.rename(index=str, columns={
    'pub_date': "Publication Date",
    'pdb': "PDB ID",
    'resolution': resolution_header,
    'receptor': 'Receptor',
    'rec_class': 'Class',
    'gprot': 'G protein'
})

# Fix the column order of the exported table.
df = df[['Class', 'Receptor', 'G protein', 'PDB ID', 'Publication Date', resolution_header, 'Dataset']]

In [107]:
# Display the renamed and reordered table for a visual check.
df

Unnamed: 0,Class,Receptor,G protein,PDB ID,Publication Date,Resoulution (\AA),Dataset
21,A,β2-adrenoceptor,GNAS2,3SN6,2011-07-20,3.2,
20,A,A2A,GNAS2,5G53,2016-08-03,3.4,
13,A,A2A,GNAS2,6GDG,2018-05-16,4.1,
12,A,μ,GNAI1,6DDE,2018-06-13,3.5,
17,A,Rhodopsin,GNAI1,6CMO,2018-06-20,4.5,
14,A,Rhodopsin,GNAO,6FUF,2018-10-03,3.1,
6,A,M1,GNA11,6OIJ,2019-05-08,3.3,
18,A,NTS1,GNAI1,6OS9,2019-07-10,3.0,
8,A,μ,GNAI1,6DDF,2018-06-13,3.5,D1
2,A,5-HT1B,GNAO,6G79,2018-06-20,3.8,D1


In [109]:
# Caption texts for the table; the short caption fills the optional argument
# of \caption.
caption = 'Receptor – G Protein complexes available via the GPCRdb and the individual subsets of data used for the class specific analyses in this thesis'
short_caption = 'Receptor – G Protein complexes available via the GPCRdb'
label = 'num_complexes'

# Opening lines of the table float. Doubled braces are literal braces for
# str.format(). Every LaTeX backslash is written as '\\' so that '\n' is the
# only real escape: a literal backslash-n in the .tex file would be an
# undefined control sequence.
start = (
    '\\begin{{table}}[]\n'
    '\\centering\n'
    '\\caption[{}]{{{}}}\n'
    '\\label{{tbl:{}}}\n'
    '\\resizebox{{\\textwidth}}{{!}}{{%\n'
).format(short_caption, caption, label)

# Closes the \resizebox argument and the table environment.
end = '}\n\\end{table}'

# The caption and the receptor names contain non-ASCII characters, so the
# encoding is set explicitly instead of relying on the platform default.
with open("./signprot/thesis_steven/latex_tables/receptor_complexes.tex", "w", encoding="utf-8") as text_file:
    print(
        start,
        df.to_latex(index=False),
        end,
        file=text_file
    )

# Export: Receptor Families and Sub-Families

In [277]:
# Human proteins that have an accession, limited to family values <= 532.
# NOTE(review): 532 presumably marks the last receptor family id — confirm and
# consider naming the constant.
qs = (
    Protein.objects
    .filter(species__common_name='Human', family__lte=532)
    .exclude(accession=None)
)


def _family_record(protein):
    """Collect the classification fields exported for one protein."""
    return {
        # Entry names look like '<gene>_<species>'; keep the upper-cased gene part.
        'uniprot': protein.entry_name.split('_')[0].upper(),
        'class': protein.get_protein_class(),
        'family': protein.get_protein_family(),
        'sub_family': protein.get_protein_subfamily(),
    }


d = [_family_record(protein) for protein in qs]
pd.DataFrame(d).to_csv('./signprot/thesis_steven/receptor_families.csv', index=False)

# LaTeX: Amino Acid Groups

In [23]:
from common.definitions import AMINO_ACID_GROUPS, AMINO_ACID_GROUP_NAMES, AMINO_ACID_GROUP_PROPERTIES

In [24]:
def _feature_row(key):
    """Build the table row for one entry of AMINO_ACID_GROUP_PROPERTIES."""
    properties = AMINO_ACID_GROUP_PROPERTIES[key]
    return {
        'Name': AMINO_ACID_GROUP_NAMES[key],
        'Sidechain Length': properties['length'],
        'Feature Code': properties['display_name_short'],
        # Join the members of the group into one comma-separated string.
        'Amino Acids': ', '.join(AMINO_ACID_GROUPS[key]),
    }


# One row per feature definition, then put the columns in presentation order.
data = [_feature_row(key) for key in AMINO_ACID_GROUP_PROPERTIES]
df = pd.DataFrame(data)
df = df[['Name', 'Feature Code', 'Sidechain Length', 'Amino Acids']]
df.head()

Unnamed: 0,Name,Feature Code,Sidechain Length,Amino Acids
0,Hydrophobic,HY,any,"A, C, F, I, L, M, P, V, W, Y"
1,Hydrophobic,HY,4-5,"F, M, Y"
2,Hydrophobic aliphatic,HA,any,"A, I, L, M, V"
3,Hydrophobic aliphatic,HA,1-2,"A, V"
4,Hydrophobic aliphatic,HA,2-3,"I, L, V"


In [22]:
# Caption texts for the table; here the short caption is the same as the full one.
caption = 'Sequence Signature Features and their Corresponding Amino Acids'
short_caption = caption
label = 'amino_acid_groups'

# Opening lines of the table float. Doubled braces are literal braces for
# str.format(). Every LaTeX backslash is written as '\\' so that '\n' is the
# only real escape: a literal backslash-n in the .tex file would be an
# undefined control sequence.
# NOTE(review): the [H] placement needs the LaTeX `float` package — confirm it
# is loaded in the thesis preamble.
start = (
    '\\begin{{table}}[H]\n'
    '\\centering\n'
    '\\caption[{}]{{{}}}\n'
    '\\label{{tbl:{}}}\n'
    '\\resizebox{{\\textwidth}}{{!}}{{%\n'
).format(short_caption, caption, label)

# Closes the \resizebox argument and the table environment.
end = '}\n\\end{table}'

# The encoding is set explicitly so the file does not depend on the platform default.
with open("./signprot/thesis_steven/latex_tables/appendix_amino_acid_groups.tex", "w", encoding="utf-8") as text_file:
    print(
        start,
        df.to_latex(index=False),
        end,
        file=text_file
    )

In [26]:
# Export the current df as CSV. The DataFrame index is written as the first
# column because index=False is not passed.
df.to_csv('./signprot/thesis_steven/amino_acid_features.csv')