In [29]:
import matplotlib
matplotlib.use('Agg')
matplotlib.rc('pdf', fonttype=42)
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = "Arial"
import gpcrdb
from ape import EnsemblHomology, GreatApe, EnsemblHomologGene, Annotation, MatchedResidue
from config import *
import os
from utils import *

In [2]:
# Gather the chimpanzee homolog gene IDs of all class A (rhodopsin) receptors.
# A set is used on purpose: FFAR3 and GPR42 have the same homolog
# (ENSPTRG00000029043), which must be counted once.
family_A_homolog = set()
for receptor in gpcrdb.get_filtered_receptor_list():
    chimp_homology = EnsemblHomology(receptor, GreatApe.chimpanzee)
    for human_id, chimp_id in chimp_homology.gene_ids.items():
        if chimp_id not in CHIMPANZEE_BLOCK_LIST:
            # The object itself is not used below.
            # NOTE(review): presumably constructed for its side effects
            # (e.g. preparing per-gene files) -- confirm before removing.
            ensembl_entry = EnsemblHomologGene(chimp_id, GreatApe.chimpanzee, receptor, human_id)
            if receptor.receptor_class == 'Class A (Rhodopsin)':
                family_A_homolog.add(chimp_id)
print(len(family_A_homolog))

258


In [43]:
# Map each letter A-Z to the homolog gene IDs whose first alignment row with
# generic number 3.50x50 reports that letter as `source_residue_aa`.
# Gene IDs without any 3.50x50 row are printed instead of being recorded.
aa3x50 = {chr(code): [] for code in range(ord('A'), ord('Z') + 1)}
for homolog_id in family_A_homolog:
    alignment_path = os.path.join("apes", GreatApe.chimpanzee.value, f"{homolog_id}.csv")
    with open(alignment_path) as handle:
        parsed_rows = (MatchedResidue.from_csv_line(line) for line in handle)
        # Lazily stop at the first parsed row that sits at 3.50x50.
        hit = next(
            (row for row in parsed_rows if row and row.generic_number == "3.50x50"),
            None,
        )
    if hit is None:
        print(homolog_id)
    else:
        aa3x50[hit.source_residue_aa].append(homolog_id)

In [70]:
# Fig. R2a
# One stacked horizontal bar: a segment per amino acid found in `aa3x50`,
# widest first, with the Arg segment highlighted and labelled with its count.
# The top axis (ax2) carries a single tick at the total number of homologs.
os.makedirs("./figures", exist_ok=True)  # savefig fails if the directory is missing

fig, ax = plt.subplots(1, 1, figsize=(4, 1.5))
amino_acids = [aa for aa in aa3x50.keys() if len(aa3x50[aa]) > 0]
amino_acids.sort(key=lambda aa: len(aa3x50[aa]), reverse=True)
left = 0
ax2 = ax.twiny()
for aa in amino_acids:
    width = len(aa3x50[aa])
    ax.barh(0, width, left=left, color='tab:orange' if aa == 'R' else 'lightgray', linewidth=0.5, edgecolor='k')

    if aa == 'R':
        # Centre the label on the Arg segment itself. The previous `left / 2`
        # (evaluated after advancing `left`) was only correct when Arg was
        # the first segment.
        ax.text(left + width / 2, 0, f"Arg\n({width})", ha='center', va='center')
    left += width

# Ticks every 50 homologs up to the total (0..250 for the current 258 homologs).
ax.set_xticks(list(range(0, left + 1, 50)))
ax.set_xlabel("Number of family A GPCR homologs in chimpanzee")
ax.set_xlim(0, left)

ax2.set_xlim(0, left)
ax2.set_xticks([left])

ax.set_yticks([])
ax.set_ylim(-0.5, 0.5)

fig.tight_layout()
fig.savefig("./figures/R2a_3x50_aa.pdf")
plt.close(fig)  # close this figure explicitly rather than "the current one"

meta NOT subset; don't know how to subset; dropped


In [71]:
def gn_sorter(gn):
    """Return a sort key for a short generic number such as ``"3x50"``.

    The text before ``x`` is the segment number and the text after it is the
    position; both are converted to ``int``. Segment numbers below 10 are
    multiplied by 10 so that two-digit segment labels (e.g. ``12``, ``34``)
    sort between them: ``"3x50"`` -> ``(30, 50)``, ``"34x50"`` -> ``(34, 50)``.

    Raises ``ValueError`` if ``gn`` does not split into exactly two
    integer-like parts around ``x``.
    """
    segment_text, position_text = gn.split('x')
    segment = int(segment_text)
    position = int(position_text)
    return (segment * 10 if segment < 10 else segment), position

# For every short generic number ("3x50" style), count the alignment rows
# across all class A homologs that carry it; then list the numbers in
# structural order as defined by gn_sorter.
generic_numbers_assigned = {}
for homolog_id in family_A_homolog:
    alignment_path = os.path.join("apes", GreatApe.chimpanzee.value, f"{homolog_id}.csv")
    with open(alignment_path) as handle:
        for line in handle:
            residue = MatchedResidue.from_csv_line(line)
            if not residue or not residue.generic_number:
                continue
            # "3.50x50" -> "3x50": segment before the dot, position after the x.
            segment = residue.generic_number.split('.')[0]
            position = residue.generic_number.split('x')[-1]
            short_gn = f"{segment}x{position}"
            generic_numbers_assigned[short_gn] = generic_numbers_assigned.get(short_gn, 0) + 1

generic_numbers = sorted(generic_numbers_assigned, key=gn_sorter)

In [72]:
# Count missense annotations per short generic number ("3x50" style) over the
# per-gene *_CDS.csv files of all class A homologs.
# NOTE(review): this counts annotation rows, so a gene with several missense
# rows at one position contributes more than once -- confirm that is intended.
missenses = {}
for homolog_id in family_A_homolog:
    cds_path = os.path.join("apes", GreatApe.chimpanzee.value, f"{homolog_id}_CDS.csv")
    with open(cds_path) as handle:
        for line in handle:
            annotation = Annotation.from_csv_line(line)
            if not annotation or not annotation.generic_number:
                continue
            if annotation.var_type == VariationType.MISSENSE:
                # "3.50x50" -> "3x50": segment before the dot, position after the x.
                segment = annotation.generic_number.split('.')[0]
                position = annotation.generic_number.split('x')[-1]
                short_gn = f"{segment}x{position}"
                missenses[short_gn] = missenses.get(short_gn, 0) + 1

In [73]:
# Generic-number position sets.
# NOTE(review): presumably receptor positions associated with Gs / Gi / Gq
# coupling, with `common_residues` shared by all three -- confirm the source.
common_residues = {"3x50", "3x53", "3x54", "34x50", "34x51", "34x55", "5x65", "5x68", "6x32", "6x33", "6x36", "6x37", "7x56", "8x47"}
gs_residues = common_residues.union({"3x54", "3x55", "34x51", "34x54", "34x55", "5x64", "5x68", "5x69", "5x71", "5x72", "5x74", "5x75", "5x77", "8x48"})
gi_residues = common_residues.union({"12x49", "2x40", "3x50", "3x53", "34x52", "34x55", "5x71", "6x32", "7x56", "8x47", "8x49"})
gq_residues = common_residues.union({"2x37", "2x39", "2x40", "3x49", "34x51", "34x53", "34x55", "34x56", "34x57", "4x38", "4x39", "6x30", "6x33", "8x48", "8x49"})

# Union of all three sets, then each region of the three-way Venn diagram.
roi = frozenset(gs_residues.union(gi_residues, gq_residues))
gs_only = frozenset(gs_residues.difference(gi_residues, gq_residues))
gi_only = frozenset(gi_residues.difference(gs_residues, gq_residues))
gq_only = frozenset(gq_residues.difference(gs_residues, gi_residues))
# Pairwise overlaps exclude the third set.
gs_gi = frozenset(gs_residues.intersection(gi_residues).difference(gq_residues))
gi_gq = frozenset(gi_residues.intersection(gq_residues).difference(gs_residues))
gq_gs = frozenset(gq_residues.intersection(gs_residues).difference(gi_residues))
gs_gi_gq = frozenset(gs_residues.intersection(gi_residues, gq_residues))

# Print every region with its members in gn_sorter order.
for region_label, region in (
    ("Gs only", gs_only),
    ("Gi only", gi_only),
    ("Gq only", gq_only),
    ("Gs^Gi", gs_gi),
    ("Gi^Gq", gi_gq),
    ("Gq^Gs", gq_gs),
    ("Gs^Gi^Gq", gs_gi_gq),
):
    print(region_label, " ".join(sorted(region, key=gn_sorter)))

Gs only 3x55 34x54 5x64 5x69 5x72 5x74 5x75 5x77
Gi only 12x49 34x52
Gq only 2x37 2x39 3x49 34x53 34x56 34x57 4x38 4x39 6x30
Gs^Gi 5x71
Gi^Gq 2x40 8x49
Gq^Gs 8x48
Gs^Gi^Gq 3x50 3x53 3x54 34x50 34x51 34x55 5x65 5x68 6x32 6x33 6x36 6x37 7x56 8x47


In [76]:
# Fig. R2b
# One horizontal bar row per generic number (top to bottom in gn_sorter order):
# a light-gray bar for the count in `generic_numbers_assigned`, overdrawn by a
# segment-coloured bar for the count in `missenses`. Major y ticks label the
# x50 positions; orange minor ticks mark the positions contained in `roi`.
os.makedirs("./figures", exist_ok=True)  # savefig fails if the directory is missing

fig, ax = plt.subplots(1, 1, figsize=(4, 8))
yticks, yticklabels = [], []
for y, gn in enumerate(generic_numbers):
    ax.barh(y, generic_numbers_assigned.get(gn, 0), height=1, left=0, color='lightgray')
    ax.barh(y, missenses.get(gn, 0), height=1, left=0, color=Segment.generic_number_of(gn).color)
    if gn.endswith('x50'):
        yticks.append(y)
        yticklabels.append(gn)

ax.set_yticks(yticks)
ax.set_yticklabels(yticklabels)
# Reversed limits (bottom > top) put the first generic number at the top, so no
# separate invert_yaxis() call is needed. len(generic_numbers) equals the old
# `y + 1` but does not depend on the loop variable surviving the loop
# (which raised NameError for an empty list).
ax.set_ylim(len(generic_numbers), -1)
ax.set_ylabel("Structure-based generic number")

ax.set_yticks([y for y, gn in enumerate(generic_numbers) if gn in roi], minor=True)
ax.set_yticklabels([], minor=True)
ax.tick_params(axis='y', which='minor', color='tab:orange', width=1, length=3)

ax.set_xlim(0, len(family_A_homolog))
ax.set_xlabel("Number of family A GPCR homologs in chimpanzee")

fig.tight_layout()
fig.savefig("./figures/R2b_chimpanzee.pdf")
fig.savefig("./figures/R2b_chimpanzee.png")
plt.close(fig)  # release the figure, as the R2a cell does

meta NOT subset; don't know how to subset; dropped


In [75]:
# Find, per homolog gene, a missense annotation at generic position 3.50x50
# whose reference amino acid is Arg, then print the substitution together with
# its allele frequency (AF = AC / AN as stored on the annotation's `snv`).
# NOTE(review): the dict is keyed by gene ID, so if a gene has several matching
# rows only the last one is kept -- confirm that is acceptable.
r3x50_missenses = {}
for homolog_id in family_A_homolog:
    cds_path = os.path.join("apes", GreatApe.chimpanzee.value, f"{homolog_id}_CDS.csv")
    with open(cds_path) as handle:
        for line in handle:
            annotation = Annotation.from_csv_line(line)
            if not annotation:
                continue
            is_arg_3x50 = annotation.ref_aa == 'R' and annotation.generic_number == '3.50x50'
            if is_arg_3x50 and annotation.var_type == VariationType.MISSENSE:
                r3x50_missenses[homolog_id] = annotation

for homolog_id, annotation in r3x50_missenses.items():
    substitution = f"{annotation.ref_aa}{annotation.residue_number}{annotation.alt_aa}"
    frequency = f"AF={annotation.snv.AF}={annotation.snv.AC}/{annotation.snv.AN}"
    print(homolog_id, substitution, frequency)

ENSPTRG00000004170 R138S AF=1.0=50/50
ENSPTRG00000038846 R156G AF=1.0=50/50
ENSPTRG00000018611 R132C AF=1.0=50/50
ENSPTRG00000002985 R159G AF=0.06=3/50
ENSPTRG00000028910 R139G AF=1.0=50/50
