In [5]:
import json
import pandas as pd
import numpy as np
from yome import Session
from yome.models import *
from yome.util import to_df, report, mpl_setup
import re
from sqlalchemy import or_, and_
from sqlalchemy.orm import aliased
import itertools as it
import seaborn as sns

In [6]:
%mpl_setup

Populating the interactive namespace from numpy and matplotlib


In [7]:
# Open the yome session that report() and the transporter query below run against.
session = Session()

In [8]:
# Spot-check a single gene: display what each knowledgebase records for
# locus tag b3026 (one row per knowledgebase / feature type).
report(session, 'b3026')

locus_id        b3026
primary_name     qseC
Name: 0, dtype: object


Unnamed: 0_level_0,Unnamed: 1_level_0,feature
knowledgebase_name,feature_type,Unnamed: 2_level_1
EcoCyc (high),description,sensory histidine kinase QseC
EcoCyc (high),summary_html,"SummaryQseC is the sensor kinase component of the QseB-QseC two-component system, which is one component of the quorum-sensing regulatory cascade involved in the regulation of flagella and motility genes. QseBC modulates transcription of flhDC, the master regulator for flagella and motility genes [Sperandio02].A transposon insertion mutant in qseC is more susceptible to pyrazinoic acid [Schaller02]; qseBC mutants show hypersensitivity to several toxic cations [Zhou03b].qseC: ""quorum-sensing E. coli regulator C"" [Sperandio02]A site for RbsR in negatively controlling the qseC gene was identified based on genomic Selex analysis with the absolute center positions in the genome at position 3168522.5 and with the palindromic octanucleotide sequence TTtAcccAACGTcTtaGTcT [Shimada13b]. Additional Citations: [Clarke05]"
EcoCyc (high),product_type,polypeptide
EcoCyc (high),reaction_equation,QseC + ATP → sensory histidine kinase QseC - phosphorylated + ADP
EcoCyc (high),is_pseudogene,f
EcoCyc (high),is_phantom_gene,f
EcoCyc (high),evidence_html,Inferred from expression pattern [Sperandio02]Human inference of function from sequence [Sperandio02]
EcoCyc (high),is_insertion_element,f
EcoGene (tbd),protein,quorum sensing sensory histidine kinase in two-component regulatory system with QseB
EcoGene (tbd),function,Null


# Load putative transporter list from EcoCyc

In [4]:
# Read the EcoCyc list of putative inner-membrane transporters
# (tab-separated, column names on the first line).
ec_putative_transporters = pd.read_csv(
    '../sources/ecocyc/ecocyc-putative-inner-membrane-transporters.tsv',
    sep='\t',
    header=0,
)

In [5]:
# Number of rows in the EcoCyc list.
ec_putative_transporters.shape[0]

163

In [6]:
# Rows whose primary_name repeats an earlier row; an empty frame means
# every gene name in the EcoCyc list is unique.
ec_putative_transporters.loc[
    ec_putative_transporters.primary_name.duplicated()
]

Unnamed: 0,primary_name,gene_product


# Get Y-ome transporters from the database

In [7]:
# Phrases searched for (case-insensitive substring match) only within
# 'summary_html' features. Summaries are long free text, so only a very
# specific phrase is used there.
transporter_summary_html_strings = [
    'is a member of the Multi Antimicrobial Extrusion (MATE) Family of transporters'
]

# Keywords searched for (case-insensitive substring match) within every
# feature type other than 'summary_html'.
transporter_strings = [
    'transporter', 'antiporter', 'exporter', 'permease',
    'transport protein', 'PTS enzyme', 'efflux pump',
]

In [8]:
# Second set of aliases for the knowledgebase models, so the same tables can
# be joined twice in one query: once for the annotation text being searched
# and once for the Y-ome entry of the same gene.
# NOTE(review): KnowledgebaseFeature2 is not referenced by the cells shown here.
Knowledgebase2, KnowledgebaseGene2, KnowledgebaseFeature2 = (
    aliased(model)
    for model in (Knowledgebase, KnowledgebaseGene, KnowledgebaseFeature)
)

In [9]:
# Collect transporter-like feature annotations for genes that the 'Y-ome'
# knowledgebase rates as 'low' annotation quality.
#
# The knowledgebase tables are joined twice:
#   * KnowledgebaseGene / KnowledgebaseFeature supply the annotation text that
#     is searched for transporter keywords;
#   * the aliased KnowledgebaseGene2 / Knowledgebase2 restrict the result to
#     genes whose Y-ome entry has annotation_quality == 'low'.
#
# The result is used as a DataFrame below with columns locus_id,
# primary_name, feature, name and annotation_quality.
transporters = to_df(
    session.query(
        Gene.locus_id,
        KnowledgebaseGene.primary_name,
        KnowledgebaseFeature.feature,
        # These two columns are constant after the filters at the bottom
        # ('Y-ome' / 'low'); later cells drop them again.
        Knowledgebase2.name,
        KnowledgebaseGene2.annotation_quality
    )
    # First join chain: the knowledgebase entries whose features are searched.
    .join(KnowledgebaseGene, KnowledgebaseGene.gene_id == Gene.id)
    # NOTE(review): Knowledgebase is joined here but is not referenced in the
    # selected columns or in any filter.
    .join(Knowledgebase, Knowledgebase.id == KnowledgebaseGene.knowledgebase_id)
    .join(KnowledgebaseFeature, KnowledgebaseFeature.knowledgebase_gene_id == KnowledgebaseGene.id)
    .filter(or_(
        # Any feature type except summary_html: case-insensitive substring
        # match against the short keyword list.
        and_(
            or_(*[KnowledgebaseFeature.feature.ilike(f'%{x}%') for x in transporter_strings]),
            KnowledgebaseFeature.feature_type.notin_(['summary_html']),
        ),
        # summary_html features: only the specific summary phrases count as a match.
        and_(
            or_(*[KnowledgebaseFeature.feature.ilike(f'%{x}%') for x in transporter_summary_html_strings]),
            KnowledgebaseFeature.feature_type == 'summary_html',
        ),
    ))
    # Second join chain (aliased models): the Y-ome entry of the same gene.
    .join(KnowledgebaseGene2, KnowledgebaseGene2.gene_id == Gene.id)
    .join(Knowledgebase2, Knowledgebase2.id == KnowledgebaseGene2.knowledgebase_id)
    .filter(Knowledgebase2.name == 'Y-ome')
    .filter(KnowledgebaseGene2.annotation_quality == 'low')
)

In [10]:
# Collapse the query result to one row per gene name, keeping only the list
# of matched feature strings for each name.
transporters_ar = (
    transporters
    .groupby('primary_name')
    .agg(lambda values: list(values))
    .drop(columns=['locus_id', 'name', 'annotation_quality'])
)

In [11]:
# Number of distinct gene names returned by the database query.
transporters_ar.shape[0]

220

In [12]:
# Check whether ydaN was picked up by the query (see the NOTES further down
# for why it is not considered part of the Y-ome).
'ydaN' in transporters_ar.index

False

# Merge

In [13]:
# Outer-join the database matches (indexed by gene name) with the EcoCyc list
# on gene name, so that genes found by only one of the two sources are kept
# with missing values on the other side.
merged = pd.merge(
    transporters_ar,
    ec_putative_transporters,
    how='outer',
    left_index=True,
    right_on='primary_name',
)

In [14]:
# Preview the merge; rows with no EcoCyc counterpart show NaN in gene_product.
merged.head(n=5)

Unnamed: 0,feature,primary_name,gene_product
162,[DUF1656 family putative inner membrane efflux...,aaeX,
162,[CP4-6 prophage; ABC transporter ATP-binding p...,afuC,
162,"[L-asparagine transporter, transporter, L-aspa...",ansP,
0,[putative major facilitator superfamily transp...,araJ,putative major facilitator superfamily transpo...
162,[putative ABC transporter periplasmic binding ...,artI,


In [15]:
# Genes present in the EcoCyc putative-transporter list.
print(len(merged[merged.gene_product.notnull()]))

163


In [16]:
# Genes found by the database keyword query.
print(len(merged[merged.feature.notnull()]))

220


In [17]:
# Genes found by both sources (EcoCyc list and database query).
len(merged[merged.gene_product.notnull() & merged.feature.notnull()])

124

In [18]:
# Sanity check: after an outer merge no row should be missing on both sides,
# so this is expected to be 0.
len(merged[~(merged.gene_product.notnull() | merged.feature.notnull())])

0

# Set up for export

NOTES
- b3682 / glvB is excluded because it is a pseudogene
- b1342 / ydaN is not in the Y-ome because EcoCyc lists experimental evidence for it: "Assay of unpurified protein [Worlock02]"
- We might want to pull some genes out of this list, e.g. b4662 / sgrT

In [19]:
# Build the export table: one row per locus tag, with a single gene name and
# all matched feature strings joined into one '; '-separated field.
yome_transporters = (
    transporters
    .groupby('locus_id')
    .agg(lambda values: list(values))
    .drop(columns=['name', 'annotation_quality'])
    .reset_index()
    .rename(columns={'locus_id': 'locus_tag', 'feature': 'matched_features'})
    .assign(
        # Each group holds one name per matched row; keep the first one returned.
        primary_name=lambda df: df.primary_name.apply(lambda names: names[0]),
        matched_features=lambda df: df.matched_features.apply(
            lambda feats: '; '.join(feats)
        ),
    )
)
yome_transporters.head(n=5)

Unnamed: 0,locus_tag,primary_name,matched_features
0,b0007,yaaJ,putative alanine/glycine:cation symporter fami...
1,b0045,yaaU,putative major facilitator superfamily transpo...
2,b0106,hofC,Protein transport protein HofC homolog
3,b0107,hofB,Protein transport protein HofB homolog
4,b0127,yadG,putative ABC transporter ATP-binding protein Y...


In [21]:
# Write the table as tab-separated text, without the positional index column.
yome_transporters.to_csv(
    path_or_buf='../data/yome-transporters.tsv',
    sep='\t',
    index=False,
)