Skip to content

Commit

Permalink
Change geneid2nt var name to match pylinted in parsers/ncbi_gene_file_reader.py
Browse files: browse the repository at this point in the history
  • Loading branch information
dvklopfenstein committed Jul 30, 2018
1 parent 0b86e3e commit de68020
Show file tree
Hide file tree
Showing 20 changed files with 10,639 additions and 23 deletions.
10,609 changes: 10,609 additions & 0 deletions data/goea_data_3702/id_sym.txt

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions goatools/cli/find_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,13 +390,13 @@ def wr_txt(self, fout_txt):

def prt_tsv(self, prt=sys.stdout):
"""Print an ASCII text format."""
prtfmt = self.objprt.get_prtfmt(self.flds_cur)
prtfmt = self.objprt.get_prtfmt_str(self.flds_cur)
prt.write("{FLDS}\n".format(FLDS=" ".join(self.flds_cur)))
WrSectionsTxt.prt_sections(prt, self.desc2nts['sections'], prtfmt, secspc=True)

def prt_txt(self, prt=sys.stdout):
"""Print an ASCII text format."""
prtfmt = self.objprt.get_prtfmt(self.flds_cur)
prtfmt = self.objprt.get_prtfmt_str(self.flds_cur)
prt.write("{FLDS}\n".format(FLDS=" ".join(self.flds_cur)))
WrSectionsTxt.prt_sections(prt, self.desc2nts['sections'], prtfmt, secspc=True)

Expand Down
3 changes: 2 additions & 1 deletion goatools/go_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,8 @@ def prt_txt(self, prt, goea_results, prtfmt=None, **kws):
objprt = PrtFmt()
if prtfmt is None:
flds = ['GO', 'NS', 'p_uncorrected', 'ratio_in_study', 'ratio_in_pop', 'depth', 'name', 'study_items']
prtfmt = " ".join([objprt.default_fld2fmt[f] for f in flds])
prtfmt = objprt.get_prtfmt_str(flds)
#### prtfmt = " ".join([objprt.default_fld2fmt[f] for f in flds])
#### prtfmt = ("{GO} {NS} {p_uncorrected:5.2e} {ratio_in_study:>6} {ratio_in_pop:>9} "
#### "{depth:02} {name:40} {study_items}\n")
prtfmt = objprt.adjust_prtfmt(prtfmt)
Expand Down
2 changes: 2 additions & 0 deletions goatools/parsers/ncbi_gene_file_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def get_nts(self):
if nt_obj is not None:
flds = re.split(self.sep, line)
self.convert_ints_floats(flds)
flds[6] = [s.strip() for s in flds[6].split(',')]
ntdata = nt_obj._make(flds)
data.append(ntdata)
# Obtain the header
Expand Down Expand Up @@ -169,6 +170,7 @@ def _init_nt_hdr(self, line):
# Init indexes which will be converted to int or float
self.idxs_int = [idx for idx, hdr in enumerate(hdrs) if hdr in self.int_hdrs]
self.idxs_float = [idx for idx, hdr in enumerate(hdrs) if hdr in self.float_hdrs]
assert hdrs[6] == 'Aliases'
return namedtuple('ntncbi', ' '.join(hdrs))

@staticmethod
Expand Down
8 changes: 6 additions & 2 deletions goatools/rpt/prtfmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,11 @@ class PrtFmt(object):
def __init__(self):
pass

def get_prtfmt(self, flds, add_nl=True):
def get_prtfmt_str(self, flds, add_nl=True):
fmts = self.get_prtfmt_list(flds, add_nl)
return " ".join(fmts)

def get_prtfmt_list(self, flds, add_nl=True):
"""Get print format, given fields."""
fmts = []
for fld in flds:
Expand All @@ -60,7 +64,7 @@ def get_prtfmt(self, flds, add_nl=True):
raise Exception("UNKNOWN FORMAT: {FLD}".format(FLD=fld))
if add_nl:
fmts.append("\n")
return " ".join(fmts)
return fmts

@staticmethod
def adjust_prtfmt(prtfmt):
Expand Down
2 changes: 1 addition & 1 deletion goatools/test_data/genes_NCBI_10090_All.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import collections as cx

NtData = cx.namedtuple('NtData', 'tax_id Org_name GeneID CurrentID Status Symbol Aliases description other_designations map_location chromosome genomic_nucleotide_accession_version start_position_on_the_genomic_accession end_position_on_the_genomic_accession orientation exon_count OMIM no_hdr0')
GeneID2nt = {
GENEID2NT = {
21846 : NtData(tax_id=10090, Org_name='Mus musculus', GeneID=21846, CurrentID=0, Status='live', Symbol='Tie1', Aliases='D430008P04Rik, TIE, tie-1', description='tyrosine kinase with immunoglobulin-like and EGF-like domains 1', other_designations='tyrosine kinase receptor 1', map_location='4 54.67 cM', chromosome='4', genomic_nucleotide_accession_version='NC_000070.6', start_position_on_the_genomic_accession=118471191, end_position_on_the_genomic_accession=118489849, orientation='minus', exon_count=23, OMIM='', no_hdr0=''),
21855 : NtData(tax_id=10090, Org_name='Mus musculus', GeneID=21855, CurrentID=0, Status='live', Symbol='Timm17b', Aliases='17kDa, Sfc3, mTim17b', description='translocase of inner mitochondrial membrane 17b', other_designations='translocator of inner mitochondrial membrane 17 kDa, b|translocator of inner mitochondrial membrane 17b|translocator of inner mitochondrial membrane b', map_location='X 3.56 cM', chromosome='X', genomic_nucleotide_accession_version='NC_000086.7', start_position_on_the_genomic_accession=7899242, end_position_on_the_genomic_accession=7907652, orientation='plus', exon_count=6, OMIM='', no_hdr0=''),
546144 : NtData(tax_id=10090, Org_name='Mus musculus', GeneID=546144, CurrentID=0, Status='live', Symbol='Wdr72', Aliases='AW558070, D230040E23', description='WD repeat domain 72', other_designations='', map_location='9 D', chromosome='9', genomic_nucleotide_accession_version='NC_000075.6', start_position_on_the_genomic_accession=74110331, end_position_on_the_genomic_accession=74283203, orientation='plus', exon_count=21, OMIM='', no_hdr0=''),
Expand Down
2 changes: 1 addition & 1 deletion goatools/test_data/genes_NCBI_10090_ProteinCoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import collections as cx

NtData = cx.namedtuple('NtData', 'tax_id Org_name GeneID CurrentID Status Symbol Aliases description other_designations map_location chromosome genomic_nucleotide_accession_version start_position_on_the_genomic_accession end_position_on_the_genomic_accession orientation exon_count OMIM no_hdr0')
GeneID2nt = {
GENEID2NT = {
21846 : NtData(tax_id=10090, Org_name='Mus musculus', GeneID=21846, CurrentID=0, Status='live', Symbol='Tie1', Aliases='D430008P04Rik, TIE, tie-1', description='tyrosine kinase with immunoglobulin-like and EGF-like domains 1', other_designations='tyrosine kinase receptor 1', map_location='4 54.67 cM', chromosome='4', genomic_nucleotide_accession_version='NC_000070.6', start_position_on_the_genomic_accession=118471191, end_position_on_the_genomic_accession=118489849, orientation='minus', exon_count=23, OMIM='', no_hdr0=''),
21855 : NtData(tax_id=10090, Org_name='Mus musculus', GeneID=21855, CurrentID=0, Status='live', Symbol='Timm17b', Aliases='17kDa, Sfc3, mTim17b', description='translocase of inner mitochondrial membrane 17b', other_designations='translocator of inner mitochondrial membrane 17 kDa, b|translocator of inner mitochondrial membrane 17b|translocator of inner mitochondrial membrane b', map_location='X 3.56 cM', chromosome='X', genomic_nucleotide_accession_version='NC_000086.7', start_position_on_the_genomic_accession=7899242, end_position_on_the_genomic_accession=7907652, orientation='plus', exon_count=6, OMIM='', no_hdr0=''),
546144 : NtData(tax_id=10090, Org_name='Mus musculus', GeneID=546144, CurrentID=0, Status='live', Symbol='Wdr72', Aliases='AW558070, D230040E23', description='WD repeat domain 72', other_designations='', map_location='9 D', chromosome='9', genomic_nucleotide_accession_version='NC_000075.6', start_position_on_the_genomic_accession=74110331, end_position_on_the_genomic_accession=74283203, orientation='plus', exon_count=21, OMIM='', no_hdr0=''),
Expand Down
2 changes: 1 addition & 1 deletion goatools/test_data/genes_NCBI_7227_ProteinCoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import collections as cx

NtData = cx.namedtuple('NtData', 'tax_id Org_name GeneID CurrentID Status Symbol Aliases description other_designations map_location chromosome genomic_nucleotide_accession_version start_position_on_the_genomic_accession end_position_on_the_genomic_accession orientation exon_count OMIM no_hdr0')
GeneID2nt = {
GENEID2NT = {
32768 : NtData(tax_id=7227, Org_name='Drosophila melanogaster', GeneID=32768, CurrentID=0, Status='live', Symbol='CG6762', Aliases='Dmel_ Dmel\\CG6762', description='CG6762 gene product from transcript CG6762-RD', other_designations='CG6762-PA|CG6762-PB|CG6762-PC|CG6762-PD', map_location='16E2-16E2', chromosome='X', genomic_nucleotide_accession_version='NC_004354.4', start_position_on_the_genomic_accession=17856974, end_position_on_the_genomic_accession=17858610, orientation='plus', exon_count=4, OMIM='', no_hdr0=''),
32769 : NtData(tax_id=7227, Org_name='Drosophila melanogaster', GeneID=32769, CurrentID=0, Status='live', Symbol='Arp8', Aliases='Dmel_CG7846, CG7846, Dmel\\CG7846, arp8, dArp8', description='Actin-related protein 8', other_designations='Arp8-PA|CG7846-PA|actin related protein 8', map_location='16E2-16E2', chromosome='X', genomic_nucleotide_accession_version='NC_004354.4', start_position_on_the_genomic_accession=17858379, end_position_on_the_genomic_accession=17860800, orientation='minus', exon_count=3, OMIM='', no_hdr0=''),
32770 : NtData(tax_id=7227, Org_name='Drosophila melanogaster', GeneID=32770, CurrentID=0, Status='live', Symbol='CG6769', Aliases='Dmel_ Dmel\\CG6769', description='CG6769 gene product from transcript CG6769-RB', other_designations='CG6769-PA|CG6769-PB', map_location='16E2-16E2', chromosome='X', genomic_nucleotide_accession_version='NC_004354.4', start_position_on_the_genomic_accession=17861002, end_position_on_the_genomic_accession=17862775, orientation='plus', exon_count=3, OMIM='', no_hdr0=''),
Expand Down
2 changes: 1 addition & 1 deletion goatools/test_data/genes_NCBI_9606_All.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import collections as cx

NtData = cx.namedtuple('NtData', 'tax_id Org_name GeneID CurrentID Status Symbol Aliases description other_designations map_location chromosome genomic_nucleotide_accession_version start_position_on_the_genomic_accession end_position_on_the_genomic_accession orientation exon_count OMIM no_hdr0')
GeneID2nt = {
GENEID2NT = {
1 : NtData(tax_id=9606, Org_name='Homo sapiens', GeneID=1, CurrentID=0, Status='live', Symbol='A1BG', Aliases='A1B, ABG, GAB, HYST2477', description='alpha-1-B glycoprotein', other_designations='HEL-S-163pA|epididymis secretory sperm binding protein Li 163pA', map_location='19q13.4', chromosome='19', genomic_nucleotide_accession_version='NC_000019.10', start_position_on_the_genomic_accession=58346806, end_position_on_the_genomic_accession=58353499, orientation='minus', exon_count=8, OMIM=138670, no_hdr0=''),
2 : NtData(tax_id=9606, Org_name='Homo sapiens', GeneID=2, CurrentID=0, Status='live', Symbol='A2M', Aliases='A2MD, CPAMD5, FWP007, S863-7', description='alpha-2-macroglobulin', other_designations='C3 and PZP-like alpha-2-macroglobulin domain-containing protein 5|alpha-2-M', map_location='12p13.31', chromosome='12', genomic_nucleotide_accession_version='NC_000012.12', start_position_on_the_genomic_accession=9067708, end_position_on_the_genomic_accession=9115962, orientation='minus', exon_count=36, OMIM=103950, no_hdr0=''),
3 : NtData(tax_id=9606, Org_name='Homo sapiens', GeneID=3, CurrentID=0, Status='live', Symbol='A2MP1', Aliases='A2MP', description='alpha-2-macroglobulin pseudogene 1', other_designations='pregnancy-zone protein pseudogene', map_location='12p13.31', chromosome='12', genomic_nucleotide_accession_version='NC_000012.12', start_position_on_the_genomic_accession=9228533, end_position_on_the_genomic_accession=9234207, orientation='minus', exon_count=9, OMIM='', no_hdr0=''),
Expand Down
2 changes: 1 addition & 1 deletion goatools/test_data/genes_NCBI_9606_ProteinCoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import collections as cx

NtData = cx.namedtuple('NtData', 'tax_id Org_name GeneID CurrentID Status Symbol Aliases description other_designations map_location chromosome genomic_nucleotide_accession_version start_position_on_the_genomic_accession end_position_on_the_genomic_accession orientation exon_count OMIM no_hdr0')
GeneID2nt = {
GENEID2NT = {
1 : NtData(tax_id=9606, Org_name='Homo sapiens', GeneID=1, CurrentID=0, Status='live', Symbol='A1BG', Aliases='A1B, ABG, GAB, HYST2477', description='alpha-1-B glycoprotein', other_designations='HEL-S-163pA|epididymis secretory sperm binding protein Li 163pA', map_location='19q13.4', chromosome='19', genomic_nucleotide_accession_version='NC_000019.10', start_position_on_the_genomic_accession=58346806, end_position_on_the_genomic_accession=58353499, orientation='minus', exon_count=8, OMIM=138670, no_hdr0=''),
2 : NtData(tax_id=9606, Org_name='Homo sapiens', GeneID=2, CurrentID=0, Status='live', Symbol='A2M', Aliases='A2MD, CPAMD5, FWP007, S863-7', description='alpha-2-macroglobulin', other_designations='C3 and PZP-like alpha-2-macroglobulin domain-containing protein 5|alpha-2-M', map_location='12p13.31', chromosome='12', genomic_nucleotide_accession_version='NC_000012.12', start_position_on_the_genomic_accession=9067708, end_position_on_the_genomic_accession=9115962, orientation='minus', exon_count=36, OMIM=103950, no_hdr0=''),
131076 : NtData(tax_id=9606, Org_name='Homo sapiens', GeneID=131076, CurrentID=0, Status='live', Symbol='CCDC58', Aliases='', description='coiled-coil domain containing 58', other_designations='', map_location='3q21.1', chromosome='3', genomic_nucleotide_accession_version='NC_000003.12', start_position_on_the_genomic_accession=122359589, end_position_on_the_genomic_accession=122383281, orientation='minus', exon_count=7, OMIM='', no_hdr0=''),
Expand Down
2 changes: 1 addition & 1 deletion goatools/test_data/nature3102_goea.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import os
import xlrd
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GeneID2nt as GeneID2nt_mus
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GENEID2NT as GeneID2nt_mus
from goatools.base import get_godag
from goatools.associations import get_assoc_ncbi_taxids
from goatools.go_enrichment import GOEnrichmentStudy
Expand Down
2 changes: 1 addition & 1 deletion tests/test_genes_cell_cycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def prt_genes(fout_genes, geneids, taxid, log):
import importlib
module_name = "".join(["goatools.test_data.", fin_symbols[:-3]])
module = importlib.import_module(module_name)
geneid2nt = module.geneid2nt
geneid2nt = module.GENEID2NT
fmtstr = "{GeneID:>9} {Symbol:<16} {description}\n"
nts = [geneid2nt[geneid] for geneid in sorted(geneids) if geneid in geneid2nt]
with open(fout_genes, 'w') as prt:
Expand Down
6 changes: 3 additions & 3 deletions tests/test_i96_goea_ncbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
from goatools.base import get_godag
from goatools.associations import get_assoc_ncbi_taxids
from goatools.go_enrichment import GOEnrichmentStudy
from goatools.test_data.genes_NCBI_9606_All import GeneID2nt
from goatools.test_data.genes_NCBI_9606_All import GENEID2NT


def test_i96():
"""Test to re-produce issue#96: Passes currently."""
# Trying to duplicate: ValueError("All values in table must be nonnegative.
# Get genes
study_ids = _get_geneids()
population_ids = GeneID2nt.keys()
population_ids = GENEID2NT.keys()
# Get databases
gene2go = get_assoc_ncbi_taxids([9606], loading_bar=None)
fin_obo = os.path.join(os.getcwd(), "go-basic.obo")
Expand All @@ -25,7 +25,7 @@ def test_i96():

def _get_geneids():
"""Return study gene set."""
symbol2geneid = {nt.Symbol:g for g, nt in GeneID2nt.items()}
symbol2geneid = {nt.Symbol:g for g, nt in GENEID2NT.items()}
symbols = ['MICAL2', 'MIR1231', 'ZMIZ1', 'CRIM1', 'SMAD3', 'EFEMP1', 'CRIM1', 'ANXA2', 'VGLL3', 'FHL2', 'FSTL1', 'KIAA1456', 'MIR4316', 'MYH9', 'SIPA1L1', 'C15orf53', 'TRAM2', 'IGFBP7-AS1', 'CALD1',
'RP5-1120P11.1', 'WNT2B', 'DDAH1', 'MIR1203', 'NRG1', 'SEC24D', 'NHSL2', 'ERGIC1', 'RPL37A', 'PTPN14', 'FEZ2', 'VEGFC', 'C2orf61', 'MIR30A', 'CAPZB', 'SMAD3', 'AAGAB', 'EPS8', 'ITGB5', 'LRP1-AS',
'NRP1', 'WWTR1-AS1', 'CDK6', 'ENTPD6', 'THBS1', 'AC016735.2', 'ZCCHC24', 'LINC00592', 'HSPG2', 'MIRLET7A2', 'SMAD6', 'STARD13', 'EMP1', 'LINC00656', 'CALD1', 'C10orf142', 'ARID5B', 'MIR6809',
Expand Down
2 changes: 1 addition & 1 deletion tests/test_nbt3102.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from collections import Counter, defaultdict, OrderedDict
import pytest

from goatools.test_data.genes_NCBI_10090_ProteinCoding import GeneID2nt as GeneID2nt_mus
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GENEID2NT as GeneID2nt_mus
from goatools.test_data.nature3102_goea import get_geneid2symbol, get_goeaobj
from goatools.rpt.goea_nt_xfrm import get_study_items
from goatools.godag_plot import plot_gos, plot_goid2goobj
Expand Down
2 changes: 1 addition & 1 deletion tests/test_nbt3102_goea.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
http://www.nature.com/nbt/journal/v33/n2/full/nbt.3102.html#methods
"""

from goatools.test_data.genes_NCBI_10090_ProteinCoding import GeneID2nt as GeneID2nt_mus
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GENEID2NT as GeneID2nt_mus
from goatools.test_data.nature3102_goea import get_geneid2symbol, get_goeaobj

__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved."
Expand Down
2 changes: 1 addition & 1 deletion tests/test_nbt3102_goea_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
http://www.nature.com/nbt/journal/v33/n2/full/nbt.3102.html#methods
"""

from goatools.test_data.genes_NCBI_10090_ProteinCoding import GeneID2nt as GeneID2nt_mus
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GENEID2NT as GeneID2nt_mus
from goatools.test_data.nature3102_goea import get_geneid2symbol, get_goeaobj

__copyright__ = "Copyright (C) 2016-2018, DV Klopfenstein, H Tang, All rights reserved."
Expand Down
4 changes: 2 additions & 2 deletions tests/test_ncbi_entrez_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import sys
from collections import defaultdict
from goatools.associations import get_assoc_ncbi_taxids
from goatools.test_data.genes_NCBI_9606_ProteinCoding import GeneID2nt as GeneID2nt_hsa
from goatools.test_data.genes_NCBI_7227_ProteinCoding import GeneID2nt as GeneID2nt_dme
from goatools.test_data.genes_NCBI_9606_ProteinCoding import GENEID2NT as GeneID2nt_hsa
from goatools.test_data.genes_NCBI_7227_ProteinCoding import GENEID2NT as GeneID2nt_dme

def test_ncbi_gene2go(log=sys.stdout):
"""Return GO associations to Entrez GeneIDs. Download if necessary.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_plot_goids.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import collections as cx
from goatools.rpt.goea_nt_xfrm import MgrNtGOEAs # get_goea_nts_all
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GeneID2nt as GeneID2nt_mus
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GENEID2NT as GeneID2nt_mus
from goatools.test_data.nature3102_goea import get_geneid2symbol, get_goeaobj
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.plot import plt_goids
Expand Down
2 changes: 1 addition & 1 deletion tests/test_plot_objgoearesults.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import sys
from goatools.rpt.goea_nt_xfrm import MgrNtGOEAs # get_goea_nts_all
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GeneID2nt as GeneID2nt_mus
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GENEID2NT as GeneID2nt_mus
from goatools.test_data.nature3102_goea import get_geneid2symbol, get_goeaobj
from goatools.gosubdag.plot.plot import plot_results
from goatools.gosubdag.plot.goea_results import GoeaResults
Expand Down
2 changes: 1 addition & 1 deletion tests/test_pvalcalc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from goatools.go_enrichment import GOEnrichmentStudy
from goatools.base import get_godag
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GeneID2nt as GeneID2nt_mus
from goatools.test_data.genes_NCBI_10090_ProteinCoding import GENEID2NT as GeneID2nt_mus
from goatools.test_data.nature3102_goea import get_geneid2symbol
from goatools.associations import get_assoc_ncbi_taxids

Expand Down

0 comments on commit de68020

Please sign in to comment.