Skip to content

Commit

Permalink
Change: Move NCBI db initialization to inside Lineage
Browse files Browse the repository at this point in the history
Hopefully this will prevent the download of the whole `NCBI` database on
`readthedocs`
  • Loading branch information
dileep-kishore committed Feb 4, 2021
1 parent 18e6beb commit 6037fdd
Showing 1 changed file with 97 additions and 97 deletions.
194 changes: 97 additions & 97 deletions micone/main/lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,24 @@

BaseLineage = namedtuple("Lineage", "Kingdom Phylum Class Order Family Genus Species")

NCBI = NCBITaxa()


class Lineage(BaseLineage):
"""
`NamedTuple` that stores the lineage of a taxon and methods to interact with it
Attributes
----------
Kingdom: str
Phylum: str
Class: str
Order: str
Family: str
Genus: str
Species: str
`NamedTuple` that stores the lineage of a taxon and methods to interact with it
Attributes
----------
Kingdom: str
Phylum: str
Class: str
Order: str
Family: str
Genus: str
Species: str
"""

NCBI = NCBITaxa()

def __new__(
cls,
Kingdom: str = "",
Expand All @@ -56,16 +56,16 @@ def __new__(
@staticmethod
def _normalize_tax(tax: str) -> str:
"""
Normalize taxonomy name by removing unwanted characters
Normalize taxonomy name by removing unwanted characters
Parameters
----------
tax : str
Parameters
----------
tax : str
Returns
-------
str
Normalized taxonomy name
Returns
-------
str
Normalized taxonomy name
"""
return (
tax.strip()
Expand All @@ -77,16 +77,16 @@ def _normalize_tax(tax: str) -> str:

def __sub__(self, other: "Lineage") -> "Lineage":
"""
Returns the lineage that is in common between two lineages
Returns the lineage that is in common between two lineages
Parameters
----------
other : "Lineage"
Parameters
----------
other : "Lineage"
Returns
-------
Lineage
Common lineage
Returns
-------
Lineage
Common lineage
"""
for i, (s_lin, o_lin) in enumerate(zip(self, other)):
if s_lin != o_lin:
Expand All @@ -96,12 +96,12 @@ def __sub__(self, other: "Lineage") -> "Lineage":
@property
def name(self) -> Tuple[str, str]:
"""
Get the lowest populated level and name of the taxon
Get the lowest populated level and name of the taxon
Returns
-------
Tuple[str, str]
Tuple containing (level, name)
Returns
-------
Tuple[str, str]
Tuple containing (level, name)
"""
fields = self._fields
for field in reversed(fields):
Expand All @@ -114,20 +114,20 @@ def name(self) -> Tuple[str, str]:
@classmethod
def from_str(cls, lineage_str: str, style: str = "gg") -> "Lineage":
"""
Create `Lineage` instance from a lineage string
Parameters
----------
lineage_str : str
Lineage in the form of a string
style : {'gg', 'silva'}, optional
The style of the lineage string
Default is 'gg'
Returns
-------
Lineage
Instance of the `Lineage` class
Create `Lineage` instance from a lineage string
Parameters
----------
lineage_str : str
Lineage in the form of a string
style : {'gg', 'silva'}, optional
The style of the lineage string
Default is 'gg'
Returns
-------
Lineage
Instance of the `Lineage` class
"""
if style == "gg":
if lineage_str.startswith("k"):
Expand All @@ -150,18 +150,18 @@ def from_str(cls, lineage_str: str, style: str = "gg") -> "Lineage":

def to_str(self, style: str, level: str) -> str:
"""
Return the string Lineage of the instance in requested 'style'
Parameters
----------
style : {'gg', 'silva'}
The style of the lineage string
level : str
The lowest Lineage field that is to be populated
Returns
-------
str
Return the string Lineage of the instance in requested 'style'
Parameters
----------
style : {'gg', 'silva'}
The style of the lineage string
level : str
The lowest Lineage field that is to be populated
Returns
-------
str
"""
if level not in self._fields:
raise ValueError(f"{level} not a valid field for Lineage")
Expand All @@ -179,23 +179,23 @@ def to_str(self, style: str, level: str) -> str:

def __str__(self) -> str:
    """
    Render this lineage as a string

    Returns
    -------
    str
        The lineage string in 'gg' format, populated down to the Species level
    """
    # Delegate to `to_str`, pinning the style to 'gg' and the lowest level to Species
    gg_options = {"style": "gg", "level": "Species"}
    return self.to_str(**gg_options)

def to_dict(self, level: str) -> Dict[str, str]:
"""
Get the lineage in the form of a dictionary
Get the lineage in the form of a dictionary
Parameters
----------
level : str
The lowest Lineage field to be used to populate the dictionary
Parameters
----------
level : str
The lowest Lineage field to be used to populate the dictionary
"""
if level not in self._fields:
raise ValueError(f"{level} not a valid field for Lineage")
Expand All @@ -205,17 +205,17 @@ def to_dict(self, level: str) -> Dict[str, str]:

def get_superset(self, level: str) -> "Lineage":
"""
Return a superset of the current lineage for the requested level
Return a superset of the current lineage for the requested level
Parameters
----------
level : str
The lowest Lineage field to be used to calculate the superset
Parameters
----------
level : str
The lowest Lineage field to be used to calculate the superset
Returns
-------
Lineage
Lineage instance that is a superset of current instance
Returns
-------
Lineage
Lineage instance that is a superset of current instance
"""
if level not in self._fields:
raise ValueError(f"{level} not a valid field for Lineage")
Expand All @@ -226,19 +226,19 @@ def get_superset(self, level: str) -> "Lineage":
@property
def taxid(self) -> Tuple[str, int]:
"""
Get the NCBI taxonomy id of the Lineage
Get the NCBI taxonomy id of the Lineage
Returns
-------
Tuple[str, int]
A tuple containing (taxonomy level, NCBI taxonomy id)
Returns
-------
Tuple[str, int]
A tuple containing (taxonomy level, NCBI taxonomy id)
"""
query = list(self)
# species or subspecies level
query.append(query[-2] + " " + query[-1].strip())
# species level
query[-2] = query[-3] + " " + query[-2].split(" ")[0].strip()
taxid_dict = NCBI.get_name_translator(query)
taxid_dict = self.NCBI.get_name_translator(query)
taxid_list = [12908]
for taxa in reversed(query):
if taxa != "" and taxa in taxid_dict:
Expand All @@ -262,22 +262,22 @@ def taxid(self) -> Tuple[str, int]:
@classmethod
def from_taxid(cls, taxid: int) -> "Lineage":
"""
Create `Lineage` instance from taxid
Create `Lineage` instance from taxid
Parameters
----------
taxid : int
A valid NCBI taxonomy id
Parameters
----------
taxid : int
A valid NCBI taxonomy id
Returns
-------
"Lineage"
Instance of the `Lineage` class
Returns
-------
"Lineage"
Instance of the `Lineage` class
"""
lineage_taxids = NCBI.get_lineage(taxid)
lineage_names = NCBI.get_taxid_translator(lineage_taxids)
lineage_taxids = self.NCBI.get_lineage(taxid)
lineage_names = self.NCBI.get_taxid_translator(lineage_taxids)
lineage_ranks = {
v.capitalize(): k for k, v in NCBI.get_rank(lineage_taxids).items()
v.capitalize(): k for k, v in self.NCBI.get_rank(lineage_taxids).items()
}
if "Superkingdom" in lineage_ranks:
lineage_ranks["Kingdom"] = lineage_ranks["Superkingdom"]
Expand Down

0 comments on commit 6037fdd

Please sign in to comment.