# Quickstart

In this notebook, we introduce the API that defines entities such as species, genes, proteins, and cell types.

It shows how to:

- query different semantic representations based on underlying ontologies
- standardize columns in `DataFrame` objects

In [None]:
import bionty as bt
import pandas as pd

In [None]:
# Development convenience: with autoreload mode 2, edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Species

In [None]:
# Create the species entity for human; `sp` is reused by the cells below.
sp = bt.Taxon(species="human")

Listing species fields

In [None]:
# Display the fields available on the species entity.
sp.fields

The field used as the standardized ID

In [None]:
# Display the field used as the standardized id for species.
sp.std_id

Searching for a field

In [None]:
# Two lookups in one cell; the results are displayed together as a tuple.
sp.search("scientific_name"), sp.search("assembly")

## Gene

In [None]:
# Create the gene entity for human; `gn` is reused by the cells below.
gn = bt.Gene(species="human")

Listing gene-related fields

In [None]:
# Display the fields available on the gene entity.
gn.fields

The field used as the standardized ID

In [None]:
# Display the field used as the standardized id for genes.
gn.std_id

Mapping between fields

In [None]:
# Example gene identifiers for the mapping examples.
hgnc_ids = [
    "HGNC:1100",
    "HGNC:1101",
]
ensembl_ids = [
    "ENSG00000012048",
    "ENSG00000139618",
]

In [None]:
# When `id_type_to` is omitted, ids are converted into the `.std_id` field.
gn.search(
    ensembl_ids,
    id_type_from="ensembl.gene_id",
)

In [None]:
# Alternatively, pass both `id_type_from` and `id_type_to` to convert
# between any two of the attributes.
gn.search(
    ["BRCA1", "BRCA2"],
    id_type_from="hgnc_symbol",
    id_type_to="entrez.gene_id",
)

Standardizing gene symbols

In [None]:
# By default, `standardize` works on gene symbols.
df = pd.DataFrame(
    index=["RNF53", "BRCA2", "FakeGene"],
)
gn.standardize(df)  # return value unused; presumably updates `df` in place — confirm

# Show the frame after the call.
df

In [None]:
# Other id types can be passed via `id_type`, e.g. Ensembl gene ids.
df = pd.DataFrame(
    index=["ENSG00000012048", "ENSG00000139618"],
)
gn.standardize(
    df,
    id_type="ensembl.gene_id",
)

# Show the frame after the call.
df

## Protein

In [None]:
# Create the protein entity for human; `pt` is reused by the cells below.
pt = bt.Protein(species="human")

In [None]:
# Display the fields available on the protein entity.
pt.fields

In [None]:
# Display the field used as the standardized id for proteins.
pt.std_id

In [None]:
# Convert a list of ids from the "UNIPROT_ID" field to the "CHEMBL_ID" field.
uniprot_ids = [
    "P40925",
    "P40926",
    "O43175",
    "Q9UM73",
]
pt.search(
    uniprot_ids,
    id_type_from="UNIPROT_ID",
    id_type_to="CHEMBL_ID",
)

## Celltype

In [None]:
# Create the cell type entity (no species argument is passed here).
ct = bt.Celltype()

In [None]:
# NOTE(review): presumably the underlying ontology object — confirm against the bionty docs.
ct.onto

In [None]:
# Look up one entry of `onto_dict` by key
# (the key looks like a Cell Ontology term id — TODO confirm).
ct.onto_dict["CL_0002000"]

In [None]:
# Search the cell type entity for the term "T cell".
ct.search("T cell")