In [7]:
from wikidata2df import wikidata2df

query = """

SELECT ?geneLabel ?cellTypeLabel ?processLabel
WHERE 
{
  ?protein wdt:P682 wd:Q1456827. # protein molecular process neurogenesis
  ?protein wdt:P702 ?gene.       # protein encoded by gene
  
  {?gene wdt:P31 wd:Q277338.}    # gene is an instance of a pseudogene 
  UNION                          # or
  {?gene wdt:P31 wd:Q7187.}      # gene is an instance of a gene
  ?gene wdt:P703 wd:Q15978631.   # gene is found in taxon Homo sapiens
  
  ?cellType wdt:P8872 ?gene.     # cell type has marker gene
  
  ?cellType rdfs:label ?cellTypeLabel.
  ?gene   rdfs:label ?geneLabel.
  wd:Q1456827 rdfs:label ?processLabel

  FILTER(LANG(?cellTypeLabel) = "en")
  FILTER(LANG(?geneLabel) = "en")
  FILTER(LANG(?processLabel) = "en")

}
LIMIT 10
"""

neurogenesis = wikidata2df(query)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [9]:
from tabulate import tabulate
print(tabulate(neurogenesis.head(10), tablefmt="pipe", headers="keys",showindex="never"))


| geneLabel   | processLabel   | cellTypeLabel                   |
|:------------|:---------------|:--------------------------------|
| EPHB1       | neurogenesis   | human oligodendrocyte           |
| EPHB1       | neurogenesis   | human osteoclast                |
| OMP         | neurogenesis   | human purkinje neuron           |
| OMP         | neurogenesis   | human olfactory epithelial cell |
| OMP         | neurogenesis   | human neuron                    |
| PCSK9       | neurogenesis   | human delta cell                |
| PCSK9       | neurogenesis   | human loop of Henle cell        |
| CXCR4       | neurogenesis   | human b cell                    |
| CXCR4       | neurogenesis   | human T cell                    |
| CXCR4       | neurogenesis   | human nk cell                   |


In [10]:
query = """

SELECT ?cellTypeLabel ?geneLabel ?diseaseLabel 
WHERE 
{
  wd:Q11085 wdt:P2293 ?diseaseGene.  # Parkinson's disease --> genetic association --> gene
  ?cellType wdt:P8872 ?diseaseGene. # Cell type --> has marker --> gene
  
  ?cellType rdfs:label ?cellTypeLabel.
  wd:Q11085 rdfs:label ?diseaseLabel.
  ?diseaseGene   rdfs:label ?geneLabel.

  FILTER(LANG(?cellTypeLabel) = "en")
  FILTER(LANG(?diseaseLabel) = "en")
  FILTER(LANG(?geneLabel) = "en")
}
LIMIT 5
"""

# https://w.wiki/yQD

parkinson = wikidata2df(query)

In [12]:
print(tabulate(parkinson.head(10), tablefmt="pipe", headers="keys",showindex="never"))

| geneLabel   | diseaseLabel        | cellTypeLabel    |
|:------------|:--------------------|:-----------------|
| BST1        | Parkinson's disease | human b cell     |
| BST1        | Parkinson's disease | human neutrophil |
| RIT2        | Parkinson's disease | human neuron     |
| SH3GL2      | Parkinson's disease | human alpha cell |
| SH3GL2      | Parkinson's disease | human beta cell  |


In [3]:
query = """

SELECT ?cellTypeLabel ?diseaseLabel 
WHERE 
{
  wd:Q101405087 wdt:P8872 ?diseaseGene .    # human pancreatic beta cell -->  has marker -->  gene
  ?disease wdt:P2293 ?diseaseGene .         # disease --> genetic association --> gene 
 }

LIMIT 5
"""

# https://w.wiki/yQE

beta_cell = wikidata2df(query)

HTTPError: 403 Client Error: Forbidden for url: https://query.wikidata.org/sparql?query=%0A%0ASELECT+%3FcellTypeLabel+%3FdiseaseLabel+%0AWHERE+%0A%7B%0A++wd%3AQ101405087+wdt%3AP8872+%3FdiseaseGene+.++++%23+human+pancreatic+beta+cell+--%3E++has+marker+--%3E++gene%0A++%3Fdisease+wdt%3AP2293+%3FdiseaseGene+.+++++++++%23+disease+--%3E+genetic+association+--%3E+gene+%0A+%7D%0A%0ALIMIT+5%0A

In [14]:
print(tabulate(beta_cell.head(10), tablefmt="pipe", headers="keys",showindex="never"))

| diseaseLabel        | genes                   |   count | cellTypeLabel   |
|:--------------------|:------------------------|--------:|:----------------|
| obesity             | PCSK2, ADCYAP1, SLC30A8 |       3 | human beta cell |
| type 2 diabetes     | SLC30A8, TGFBR3         |       2 | human beta cell |
| Parkinson's disease | SH3GL2                  |       1 | human beta cell |
| asthma              | SLC30A8                 |       1 | human beta cell |
| aniridia            | PAX6                    |       1 | human beta cell |


In [2]:
from wikidata2df import wikidata2df

query = """

SELECT ?geneLabel ?cellTypeLabel ?processLabel
WHERE 
{
  ?protein wdt:P682 wd:Q1456827. # protein molecular process neurogenesis
  ?protein wdt:P702 ?gene.       # protein encoded by gene
  
  {?gene wdt:P31 wd:Q277338.}    # gene is an instance of a pseudogene 
  UNION                          # or
  {?gene wdt:P31 wd:Q7187.}      # gene is an instance of a gene
  ?gene wdt:P703 wd:Q15978631.   # gene is found in taxon Homo sapiens
  
  ?cellType wdt:P8872 ?gene.     # cell type has marker gene
  
  ?cellType rdfs:label ?cellTypeLabel.
  ?gene   rdfs:label ?geneLabel.
  wd:Q1456827 rdfs:label ?processLabel

  FILTER(LANG(?cellTypeLabel) = "en")
  FILTER(LANG(?geneLabel) = "en")
  FILTER(LANG(?processLabel) = "en")

}
LIMIT 10
"""

neurogenesis = wikidata2df(query)

HTTPError: 403 Client Error: Forbidden for url: https://query.wikidata.org/sparql?query=%0A%0ASELECT+%3FgeneLabel+%3FcellTypeLabel+%3FprocessLabel%0AWHERE+%0A%7B%0A++%3Fprotein+wdt%3AP682+wd%3AQ1456827.+%23+protein+molecular+process+neurogenesis%0A++%3Fprotein+wdt%3AP702+%3Fgene.+++++++%23+protein+encoded+by+gene%0A++%0A++%7B%3Fgene+wdt%3AP31+wd%3AQ277338.%7D++++%23+gene+is+an+instance+of+a+pseudogene+%0A++UNION++++++++++++++++++++++++++%23+or%0A++%7B%3Fgene+wdt%3AP31+wd%3AQ7187.%7D++++++%23+gene+is+an+instance+of+a+gene%0A++%3Fgene+wdt%3AP703+wd%3AQ15978631.+++%23+gene+is+found+in+taxon+Homo+sapiens%0A++%0A++%3FcellType+wdt%3AP8872+%3Fgene.+++++%23+cell+type+has+marker+gene%0A++%0A++%3FcellType+rdfs%3Alabel+%3FcellTypeLabel.%0A++%3Fgene+++rdfs%3Alabel+%3FgeneLabel.%0A++wd%3AQ1456827+rdfs%3Alabel+%3FprocessLabel%0A%0A++FILTER%28LANG%28%3FcellTypeLabel%29+%3D+%22en%22%29%0A++FILTER%28LANG%28%3FgeneLabel%29+%3D+%22en%22%29%0A++FILTER%28LANG%28%3FprocessLabel%29+%3D+%22en%22%29%0A%0A%7D%0ALIMIT+10%0A

In [12]:
import pandas as pd 

hs = pd.read_csv("types_hs.csv")
print(hs.to_markdown())

|    | cell_type                                 | cell_typeLabel         |   marker_count |
|---:|:------------------------------------------|:-----------------------|---------------:|
|  0 | http://www.wikidata.org/entity/Q101405035 | human interneuron      |            216 |
|  1 | http://www.wikidata.org/entity/Q101405104 | human neuron           |            203 |
|  2 | http://www.wikidata.org/entity/Q68621315  | human endothelial cell |            187 |
|  3 | http://www.wikidata.org/entity/Q101404861 | human fibroblast       |            170 |
|  4 | http://www.wikidata.org/entity/Q101405101 | human hepatocyte       |            149 |


In [18]:
mm = pd.read_csv("types_mm.csv")
print(mm.to_markdown())

|    | cell_type                                 | cell_typeLabel                |   marker_count |
|---:|:------------------------------------------|:------------------------------|---------------:|
|  0 | http://www.wikidata.org/entity/Q102426621 | mouse neocortical interneuron |            219 |
|  1 | http://www.wikidata.org/entity/Q104416243 | mouse interneuron             |            219 |
|  2 | http://www.wikidata.org/entity/Q104416303 | mouse neuron                  |            210 |
|  3 | http://www.wikidata.org/entity/Q104416178 | mouse endothelial cell        |            188 |
|  4 | http://www.wikidata.org/entity/Q104416140 | mouse fibroblast              |            176 |
