# Testing the [Zooma API](https://www.ebi.ac.uk/spot/zooma/docs/api)

In [78]:
import requests
import json
from urllib.parse import urlparse
from datatools.utils.utils import apply_to_list
from dataclasses import dataclass
from typing import List, Dict, Any, Optional, Tuple
import pandas as pd

In [4]:
# CA bundle file; the zooma client passes this value to `requests` as `verify=`.
# NOTE(review): this looks like a placeholder path — point it at a real bundle before running.
ssl_pem_file_path = "/path/to/certs/ca-certificates.crt"

In [164]:
@apply_to_list
def parse_ontology_term(url: str) -> Dict[str, str]:
    """
    Split an ontology term URL into its last two path segments.

    The last non-empty path segment is reported as the term and the segment
    before it as the ontology. Empty segments (for example the one produced
    by a trailing slash) are ignored so they are never mistaken for the term.

    Args:
        url: Term URL, e.g. 'http://purl.obolibrary.org/obo/MONDO_0004992'.

    Returns:
        A dict with the keys "Ontology" and "Term". A segment that is missing
        from the path is returned as an empty string.

    Example (result for a single URL):
        'http://purl.obolibrary.org/obo/MONDO_0004992'
        -> {"Ontology": "obo", "Term": "MONDO_0004992"}

    NOTE(review): callers in this file pass a list of URLs and index the
    result, so `apply_to_list` presumably maps this function over a list —
    confirm against the decorator's implementation.
    """
    # Drop empty segments: the leading "/" always yields one, a trailing "/" yields another.
    segments = [part for part in urlparse(url).path.split("/") if part]

    term_name = segments[-1] if segments else ""
    ontology_name = segments[-2] if len(segments) > 1 else ""
    return {"Ontology": ontology_name, "Term": term_name}


class zooma:
    """
    Small client for the Zooma annotation REST API.

    Attributes:
        BASE_URL: Root URL that every endpoint is joined onto.
        ssl_pem_file_path: Value passed as `verify=` to each `requests` call.
    """

    def __init__(self, ssl_pem_file_path):
        """Store the API root and the CA bundle path used for TLS verification."""
        self.BASE_URL = "https://www.ebi.ac.uk/spot/zooma/v2/api"
        self.ssl_pem_file_path = ssl_pem_file_path

    @staticmethod
    def is_valid_url(url):
        """
        Return True when `url` parses with both a scheme and a network location.

        Declared as a static method so it can be called on the class or on an
        instance; it does not use any instance state.
        """
        try:
            result = urlparse(url)
            return all([result.scheme, result.netloc])
        except ValueError:
            return False

    def join_url(self, *args):
        """Join the parts with single slashes, stripping slashes at each part's edges."""
        return "/".join(str(part).strip("/") for part in args)

    def handle_request(self, method, endpoint, params=None, timeout=30):
        """
        Send a GET or POST request to `endpoint` under `BASE_URL`.

        Args:
            method: "GET" or "POST" (case-insensitive).
            endpoint: Path relative to `BASE_URL`.
            params: Sent as the query string for GET and as the JSON body for POST.
            timeout: Seconds passed to `requests`, so a stalled connection
                cannot block the notebook indefinitely.

        Returns:
            The response object on success. If the server answered with an
            error status, the failure is printed and that error response is
            returned. If no response was received at all (connection error,
            timeout), the failure is printed and None is returned.

        Raises:
            ValueError: If `method` is neither GET nor POST.
        """
        verb = str(method).upper()
        # Reject bad input up front; this is a caller error, not a request failure.
        if verb not in ("GET", "POST"):
            raise ValueError("Unsupported HTTP method")

        url = self.join_url(self.BASE_URL, endpoint)
        try:
            if verb == "GET":
                response = requests.get(
                    url,
                    params=params,
                    verify=self.ssl_pem_file_path,
                    timeout=timeout,
                )
            else:
                response = requests.post(
                    url,
                    json=params,
                    verify=self.ssl_pem_file_path,
                    timeout=timeout,
                )

            response.raise_for_status()
            return response

        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            # `e.response` is the error response for HTTP errors and None when
            # the request never completed, so no unbound local is ever returned.
            return e.response

    def general_get(self, endpoint, params=None):
        """Issue a GET request to `endpoint` and return whatever `handle_request` returns."""
        return self.handle_request("GET", endpoint, params)

    def response_to_df(self, response):
        """
        Turn an annotation response into a DataFrame, print it and return it.

        Each row merges the annotation's "annotatedProperty" fields, the
        parsed first semantic tag ("Ontology" / "Term") and its "Confidence".
        Columns that are empty for every row are dropped.

        Args:
            response: Object whose `.text` holds a JSON list of annotations.

        Returns:
            The resulting DataFrame; an empty DataFrame when there are no annotations.
        """
        data = json.loads(response.text)
        print("Number of results: ", len(data))
        print("Results: ")

        records = []
        for annotation in data:
            # An annotation without semantic tags has nothing to parse; keep its row anyway.
            tags = annotation.get("semanticTags") or []
            ontology_fields = parse_ontology_term(tags)[0] if tags else {}
            records.append(
                {
                    **annotation["annotatedProperty"],
                    **ontology_fields,
                    "Confidence": annotation["confidence"],
                }
            )

        if not records:
            # Without rows there is no "Confidence" column to sort on.
            results = pd.DataFrame(records)
            print(results)
            return results

        # NOTE(review): this sorts the confidence labels alphabetically
        # (GOOD < HIGH < MEDIUM), not by strength — confirm the intended order.
        results = (
            pd.DataFrame(records)
            .sort_values("Confidence", ascending=True)
            .dropna(how="all", axis=1)
        )
        print(results)

        return results

    def get_annotations(self, term):
        """
        Predict ontology annotations for a free-text term.

        Args:
            term: Text to annotate; it is lower-cased before being sent.

        Returns:
            A `(data, results)` tuple — the parsed JSON payload and the
            DataFrame from `response_to_df` — when the service answers with
            HTTP 200; None otherwise, including when no response was received.
        """
        endpoint = "/services/annotate"

        print("Term: ", term)
        # Send the term with its spaces intact: `requests` URL-encodes query
        # values itself, so a hand-inserted "+" would reach the service as a
        # literal plus sign ("%2B") rather than as a space.
        params = {"propertyValue": term.lower()}

        response = self.handle_request("GET", endpoint, params)

        if response is None or response.status_code != 200:
            return None

        data = json.loads(response.text)
        results = self.response_to_df(response)

        return data, results


# Build one client; the later cells reuse it for every query.
zooma_worker = zooma(ssl_pem_file_path)

# get_annotations returns a (parsed JSON, DataFrame) tuple on HTTP 200 and None otherwise,
# so `data` holds that tuple rather than just the payload.
data = zooma_worker.get_annotations("confocal microscopy")

Term:  confocal microscopy
Number of results:  1
Results: 
         propertyValue Ontology         Term Confidence
0  Confocal Microscopy      obo  NCIT_C17753       GOOD


In [165]:
# Mixed-case input: get_annotations lower-cases the term before sending it.
data = zooma_worker.get_annotations("C3HeB")

Term:  C3HeB
Number of results:  2
Results: 
     propertyValue Ontology         Term Confidence
0  C3HeB/FeJ Mouse      obo  NCIT_C37374     MEDIUM
1        C3HeB/FeJ      efo  EFO_0022465     MEDIUM


In [171]:
# Capitalised input; it is lower-cased inside get_annotations, so "lung" is what gets queried.
data = zooma_worker.get_annotations("Lung")

Term:  Lung
Number of results:  1
Results: 
                                                 uri   propertyType  \
0  http://rdf.ebi.ac.uk/resource/zooma/031E0DDD6C...  organism part   

  propertyValue Ontology            Term Confidence  
0          lung      obo  UBERON_0002048       HIGH  


In [169]:
# Single-word term; `data` is again the (parsed JSON, DataFrame) tuple, or None on a non-200 response.
data = zooma_worker.get_annotations("tuberculosis")

Term:  tuberculosis
Number of results:  1
Results: 
                                                 uri propertyType  \
0  http://rdf.ebi.ac.uk/resource/zooma/C046A5C0E5...      disease   

  propertyValue Ontology           Term Confidence  
0  tuberculosis     ORDO  Orphanet_3389       HIGH  


In [167]:
# Multi-word phrase.
# NOTE(review): the single hit shown below looks only loosely related to the query —
# worth checking how the service matches multi-word values.
data = zooma_worker.get_annotations("channel number")

Term:  channel number
Number of results:  1
Results: 
                                      propertyValue Ontology            Term  \
0  increased number of primordial hindbrain channel      obo  UPHENO_0014578   

  Confidence  
0       GOOD  


In [163]:
# Short token; the two hits shown below carry different propertyType values,
# since this call sends no propertyType of its own.
data = zooma_worker.get_annotations("cd45")

Term:  cd45
Number of results:  2
Results: 
                                                 uri         propertyType  \
0  http://rdf.ebi.ac.uk/resource/zooma/F7C4421AF6...  selected cell types   
1  http://rdf.ebi.ac.uk/resource/zooma/04B696E33A...               strain   

  propertyValue Ontology         Term Confidence  
0         CD45+      obo   CL_0000738     MEDIUM  
1          CD-1      efo  EFO_0005180     MEDIUM  


In [161]:
# general_get returns the raw response object (not a DataFrame); this call sends a
# propertyType parameter alongside propertyValue.
data = zooma_worker.general_get('/services/annotate', {'propertyType': 'cell type', 'propertyValue': 'CD11'})

In [162]:
# Tabulate the response fetched in the previous cell (`data` is the response object from general_get).
# response_to_df prints the frame and also returns it, so it appears twice in the output.
# NOTE(review): the row below reports propertyType 'strain' although 'cell type' was
# requested — confirm how the service uses propertyType.
zooma_worker.response_to_df(data)

Number of results:  1
Results: 
                                                 uri propertyType  \
0  http://rdf.ebi.ac.uk/resource/zooma/04B696E33A...       strain   

  propertyValue Ontology         Term Confidence  
0          CD-1      efo  EFO_0005180       GOOD  


Unnamed: 0,uri,propertyType,propertyValue,Ontology,Term,Confidence
0,http://rdf.ebi.ac.uk/resource/zooma/04B696E33A...,strain,CD-1,efo,EFO_0005180,GOOD


In [150]:
# Query parameters sent with the listing request.
params = {
    'limit': 30
}

# Relative endpoint; the client joins it onto its BASE_URL.
endpoint = "properties/types"

response = zooma_worker.general_get(endpoint, params)
# Decode the JSON body; as the cell's last expression, its value is displayed as the output.
response.json()

['compound',
 'developmental stage',
 'disease',
 'cell type',
 'cell line',
 'inferred cell type - ontology labels',
 'organism',
 'growth condition',
 'inferred cell type - authors labels',
 'strain',
 'treatment',
 'organism part',
 'ploidy',
 'sampling site',
 'sex',
 'disease state',
 'diet',
 'ethnic origin',
 'cohort',
 'physical activity',
 'inferred cell type',
 'infect',
 'environmental stress',
 'biosource type',
 'specimen with known storage state',
 'rna interference',
 'phenotype',
 'ecotype',
 'disease staging',
 'organismpart']