In [1]:
import os

from dataclasses import dataclass, field
from data_io.bio_data_query import PubMedClient, PubMedQuery
from data_io.bio_data_query import StringDBClient
from hydra.core.config_store import ConfigStore
from hydra import compose, initialize
from omegaconf import OmegaConf
from llm.llm_messenger import LLMMessenger


In [2]:
@dataclass
class PubmedTerms:
    """Search inputs for the PubMed query section of the config.

    Both lists default to empty and are passed as the ``terms`` and
    ``fields`` arguments of ``PubMedQuery``.
    """

    # Search terms (e.g. disease names, gene symbols).
    terms: list[str] = field(default_factory=list)
    # PubMed field tags such as "tiab" or "tw".
    # NOTE(review): presumably one tag per entry in `terms`, matched by
    # position — confirm against PubMedQuery.
    fields: list[str] = field(default_factory=list)


@dataclass
class StringTerms:
    """Search inputs for the STRING-DB section of the config.

    The values are passed as the ``terms`` and ``species`` arguments of
    ``StringDBClient.search``.
    """

    # Gene names to look up; defaults to an empty list.
    gene_names: list[str] = field(default_factory=list)
    # Integer species identifier; defaults to 0 when the config leaves it out.
    # NOTE(review): looks like an NCBI taxonomy id (e.g. 9606) — confirm.
    species: int = 0


@dataclass
class LlmConfig:
    """LLM settings section of the config; both fields default to ""."""

    # Model identifier string (e.g. "models/gemini-1.5-flash").
    model_specification: str = ""
    # NOTE(review): appears to be a path to a prompt-instructions text file
    # rather than the instruction text itself — confirm with the consumer.
    instructions: str = ""


@dataclass
class ConfigData:
    """Top-level config schema grouping the PubMed, STRING-DB and LLM sections.

    Each section defaults to a freshly constructed instance of its dataclass,
    so ``ConfigData()`` is valid with no arguments.
    """

    # default_factory is used because dataclasses reject mutable instances
    # as plain defaults; each ConfigData gets its own section objects.
    pubmed_terms: PubmedTerms = field(default_factory=PubmedTerms)
    string_terms: StringTerms = field(default_factory=StringTerms)
    llm_config: LlmConfig = field(default_factory=LlmConfig)


# Register the ConfigData dataclass with Hydra's ConfigStore under the name
# "config_schema" so that config files can refer to it by that name.
cs = ConfigStore.instance()
cs.store(node=ConfigData, name="config_schema")


In [3]:
# Compose the "lilrb2" config found under ./cfg. Hydra is only initialized
# for the duration of the `with` block; `cfg` stays usable afterwards.
# (The stray `os.getcwd()` call that used to open this cell was removed:
# it was not the last expression, so its value was never shown or used.)
with initialize(version_base=None, config_path="./cfg"):
    cfg = compose(config_name="lilrb2")

# Print the composed config as YAML for a quick visual check.
print(OmegaConf.to_yaml(cfg))

# Also keep a converted copy of the config produced by OmegaConf.to_object.
config_obj = OmegaConf.to_object(cfg)

pubmed_terms:
  terms:
  - Nonalcoholic Steatohepatitis
  - NASH
  - LILRB2
  fields:
  - tiab
  - tiab
  - tw
string_terms:
  gene_names:
  - LILRB2
  species: 9606
llm_config:
  model_specification: models/gemini-1.5-flash
  instructions: ../data/hypothesis_prompt_instructions.txt



In [6]:
# --- PubMed: build a query from the configured terms/fields and run it ---
pubmed_client = PubMedClient()
pubmed_cfg = cfg.pubmed_terms
pubmed_query = PubMedQuery(terms=pubmed_cfg.terms, fields=pubmed_cfg.fields)
pubmed_query_response = pubmed_client.search(pubmed_query)

# --- LLM messenger: created here, not used further in this cell ---
llm = LLMMessenger()

# --- STRING-DB: search the configured genes for the configured species ---
string_db_client = StringDBClient()
string_cfg = cfg.string_terms
string_response = string_db_client.search(
    terms=string_cfg.gene_names,
    species=string_cfg.species,
)
# Convert the raw response into a list of strings for later use.
string_response_list_str = string_db_client.response_to_strings(string_response)

In [None]:
# pubmed_query_response is a str; string_response_list_str is a list of strs.
# TODO: get the instructions and form the LLM prompt from these responses.

str

In [None]:
# 

["{'stringId_A': '9606.ENSP00000375629', 'stringId_B': '9606.ENSP00000366024', 'preferredName_A': 'LILRB2', 'preferredName_B': 'HLA-G', 'ncbiTaxonId': '9606', 'score': '0.999', 'nscore': '0', 'fscore': '0', 'pscore': '0', 'ascore': '0', 'escore': '0.955', 'dscore': '0.5', 'tscore': '0.982'}",
 "{'stringId_A': '9606.ENSP00000375629', 'stringId_B': '9606.ENSP00000379873', 'preferredName_A': 'LILRB2', 'preferredName_B': 'HLA-A', 'ncbiTaxonId': '9606', 'score': '0.984', 'nscore': '0', 'fscore': '0', 'pscore': '0', 'ascore': '0', 'escore': '0.405', 'dscore': '0.5', 'tscore': '0.953'}",
 "{'stringId_A': '9606.ENSP00000375629', 'stringId_B': '9606.ENSP00000259951', 'preferredName_A': 'LILRB2', 'preferredName_B': 'HLA-F', 'ncbiTaxonId': '9606', 'score': '0.983', 'nscore': '0', 'fscore': '0', 'pscore': '0', 'ascore': '0', 'escore': '0.549', 'dscore': '0.5', 'tscore': '0.933'}",
 "{'stringId_A': '9606.ENSP00000375629', 'stringId_B': '9606.ENSP00000399168', 'preferredName_A': 'LILRB2', 'preferred