In [None]:
import os

from dataclasses import dataclass, field
from data_io.bio_data_query import PubMedClient, PubMedQuery
from data_io.bio_data_query import StringDBClient
from hydra.core.config_store import ConfigStore
from hydra import compose, initialize
from omegaconf import OmegaConf
from llm.llm_messenger import LLMMessenger


In [None]:
@dataclass
class PubmedTerms:
    """Schema for the ``pubmed_terms`` section of the configuration.

    ``terms`` and ``fields`` are used later in this notebook to build a
    ``PubMedQuery``; ``retmax`` is forwarded to ``PubMedClient.search``.
    """

    # Search terms for the PubMed query; the first entry is also named in the
    # question sent to the LLM.
    terms: list[str] = field(default_factory=list)
    # Field qualifiers passed to PubMedQuery together with ``terms``.
    fields: list[str] = field(default_factory=list)
    # Forwarded as ``retmax`` to the PubMed search. An int is immutable, so a
    # literal default is sufficient (default_factory is only needed for mutable
    # defaults); this also matches how StringTerms.species is declared.
    # NOTE(review): 0 is only a placeholder — the YAML config is expected to
    # supply the real value.
    retmax: int = 0


@dataclass
class StringTerms:
    """Schema for the ``string_terms`` section of the configuration.

    Both values are forwarded to ``StringDBClient.search`` later in this
    notebook.
    """

    # Gene names to look up; each instance gets its own fresh list. The first
    # entry is also named in the question sent to the LLM.
    gene_names: list[str] = field(default_factory=list)
    # Integer species identifier passed as ``species`` to the search.
    # NOTE(review): presumably an NCBI taxonomy id — confirm against StringDBClient.
    species: int = 0


@dataclass
class LlmConfig:
    """Schema for the ``llm_config`` section of the configuration."""

    # Passed as ``model_specification`` when the LLM response is generated.
    model_specification: str = ""
    # Passed to the messenger's instruction lookup before the query is built.
    instructions: str = ""


@dataclass
class ConfigData:
    """Top-level configuration schema grouping the three sections below.

    Each section defaults to a freshly constructed instance of its own
    dataclass, so instances never share nested state.
    """

    # PubMed search settings.
    pubmed_terms: PubmedTerms = field(default_factory=PubmedTerms)
    # STRING DB search settings.
    string_terms: StringTerms = field(default_factory=StringTerms)
    # LLM model and instruction settings.
    llm_config: LlmConfig = field(default_factory=LlmConfig)


# Register the dataclass schema with Hydra's ConfigStore under the name
# "config_schema".
# NOTE(review): presumably the YAML config references this name in its
# defaults list so the schema is applied — confirm.
cs = ConfigStore.instance()
cs.store(node=ConfigData, name="config_schema")


In [None]:
# Compose the "lilrb2" configuration from the ./cfg directory.
# NOTE(review): config_path is relative — confirm it resolves against the
# directory this notebook is run from.
with initialize(version_base=None, config_path="./cfg"):
    cfg = compose(config_name="lilrb2")

# Print the composed configuration as YAML so the run documents its own inputs.
print(OmegaConf.to_yaml(cfg))

# Plain-Python counterpart of the composed config, produced by OmegaConf.to_object.
config_obj = OmegaConf.to_object(cfg)

In [None]:
# --- PubMed: build the query from the configured terms/fields and run it ---
pubmed_client = PubMedClient()
pubmed_query = PubMedQuery(
    terms=cfg.pubmed_terms.terms,
    fields=cfg.pubmed_terms.fields,
)
pubmed_query_response = pubmed_client.search(
    pubmed_query,
    retmax=cfg.pubmed_terms.retmax,
)

# LLM front-end used by the following cells.
llm = LLMMessenger()

# --- STRING DB: search for the configured genes in the configured species ---
string_db_client = StringDBClient()
string_response = string_db_client.search(
    terms=cfg.string_terms.gene_names,
    species=cfg.string_terms.species,
)
# Convert the raw response with the client's own helper; the result is later
# appended to the LLM context.
string_response_list_str = string_db_client.response_to_strings(string_response)

In [None]:
# Inputs from the retrieval cell: pubmed_query_response (a str) and
# string_response_list_str (a list of str). This cell fetches the LLM
# instructions and assembles the context and the question.
# NOTE(review): _get_hypothesis_instructions is a private LLMMessenger method;
# prefer a public accessor if one exists.
instructions = llm._get_hypothesis_instructions(cfg.llm_config.instructions)
for line in instructions:
    print(line)

# The question names the first gene and the first PubMed term. Both lists
# default to empty in the schema, so fail with a clear message instead of a
# bare IndexError when the config leaves either one empty.
if not cfg.string_terms.gene_names or not cfg.pubmed_terms.terms:
    raise ValueError(
        "string_terms.gene_names and pubmed_terms.terms must each contain "
        "at least one entry to build the hypothesis query"
    )
target_gene = cfg.string_terms.gene_names[0]
focus_term = cfg.pubmed_terms.terms[0]

# Context = the PubMed response followed by every STRING DB line.
context = [pubmed_query_response, *string_response_list_str]
query = [
    "Considering the evidence in the context, what are some plausible "
    f"hypotheses for the role of {target_gene} in the manifestation of "
    f"{focus_term}?"
]

In [None]:
# Send the question, the gathered context and the instructions to the
# configured model.
response = llm.generate_response(
    query=query,
    model_specification=cfg.llm_config.model_specification,
    context=context,
    instructions=instructions,
)

In [None]:
# Show the text of the model's reply; print() keeps its line breaks readable.
print(response.text)