In [None]:
import sys
import os

# Project modules (e.g. `library.*`) are imported from the parent directory
# (graphrag_pipeline), so the parent of the current working directory must be
# on the Python path.
parent_dir = os.path.split(os.getcwd())[0]
if parent_dir not in sys.path:
    # Guarded so that re-running this cell does not add duplicate entries.
    sys.path.append(parent_dir)

# Utilities
from library.kg_builder.utilities.gemini_llm import GeminiLLM
from pydantic import RootModel, BaseModel, Field
from typing import List
import os
from dotenv import load_dotenv
import json

In [None]:
# Find path to config_files folder (a sibling of the current working directory)
config_files_path = os.path.join(os.path.dirname(os.getcwd()), 'config_files')

# Load environment variables from .env file; override=True asks python-dotenv to let
# values from the file take precedence over variables already set in the environment.
load_dotenv(os.path.join(config_files_path, '.env'), override=True)


def _load_json_config(file_name):
    """Parse one JSON configuration file located in ``config_files_path``.

    Args:
        file_name: Name of the JSON file inside the config_files folder.

    Returns:
        The decoded JSON content (whatever ``json.load`` produces for the file).

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Pin UTF-8 explicitly: without it, open() falls back to the locale's default
    # encoding, which can mis-decode non-ASCII characters on some platforms.
    with open(os.path.join(config_files_path, file_name), 'r', encoding='utf-8') as config_file:
        return json.load(config_file)


# Open configuration files from JSON format
build_config = _load_json_config('kg_building_config.json')    # Configuration of the knowledge graph builder
retr_config = _load_json_config('kg_retrieval_config.json')    # Configuration of the knowledge graph retriever
graphrag_config = _load_json_config('graphrag_config.json')    # Configuration of the GraphRAG

# Neo4j connection (os.getenv returns None for any variable that is not set)
neo4j_uri = os.getenv('NEO4J_URI')
neo4j_username = os.getenv('NEO4J_USERNAME')
neo4j_password = os.getenv('NEO4J_PASSWORD')

# Gemini API key
gemini_api_key = os.getenv('GEMINI_API_KEY')

In [None]:
# Smoke test: plain-text generation, with no response schema configured.
llm = GeminiLLM(model_name="gemini-2.5-flash", google_api_key=gemini_api_key)

response = llm.invoke(input="What is the capital of France?")

# The printed object carries the generated text in `content`; `parsed` is None
# here because no structured-output schema was requested.
print(response)

content='The capital of France is **Paris**.' parsed=None


In [None]:
class Claims(RootModel[List[str]]):
    """
    Represents a list of verifiable claims.
    The root of the model is a list of strings.
    """

    # The generic parameter already fixes the root type; the field is declared
    # again so that a description can be attached to it.
    root: List[str] = Field(
        description=(
            "A list of verifiable claims, where each claim is a self-contained, "
            "atomic statement that can be checked for accuracy."
        )
    )

# Structured output through the GeminiLLM wrapper: request JSON and pass the
# Claims model as the response schema.
llm = GeminiLLM(
    model_name="gemini-2.5-flash",
    google_api_key=gemini_api_key,
    model_params=dict(
        response_mime_type="application/json",
        response_schema=Claims,
    ),
)

answer = llm.invoke(
    input="What are the verifiable claims in the following text? 'The capital of France is Paris. The Eiffel Tower is in Paris.'"
)

# Show the same answer at four levels of unwrapping.
print(answer)              # whole response object: `content` plus `parsed`

print(answer.content)      # raw JSON text returned by the model

print(answer.parsed)       # the Claims instance built from that JSON

print(answer.parsed.root)  # the underlying Python list of claim strings

content='[\n  "The capital of France is Paris.",\n  "The Eiffel Tower is in Paris."\n]' parsed=Claims(root=['The capital of France is Paris.', 'The Eiffel Tower is in Paris.'])
[
  "The capital of France is Paris.",
  "The Eiffel Tower is in Paris."
]
root=['The capital of France is Paris.', 'The Eiffel Tower is in Paris.']
['The capital of France is Paris.', 'The Eiffel Tower is in Paris.']


In [None]:
from google import genai

# Same structured-output request, issued through the google-genai client directly
# instead of the GeminiLLM wrapper.
# NOTE: this rebinds `response`, which an earlier cell assigned from llm.invoke(...).
client = genai.Client(api_key=gemini_api_key)
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="What are the verifiable claims in the following text? 'The capital of France is Paris. The Eiffel Tower is in Paris.'",
    config=dict(
        response_mime_type="application/json",
        response_schema=Claims,
    ),
)

print(response)              # full SDK response (candidates, usage metadata, ...)

print(response.text)         # text of the response

print(response.parsed.root)  # list held by the parsed Claims object

candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, inline_data=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, text='[\n  "The capital of France is Paris.",\n  "The Eiffel Tower is in Paris."\n]')], role='model'), citation_metadata=None, finish_message=None, token_count=None, finish_reason=<FinishReason.STOP: 'STOP'>, url_context_metadata=None, avg_logprobs=None, grounding_metadata=None, index=0, logprobs_result=None, safety_ratings=None)] create_time=None response_id=None model_version='gemini-2.5-flash' prompt_feedback=None usage_metadata=GenerateContentResponseUsageMetadata(cache_tokens_details=None, cached_content_token_count=None, candidates_token_count=23, candidates_tokens_details=None, prompt_token_count=26, prompt_tokens_details=[ModalityTokenCount(modality=<MediaModality.TEXT: 'TEXT'>, token_count=26)], thoughts_token_count=77, tool_use_prompt_token_count=None, tool_use_promp

In [16]:
# Display the first candidate of the google-genai response (content parts,
# finish reason, ...) as the cell output.
response.candidates[0]

Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, inline_data=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, text='[\n  "The capital of France is Paris.",\n  "The Eiffel Tower is in Paris."\n]')], role='model'), citation_metadata=None, finish_message=None, token_count=None, finish_reason=<FinishReason.STOP: 'STOP'>, url_context_metadata=None, avg_logprobs=None, grounding_metadata=None, index=0, logprobs_result=None, safety_ratings=None)

In [None]:
# Display the type of `response`.
# NOTE(review): the saved output for this cell is `str`, yet the neighbouring cells
# use `response.candidates` and `response.parsed`, which a plain str does not have.
# The saved output looks stale (this cell also has no execution count) — re-run to confirm.
type(response)

str

In [18]:
# Check that the root of the parsed Claims object is a plain Python list.
type(response.parsed.root)

list

In [None]:
class QuestionsBase(BaseModel):
    """
    Represents a single verifiable claim together with the questions generated for it.
    """

    # One instance describes exactly one claim; it is not a mapping of several
    # claims to their questions.
    claim: str = Field(
        description="A verifiable claim for which questions are being asked."
    )
    questions: List[str] = Field(
        description="A list of questions related to the claim."
    )

# Structured output with an object schema: one claim plus its questions.
# NOTE(review): the prompt asks for questions "for each claim", but the schema is a
# single QuestionsBase object, and the saved output contains only the first claim.
# Covering every claim would presumably need a list-of-QuestionsBase schema — confirm
# that GeminiLLM supports it and adapt the cells that read `answer.parsed`.
llm = GeminiLLM(
    model_name="gemini-2.5-flash",
    google_api_key=gemini_api_key,
    model_params=dict(
        response_mime_type="application/json",
        response_schema=QuestionsBase,
    ),
)

answer = llm.invoke(
    input="What are the verifiable claims in the following text? 'The capital of France is Paris. The Eiffel Tower is in Paris.' Generate questions for each claim."
)

content='{\n  "claim": "The capital of France is Paris.",\n  "questions": [\n    "What is the capital of France?",\n    "Is Paris the capital of France?",\n    "Which city is known as the capital of France?"\n  ]\n}' parsed=QuestionsBase(claim='The capital of France is Paris.', questions=['What is the capital of France?', 'Is Paris the capital of France?', 'Which city is known as the capital of France?'])
{
  "claim": "The capital of France is Paris.",
  "questions": [
    "What is the capital of France?",
    "Is Paris the capital of France?",
    "Which city is known as the capital of France?"
  ]
}


In [11]:
# Whole response object: raw `content` string plus the `parsed` QuestionsBase instance.
print(answer)

# Raw JSON text returned by the model.
print(answer.content)

# Converting the parsed pydantic model with dict() gives a plain dict.
print(type(dict(answer.parsed)))

content='{\n  "claim": "The capital of France is Paris.",\n  "questions": [\n    "What is the capital of France?",\n    "Is Paris the capital of France?",\n    "Which city is known as the capital of France?"\n  ]\n}' parsed=QuestionsBase(claim='The capital of France is Paris.', questions=['What is the capital of France?', 'Is Paris the capital of France?', 'Which city is known as the capital of France?'])
{
  "claim": "The capital of France is Paris.",
  "questions": [
    "What is the capital of France?",
    "Is Paris the capital of France?",
    "Which city is known as the capital of France?"
  ]
}
<class 'dict'>
