# Setup

In [None]:
# Notebook bootstrap: extend the import path, load project settings/helpers,
# and switch the working directory to the project root.
import os
import sys 

# Add the directory two levels above the current working directory to sys.path;
# this has to happen before the `app_settings` / `src` imports below.
# NOTE(review): the path is derived from os.getcwd(), and this cell later changes
# the working directory, so re-running it in the same kernel appends a different
# directory than the first run did — confirm it is only meant to run once per kernel.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../..')))

from app_settings import PROJECT_ROOT
from src.utils import read_test_queries

# Later cells use paths relative to the working directory (e.g. "results/answers/..."),
# so everything is executed from PROJECT_ROOT.
os.chdir(PROJECT_ROOT)
print(f"Changed working dir to {PROJECT_ROOT}")

Changed working dir to C:\Users\tiago\Documents\Granter ai Internship\Implementation\Code\KGs_for_Vertical_AI


In [7]:
# Read the evaluation questions from <PROJECT_ROOT>/data/test_queries.txt
# and show the first five as a quick sanity check.
queries_path = os.path.join(PROJECT_ROOT, 'data', 'test_queries.txt')
questions = read_test_queries(queries_path)
questions[:5]

["What is Granter.ai's main business activity?",
 'Which AI methodologies/technological features does Granter.ai use in its solution?',
 "What is the ultimate goal of Granter.ai's project?",
 "What is Granter.ai's mission?",
 "What is the business model for Granter.ai's solution?"]

In [8]:
# Chat model client used by the Graph RAG cells to turn a prompt into an answer
# (they call `gpt41_nano.invoke(prompt).content`).
from langchain_openai import AzureChatOpenAI
# Credentials and deployment details come from the project's settings module,
# so no secret is written in the notebook itself.
from app_settings import (
    AZURE_OPENAI_API_KEY,
    AZURE_OPENAI_ENDPOINT,
    AZURE_OPENAI_API_VERSION,
    AZURE_DEPLOYMENT_GPT41_NANO
)

# NOTE(review): the deployment constant's name suggests a GPT-4.1 nano deployment —
# confirm against the Azure resource configuration.
gpt41_nano = AzureChatOpenAI(
    azure_deployment=AZURE_DEPLOYMENT_GPT41_NANO,
    api_version=AZURE_OPENAI_API_VERSION,
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY
)

# Vector RAG

In [4]:
from src.rag.rag_tests import generate_response

In [5]:
# Answer every test question with the vector-RAG pipeline and save the
# question/answer pairs to results/answers/vector_rag/rag_results.txt.
results_dir = "results/answers/vector_rag"
os.makedirs(results_dir, exist_ok=True) # create/check results folder
output_file = os.path.join(results_dir, "rag_results.txt")

# Start from an empty results file. The loop below appends one entry per question,
# so without this reset every re-execution of the cell would add another full copy
# of the answers after the ones from the previous run.
with open(output_file, 'w', encoding='utf-8'):
    pass

for i, question in enumerate(questions):
    print(f'Generating answer for question {i+1}...')
    # k=3 — presumably the number of retrieved chunks; confirm in src.rag.rag_tests
    answer = generate_response(question, k=3)

    # Append immediately after each answer so an interrupted run keeps
    # everything that was generated up to that point.
    with open(output_file, 'a', encoding='utf-8') as f:
        f.write(f"Question {i+1}:\n{question}\n")
        f.write(f"Answer:\n{answer}\n")
        f.write("\n" + "-" * 80 + "\n")

Generating answer for question 1...
Generating answer for question 2...
Generating answer for question 3...
Generating answer for question 4...
Generating answer for question 5...
Generating answer for question 6...
Generating answer for question 7...
Generating answer for question 8...
Generating answer for question 9...
Generating answer for question 10...
Generating answer for question 11...
Generating answer for question 12...
Generating answer for question 13...
Generating answer for question 14...
Generating answer for question 15...
Generating answer for question 16...
Generating answer for question 17...
Generating answer for question 18...
Generating answer for question 19...
Generating answer for question 20...


# Graph RAG - rdb ontology KG

In [9]:
from src.g_retriever import retrieve

## Without Chunks

In [10]:
# Graph RAG over the rdb-ontology knowledge graph (no chunk nodes): retrieve context
# for every test question, ask the chat model, and save the question/answer pairs
# to results/answers/rdb_ontology/rdb_results.txt.
results_dir = "results/answers/rdb_ontology"
os.makedirs(results_dir, exist_ok=True) # create/check results folder
output_file = os.path.join(results_dir, "rdb_results.txt")

# Retrieval settings are the same for every question, so name them once.
graph_csv = 'results/KGs/rdb_ontology_kg_nochunks.csv'
embed_model = 'paraphrase-multilingual-MiniLM-L12-v2'

# Start from an empty results file. The loop below appends one entry per question,
# so without this reset every re-execution of the cell would add another full copy
# of the answers after the ones from the previous run.
with open(output_file, 'w', encoding='utf-8'):
    pass

for i, question in enumerate(questions):
    print(f'Generating answer for question {i+1}...')

    # Whatever `retrieve` returns is interpolated verbatim into the prompt as context.
    retrieved = retrieve(
        graph_csv=graph_csv,
        question=question,
        embed_model=embed_model
    )

    # The prompt text (including its indentation) is sent to the model as-is.
    prompt = f"""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the user question
        - Include all information that is relevant to the question
        - Keep your response clear and concise. Aim for 3 sentences or less, but you can include more if needed to cover all the relevant information
        - If the answer is not in the context, say "I don't know"
        - Do not add information that isn't supported by the context
    Context:
    {retrieved}

    Question: {question}"""

    answer = gpt41_nano.invoke(prompt).content

    # Append immediately after each answer so an interrupted run keeps
    # everything that was generated up to that point.
    with open(output_file, 'a', encoding='utf-8') as f:
        f.write(f"Question {i+1}:\n{question}\n")
        f.write(f"Answer:\n{answer}\n")
        f.write("\n" + "-" * 80 + "\n")

Generating answer for question 1...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.06s/it]


Generating answer for question 2...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.41it/s]


Generating answer for question 3...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.44it/s]


Generating answer for question 4...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.32it/s]


Generating answer for question 5...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.79it/s]


Generating answer for question 6...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.22it/s]


Generating answer for question 7...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.42it/s]


Generating answer for question 8...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.51it/s]


Generating answer for question 9...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.79it/s]


Generating answer for question 10...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.25it/s]


Generating answer for question 11...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.38it/s]


Generating answer for question 12...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.28it/s]


Generating answer for question 13...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.04it/s]


Generating answer for question 14...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.00it/s]


Generating answer for question 15...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.29it/s]


Generating answer for question 16...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.86it/s]


Generating answer for question 17...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.33it/s]


Generating answer for question 18...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.74it/s]


Generating answer for question 19...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.22s/it]


Generating answer for question 20...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.38it/s]


## With Chunks

In [11]:
# Graph RAG over the rdb-ontology knowledge graph (with chunk nodes): retrieve context
# for every test question, ask the chat model, and save the question/answer pairs
# to results/answers/rdb_ontology/rdb_chunks_results.txt.
results_dir = "results/answers/rdb_ontology"
os.makedirs(results_dir, exist_ok=True) # create/check results folder
output_file = os.path.join(results_dir, "rdb_chunks_results.txt")

# Retrieval settings are the same for every question, so name them once.
graph_csv = 'results/KGs/rdb_ontology_kg_chunks.csv'
embed_model = 'paraphrase-multilingual-MiniLM-L12-v2'

# Start from an empty results file. The loop below appends one entry per question,
# so without this reset every re-execution of the cell would add another full copy
# of the answers after the ones from the previous run.
with open(output_file, 'w', encoding='utf-8'):
    pass

for i, question in enumerate(questions):
    print(f'Generating answer for question {i+1}...')

    # Whatever `retrieve` returns is interpolated verbatim into the prompt as context.
    retrieved = retrieve(
        graph_csv=graph_csv,
        question=question,
        embed_model=embed_model
    )

    # The prompt text (including its indentation) is sent to the model as-is.
    prompt = f"""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the user question
        - Include all information that is relevant to the question
        - Keep your response clear and concise. Aim for 3 sentences or less, but you can include more if needed to cover all the relevant information
        - If the answer is not in the context, say "I don't know"
        - Do not add information that isn't supported by the context
    Context:
    {retrieved}

    Question: {question}"""

    answer = gpt41_nano.invoke(prompt).content

    # Append immediately after each answer so an interrupted run keeps
    # everything that was generated up to that point.
    with open(output_file, 'a', encoding='utf-8') as f:
        f.write(f"Question {i+1}:\n{question}\n")
        f.write(f"Answer:\n{answer}\n")
        f.write("\n" + "-" * 80 + "\n")

Generating answer for question 1...


Batches: 100%|██████████| 1/1 [00:06<00:00,  6.00s/it]


Generating answer for question 2...


Batches: 100%|██████████| 1/1 [00:07<00:00,  7.63s/it]


Generating answer for question 3...


Batches: 100%|██████████| 1/1 [00:07<00:00,  7.18s/it]


Generating answer for question 4...


Batches: 100%|██████████| 1/1 [00:06<00:00,  6.59s/it]


Generating answer for question 5...


Batches: 100%|██████████| 1/1 [00:07<00:00,  7.39s/it]


Generating answer for question 6...


Batches: 100%|██████████| 1/1 [00:06<00:00,  6.85s/it]


Generating answer for question 7...


Batches: 100%|██████████| 1/1 [00:06<00:00,  6.44s/it]


Generating answer for question 8...


Batches: 100%|██████████| 1/1 [00:07<00:00,  7.53s/it]


Generating answer for question 9...


Batches: 100%|██████████| 1/1 [00:08<00:00,  8.89s/it]


Generating answer for question 10...


Batches: 100%|██████████| 1/1 [00:08<00:00,  8.63s/it]


Generating answer for question 11...


Batches: 100%|██████████| 1/1 [00:06<00:00,  6.22s/it]


Generating answer for question 12...


Batches: 100%|██████████| 1/1 [00:06<00:00,  6.36s/it]


Generating answer for question 13...


Batches: 100%|██████████| 1/1 [00:06<00:00,  6.71s/it]


Generating answer for question 14...


Batches: 100%|██████████| 1/1 [00:06<00:00,  6.01s/it]


Generating answer for question 15...


Batches: 100%|██████████| 1/1 [00:05<00:00,  5.52s/it]


Generating answer for question 16...


Batches: 100%|██████████| 1/1 [00:05<00:00,  5.83s/it]


Generating answer for question 17...


Batches: 100%|██████████| 1/1 [00:05<00:00,  5.27s/it]


Generating answer for question 18...


Batches: 100%|██████████| 1/1 [00:04<00:00,  5.00s/it]


Generating answer for question 19...


Batches: 100%|██████████| 1/1 [00:06<00:00,  6.09s/it]


Generating answer for question 20...


Batches: 100%|██████████| 1/1 [00:07<00:00,  7.04s/it]


# Graph RAG - txt ontology KG

## Without Chunks

In [12]:
# Graph RAG over the txt-ontology knowledge graph (no chunk nodes): retrieve context
# for every test question, ask the chat model, and save the question/answer pairs
# to results/answers/txt_ontology/txt_results.txt.
results_dir = "results/answers/txt_ontology"
os.makedirs(results_dir, exist_ok=True) # create/check results folder
output_file = os.path.join(results_dir, "txt_results.txt")

# Retrieval settings are the same for every question, so name them once.
graph_csv = 'results/KGs/txt_ontology_kg_nochunks.csv'
embed_model = 'paraphrase-multilingual-MiniLM-L12-v2'

# Start from an empty results file. The loop below appends one entry per question,
# so without this reset every re-execution of the cell would add another full copy
# of the answers after the ones from the previous run.
with open(output_file, 'w', encoding='utf-8'):
    pass

for i, question in enumerate(questions):
    print(f'Generating answer for question {i+1}...')

    # Whatever `retrieve` returns is interpolated verbatim into the prompt as context.
    retrieved = retrieve(
        graph_csv=graph_csv,
        question=question,
        embed_model=embed_model
    )

    # The prompt text (including its indentation) is sent to the model as-is.
    prompt = f"""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the user question
        - Include all information that is relevant to the question
        - Keep your response clear and concise. Aim for 3 sentences or less, but you can include more if needed to cover all the relevant information
        - If the answer is not in the context, say "I don't know"
        - Do not add information that isn't supported by the context
    Context:
    {retrieved}

    Question: {question}"""

    answer = gpt41_nano.invoke(prompt).content

    # Append immediately after each answer so an interrupted run keeps
    # everything that was generated up to that point.
    with open(output_file, 'a', encoding='utf-8') as f:
        f.write(f"Question {i+1}:\n{question}\n")
        f.write(f"Answer:\n{answer}\n")
        f.write("\n" + "-" * 80 + "\n")

Generating answer for question 1...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.13it/s]


Generating answer for question 2...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


Generating answer for question 3...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


Generating answer for question 4...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


Generating answer for question 5...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


Generating answer for question 6...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]


Generating answer for question 7...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.00s/it]


Generating answer for question 8...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.05s/it]


Generating answer for question 9...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


Generating answer for question 10...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.03s/it]


Generating answer for question 11...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


Generating answer for question 12...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.10s/it]


Generating answer for question 13...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.11it/s]


Generating answer for question 14...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.08it/s]


Generating answer for question 15...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.12it/s]


Generating answer for question 16...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.07s/it]


Generating answer for question 17...


Batches: 100%|██████████| 1/1 [00:04<00:00,  4.38s/it]


Generating answer for question 18...


Batches: 100%|██████████| 1/1 [00:04<00:00,  4.33s/it]


Generating answer for question 19...


Batches: 100%|██████████| 1/1 [00:04<00:00,  4.84s/it]


Generating answer for question 20...


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.02s/it]


## With Chunks

In [13]:
# Graph RAG over the txt-ontology knowledge graph (with chunk nodes): retrieve context
# for every test question, ask the chat model, and save the question/answer pairs
# to results/answers/txt_ontology/txt_chunks_results.txt.
results_dir = "results/answers/txt_ontology"
os.makedirs(results_dir, exist_ok=True) # create/check results folder
output_file = os.path.join(results_dir, "txt_chunks_results.txt")

# Retrieval settings are the same for every question, so name them once.
graph_csv = 'results/KGs/txt_ontology_kg_chunks.csv'
embed_model = 'paraphrase-multilingual-MiniLM-L12-v2'

# Start from an empty results file. The loop below appends one entry per question,
# so without this reset every re-execution of the cell would add another full copy
# of the answers after the ones from the previous run.
with open(output_file, 'w', encoding='utf-8'):
    pass

for i, question in enumerate(questions):
    print(f'Generating answer for question {i+1}...')

    # Whatever `retrieve` returns is interpolated verbatim into the prompt as context.
    retrieved = retrieve(
        graph_csv=graph_csv,
        question=question,
        embed_model=embed_model
    )

    # The prompt text (including its indentation) is sent to the model as-is.
    prompt = f"""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the user question
        - Include all information that is relevant to the question
        - Keep your response clear and concise. Aim for 3 sentences or less, but you can include more if needed to cover all the relevant information
        - If the answer is not in the context, say "I don't know"
        - Do not add information that isn't supported by the context
    Context:
    {retrieved}

    Question: {question}"""

    answer = gpt41_nano.invoke(prompt).content

    # Append immediately after each answer so an interrupted run keeps
    # everything that was generated up to that point.
    with open(output_file, 'a', encoding='utf-8') as f:
        f.write(f"Question {i+1}:\n{question}\n")
        f.write(f"Answer:\n{answer}\n")
        f.write("\n" + "-" * 80 + "\n")

Generating answer for question 1...


Batches: 100%|██████████| 2/2 [00:05<00:00,  2.78s/it]


Generating answer for question 2...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.60s/it]


Generating answer for question 3...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.95s/it]


Generating answer for question 4...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.97s/it]


Generating answer for question 5...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.76s/it]


Generating answer for question 6...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.69s/it]


Generating answer for question 7...


Batches: 100%|██████████| 2/2 [00:06<00:00,  3.04s/it]


Generating answer for question 8...


Batches: 100%|██████████| 2/2 [00:06<00:00,  3.07s/it]


Generating answer for question 9...


Batches: 100%|██████████| 2/2 [00:09<00:00,  4.85s/it]


Generating answer for question 10...


Batches: 100%|██████████| 2/2 [00:45<00:00, 22.73s/it]


Generating answer for question 11...


Batches: 100%|██████████| 2/2 [00:12<00:00,  6.37s/it]


Generating answer for question 12...


Batches: 100%|██████████| 2/2 [00:09<00:00,  4.64s/it]


Generating answer for question 13...


Batches: 100%|██████████| 2/2 [00:06<00:00,  3.38s/it]


Generating answer for question 14...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.61s/it]


Generating answer for question 15...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.63s/it]


Generating answer for question 16...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.64s/it]


Generating answer for question 17...


Batches: 100%|██████████| 2/2 [00:08<00:00,  4.03s/it]


Generating answer for question 18...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.60s/it]


Generating answer for question 19...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.61s/it]


Generating answer for question 20...


Batches: 100%|██████████| 2/2 [00:07<00:00,  3.69s/it]
