# RESPONSE GENERATION FILE

In [38]:

import os
import tempfile

import chromadb
import pandas as pd
import streamlit as st
from bert_score import BERTScorer
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pypdf import PdfReader
from sentence_transformers import CrossEncoder
from streamlit.runtime.uploaded_file_manager import UploadedFile


In [39]:
def get_collection():
    """Open the persisted ``biology_collection`` Chroma vector store.

    Embeddings are produced by the ``nomic-embed-text:latest`` Ollama model and
    the store is backed by a persistent Chroma client rooted at ``./hsc-llm``.

    Returns:
        A ``Chroma`` instance wrapping the collection.
    """
    return Chroma(
        # Keyword arguments are evaluated left to right, so the embedding model
        # is still constructed before the Chroma client.
        embedding_function=OllamaEmbeddings(model="nomic-embed-text:latest"),
        client=chromadb.PersistentClient(path="./hsc-llm"),
        collection_name="biology_collection",
    )

In [40]:
# System message used by get_response() for every request.
# The trailing backslashes inside the string are line continuations, so the
# three lines they join reach the model as one line.
# NOTE(review): the first instruction is ungrammatical ("documents that is
# retrieved ... is relevant use it"). Rewording it would change model output,
# so the text is deliberately left untouched here.
system_prompt = """
You are an assistant for question-answering tasks.

Use the following documents that is retrieved from the database is relevant \
use it to provide a complete and concise response to the user's query. \
Do not mention references, sources, or citations in your response

If the documents provided are not relevant to the question, use your own knowledge to answer.

Limit your answer to 3-4 sentences.
"""

In [41]:
# Chat model used for generation. get_response() reads this module-level name
# at call time, so rebinding it in a later cell changes the model used by every
# subsequent call.
llm_model = ChatOllama(model="llama3.1", temperature=0)

In [42]:
def get_response(context: str, prompt: str):
    """Answer ``prompt`` using the chat model currently bound to ``llm_model``.

    Args:
        context: Text the model should ground its answer in (retrieved
            documents, or a placeholder string when no retrieval is used).
        prompt: The user's question.

    Returns:
        The model's reply as a plain string (parsed by ``StrOutputParser``).
    """
    # Read the module-level model at call time so that rebinding ``llm_model``
    # in another cell switches the model used here.
    llm = llm_model

    # The user message is a plain template string, NOT an f-string: the
    # ``{context}`` and ``{prompt}`` placeholders are filled in by ``invoke``.
    # Interpolating them here would make ChatPromptTemplate parse the retrieved
    # text itself as a template, so any "{" or "}" in a document or question
    # would raise or be treated as a missing input variable.
    prompt_template = ChatPromptTemplate([
        ("system", system_prompt),
        ("user", "Context: {context}, Question: {prompt}"),
    ])

    chain = prompt_template | llm | StrOutputParser()

    return chain.invoke({
        "prompt": prompt,
        "context": context,
    })

In [43]:
def perform_retrieval(query: str, n = 10):
    """Fetch the ``n`` most similar documents for ``query``.

    Opens the vector store via ``get_collection()`` and runs a similarity
    search through its retriever interface.

    Args:
        query: The text to search for.
        n: Number of documents to request (passed as ``k``).

    Returns:
        Whatever the retriever's ``invoke`` returns for ``query``.
    """
    vector_store = get_collection()
    similarity_retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": n},
    )
    return similarity_retriever.invoke(query)


In [44]:
# Loaded cross-encoders keyed by model name, so the weights are loaded once per
# kernel session instead of once per query.
_CROSS_ENCODER_CACHE = {}


def _load_cross_encoder(model_name: str):
    """Return the cached CrossEncoder for ``model_name``, loading it on first use."""
    if model_name not in _CROSS_ENCODER_CACHE:
        _CROSS_ENCODER_CACHE[model_name] = CrossEncoder(model_name)
    return _CROSS_ENCODER_CACHE[model_name]


def reranker(prompt: str, documents: "list[Document]", top_k: int = 5) -> tuple[str, list[int]]:
    """Re-rank retrieved documents against ``prompt`` and build a context string.

    Documents whose ``page_content`` is empty or whitespace-only are ignored.
    The remaining texts are scored with the
    ``cross-encoder/ms-marco-MiniLM-L-6-v2`` cross-encoder and the best
    ``top_k`` are kept, in the order returned by the encoder's ``rank`` call.

    Args:
        prompt: The query the documents are ranked against.
        documents: Retrieved objects exposing a ``page_content`` string.
        top_k: Maximum number of documents to keep (default 5).

    Returns:
        A ``(relevant_text, relevant_text_ids)`` tuple. ``relevant_text`` is the
        kept texts joined by a blank line; ``relevant_text_ids`` are the
        positions of those documents in the ``documents`` argument. When no
        usable document is supplied the result is ``("", [])``.
    """
    # Remember each text's position in the caller's list so the returned ids
    # stay valid even when blank documents were filtered out.
    candidates = [
        (position, doc.page_content)
        for position, doc in enumerate(documents)
        if doc.page_content and doc.page_content.strip()
    ]
    if not candidates:
        # Nothing to rank: skip loading/calling the model on an empty corpus.
        return "", []

    document_texts = [text for _position, text in candidates]
    encoder_model = _load_cross_encoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
    ranks = encoder_model.rank(prompt, document_texts, top_k=top_k)

    relevant_texts = []
    relevant_text_ids = []
    for rank in ranks:
        # ``corpus_id`` indexes ``document_texts``; map it back to ``documents``.
        position, text = candidates[rank["corpus_id"]]
        relevant_texts.append(text)
        relevant_text_ids.append(position)

    # Separate chunks with a blank line so the end of one chunk is not fused
    # onto the start of the next.
    return "\n\n".join(relevant_texts), relevant_text_ids

In [45]:
# Load the golden set (QUESTION, ANSWER, MARKS, CRITERIA); the first CSV column
# is used as the index. pandas is imported with the other imports at the top of
# the notebook.
goldenset = pd.read_csv("./ground_truth.csv", index_col=0)

In [46]:
# Preview the first rows to confirm the CSV parsed into the expected columns.
goldenset.head()

Unnamed: 0_level_0,QUESTION,ANSWER,MARKS,CRITERIA
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Define the term adaptation,Adaptation are the inherited favourable charac...,1,1 mark = correctly definition of adaptation
2,Distinguish between sexual reproduction and as...,"Sexual reproduction, such as meiosis, is the p...",4,1 mark = Define sexual reproduction\r\n\r\n1 m...
3,What is the difference between a somatic cell ...,Somatic cells are all the diploid cells in the...,3,1 mark = Explains the difference between gamet...
4,Where would you find a follicle and what is it...,A follicle is a group of cells in the ovary th...,2,1 mark = Gave the location of the follicle\r\n...
5,"Within a cell cycle, cell replication involves...",The cell cycle involves three phases: interpha...,3,"1 mark = Gives the first phase: interphase, an..."


In [47]:
# Extract the question text and the reference answers as plain Python lists,
# both in golden-set row order.
questions, reference_answers = (
    goldenset[column_name].values.tolist()
    for column_name in ('QUESTION', 'ANSWER')
)

## LLM ANSWER GENERATION PROCESS

### GENERATE AI RESPONSES - MODEL 1: Just LLAMA3.1-8b NO RAG/SELF-KNOWLEDGE ONLY

In [48]:
# No-retrieval baseline: every question is sent with a placeholder context, so
# the model currently bound to `llm_model` answers without retrieved documents.
model1_generated_answers = [
    get_response(context="No context provided", prompt=each_question)
    for each_question in questions
]

In [49]:
# Sanity check: the loop appends one answer per question, so this should equal
# len(questions).
len(model1_generated_answers)

30

### GENERATE AI RESPONSES - MODEL 2: Just LLAMA3.2-1b NO RAG/SELF-KNOWLEDGE ONLY

In [50]:
# Rebind the module-level model to the llama3.2:1b tag. get_response() reads
# `llm_model` at call time, so every call after this cell uses this model.
llm_model = ChatOllama(model="llama3.2:1b", temperature=0)

In [51]:
# No-retrieval baseline for the model currently bound to `llm_model`: each
# question is sent with a placeholder context instead of retrieved documents.
model2_generated_answers = [
    get_response(context="No context provided", prompt=each_question)
    for each_question in questions
]

In [52]:
# Sanity check: the loop appends one answer per question, so this should equal
# len(questions).
len(model2_generated_answers)

30

### GENERATE AI RESPONSES - MODEL 1A: LLAMA3.1-8b WITH RAG

In [53]:
# Rebind the module-level model back to the llama3.1 tag for the RAG run.
# get_response() reads `llm_model` at call time, so this cell must run before
# the generation loop that follows.
llm_model = ChatOllama(model="llama3.1", temperature=0)

In [54]:
# RAG run: retrieve candidates, re-rank them, and answer from the re-ranked text
# using the model currently bound to `llm_model`.
model1a_generated_answers = []
for user_query in questions:
    retrieved_docs = perform_retrieval(user_query)
    # reranker returns (text, ids); only the text is needed as context here.
    reranked_context = reranker(user_query, retrieved_docs)[0]
    model1a_generated_answers.append(
        get_response(context=reranked_context, prompt=user_query)
    )

In [55]:
# Sanity check: the loop appends one answer per question, so this should equal
# len(questions).
len(model1a_generated_answers)

30

### GENERATE AI RESPONSES - MODEL 2A: LLAMA3.2-1b WITH RAG

In [56]:
# Rebind the module-level model to the llama3.2:1b tag for the RAG run.
# get_response() reads `llm_model` at call time, so this cell must run before
# the generation loop that follows.
llm_model = ChatOllama(model="llama3.2:1b", temperature=0)

In [57]:
# RAG run: retrieve candidates, re-rank them, and answer from the re-ranked text
# using the model currently bound to `llm_model`.
model2a_generated_answers = []
for user_query in questions:
    retrieved_docs = perform_retrieval(user_query)
    # reranker returns (text, ids); only the text is needed as context here.
    reranked_context = reranker(user_query, retrieved_docs)[0]
    model2a_generated_answers.append(
        get_response(context=reranked_context, prompt=user_query)
    )

### COLLATE GENERATED RESPONSES INTO A FILE

In [58]:
# Re-read the ground-truth CSV (first column as index) as the base frame that
# the generated answers are attached to.
ground_truth_df = pd.read_csv("./ground_truth.csv", index_col=0)

In [59]:
# Check that the ground truth was read correctly by displaying the frame.
ground_truth_df

Unnamed: 0_level_0,QUESTION,ANSWER,MARKS,CRITERIA
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Define the term adaptation,Adaptation are the inherited favourable charac...,1,1 mark = correctly definition of adaptation
2,Distinguish between sexual reproduction and as...,"Sexual reproduction, such as meiosis, is the p...",4,1 mark = Define sexual reproduction\r\n\r\n1 m...
3,What is the difference between a somatic cell ...,Somatic cells are all the diploid cells in the...,3,1 mark = Explains the difference between gamet...
4,Where would you find a follicle and what is it...,A follicle is a group of cells in the ovary th...,2,1 mark = Gave the location of the follicle\r\n...
5,"Within a cell cycle, cell replication involves...",The cell cycle involves three phases: interpha...,3,"1 mark = Gives the first phase: interphase, an..."
6,Recall the names of the two cell division proc...,Mitosis produces identical new cells for growt...,4,1 mark = Mentions mitosis \r\n\r\n1 mark = Men...
7,Explain what is meant by a protein becoming 'd...,A protein is said to be denatured when the hyd...,2,1 mark = Defines what 'denatured' means\r\n\r\...
8,What is the Law of Independent Assortment and ...,"Mendel's second law of inheritance, the Law of...",1,1 mark = States what Law of independent assort...
9,Define 'genetic testing'.,Genetic testing is a medical test used to dete...,1,1 mark = defines 'Genetic testing'
10,Explain why silent mutations are called 'silent'.,Silent mutations are silent because they do no...,2,1 mark = Mentions silent mutations in the answ...


In [60]:
# Reuse the ground-truth IDs as the index for the generated answers, so the two
# frames line up row-for-row when concatenated even if the CSV's IDs are not
# exactly 1..N.
index_list = ground_truth_df.index.tolist()

# There must be exactly one ID per question for the answers to map back to the
# right ground-truth row.
assert len(index_list) == len(questions), "ground-truth IDs and questions differ in length"

len(index_list)

30

In [61]:
# Gather every model's answers into one frame, keyed by the same ID values as
# the ground truth so the two can be lined up later.
generated_columns = {
    'ID': index_list,
    'Model1_GA': model1_generated_answers,
    'Model2_GA': model2_generated_answers,
    'Model1a_GA': model1a_generated_answers,
    'Model2a_GA': model2a_generated_answers,
}
generated_df = pd.DataFrame(generated_columns).set_index('ID')

In [62]:
# Preview the first rows of the generated answers (one column per model run).
generated_df.head()

Unnamed: 0_level_0,Model1_GA,Model2_GA,Model1a_GA,Model2a_GA
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,"The term ""adaptation"" refers to a change or ad...",Adaptation refers to the process by which an o...,An adaptation is a characteristic that an orga...,Adaptations are characteristics that help orga...
2,Sexual reproduction involves the combination o...,Sexual reproduction involves the combination o...,Reproduction ensures the continuity of a speci...,Reproduction ensures the continuity of a speci...
3,A somatic cell is any cell in the body that is...,"A somatic cell, also known as a body cell or n...",A somatic cell is a body cell that contains tw...,A somatic cell is a type of body cell that mak...
4,A follicle is typically found in the skin or o...,"A follicle is a small, usually rounded or oval...","Unfortunately, the provided documents do not m...",A follicle is not mentioned in the provided te...
5,The three phases of cell replication within a ...,The three phases of cell replication within a ...,The three phases of cell replication within a ...,Here's a summary of the three phases involved ...


In [63]:
# Place the generated answers next to the ground truth (column-wise concat,
# aligned on the index) and label the index "ID".
combined_df = pd.concat(
    [ground_truth_df, generated_df],
    axis=1,
).rename_axis("ID")
combined_df.head()

Unnamed: 0_level_0,QUESTION,ANSWER,MARKS,CRITERIA,Model1_GA,Model2_GA,Model1a_GA,Model2a_GA
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Define the term adaptation,Adaptation are the inherited favourable charac...,1,1 mark = correctly definition of adaptation,"The term ""adaptation"" refers to a change or ad...",Adaptation refers to the process by which an o...,An adaptation is a characteristic that an orga...,Adaptations are characteristics that help orga...
2,Distinguish between sexual reproduction and as...,"Sexual reproduction, such as meiosis, is the p...",4,1 mark = Define sexual reproduction\r\n\r\n1 m...,Sexual reproduction involves the combination o...,Sexual reproduction involves the combination o...,Reproduction ensures the continuity of a speci...,Reproduction ensures the continuity of a speci...
3,What is the difference between a somatic cell ...,Somatic cells are all the diploid cells in the...,3,1 mark = Explains the difference between gamet...,A somatic cell is any cell in the body that is...,"A somatic cell, also known as a body cell or n...",A somatic cell is a body cell that contains tw...,A somatic cell is a type of body cell that mak...
4,Where would you find a follicle and what is it...,A follicle is a group of cells in the ovary th...,2,1 mark = Gave the location of the follicle\r\n...,A follicle is typically found in the skin or o...,"A follicle is a small, usually rounded or oval...","Unfortunately, the provided documents do not m...",A follicle is not mentioned in the provided te...
5,"Within a cell cycle, cell replication involves...",The cell cycle involves three phases: interpha...,3,"1 mark = Gives the first phase: interphase, an...",The three phases of cell replication within a ...,The three phases of cell replication within a ...,The three phases of cell replication within a ...,Here's a summary of the three phases involved ...


In [66]:
# Inspect the last six rows to confirm the final IDs also carry answers from
# every model column.
combined_df.tail(6)

Unnamed: 0_level_0,QUESTION,ANSWER,MARKS,CRITERIA,Model1_GA,Model2_GA,Model1a_GA,Model2a_GA
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
25,What are three mechanisms humans use to produc...,Answers may include:\n- voluntary movement/phy...,3,1 mark = correctly includes only one mechanism...,"Humans produce heat through shivering, increas...",Humans use several mechanisms to produce heat....,Humans use the following mechanisms to produce...,"Humans use several mechanisms to produce heat,..."
26,What are 'signalling molecules'?,Signalling molecules are chemicals involved in...,1,1 mark = correctly defines signaling molecules,"Signalling molecules, also known as signaling ...",Signalling molecules are chemical messengers t...,Signalling molecules are proteins secreted by ...,Signaling molecules are small proteins secrete...
27,What is the liver's role in homeostasis? Outli...,The liver minimises the greater changes in blo...,2,1 mark = defines the function of the liver\r\n...,The liver plays a crucial role in maintaining ...,The liver plays a crucial role in maintaining ...,The liver plays a crucial role in maintaining ...,Here's a summary of the liver's role in homeos...
28,What does a nutritional disease result from?,Nutritional diseases can be caused by a lack o...,1,1 mark = correctly identifies what 'nutritiona...,A nutritional disease can result from an imbal...,A nutritional disease is typically caused by a...,A nutritional disease results from poor or exc...,"Based on the provided information, a nutrition..."
29,Describe the two functions of the ear ossicles.,The three ear bones (ossicles) amplify (increa...,2,1 mark = defines one function correctly\r\n\r\...,"The ear ossicles, also known as the middle ear...",The two main functions of the ear ossicles are...,"The ear ossicles (malleus, incus, and stapes) ...",The two main functions of the ear ossicles are...
30,Name the two hormones responsible for regulati...,ADH (antidiuretic hormone) responds to water l...,2,1 mark = mentions one correct hormone\r\n\r\n1...,The two hormones responsible for regulating sa...,The two primary hormones involved in regulatin...,The two hormones responsible for regulating sa...,The two hormones responsible for regulating sa...


In [None]:
# Export the combined ground truth + generated answers to a CSV file; the
# relative path means it is written to the current working directory.
combined_df.to_csv("combined_df.csv")