# RESPONSE GENERATION FILE

In [2]:

import os 
import tempfile

import chromadb
import streamlit as st

from langchain_chroma import Chroma
from pypdf import PdfReader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import CrossEncoder
from streamlit.runtime.uploaded_file_manager import UploadedFile
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, AIMessage
from langchain_ollama import OllamaEmbeddings
from langchain_core.output_parsers import StrOutputParser

from bert_score import BERTScorer


In [3]:
def get_collection():

    embedding_model = OllamaEmbeddings(model="nomic-embed-text:latest")

    chroma_client = chromadb.PersistentClient(path="./hsc-llm")

    return Chroma(
        client=chroma_client,
        collection_name="biology_collection",
        embedding_function=embedding_model,
    )

In [4]:
system_prompt = """
You are an assistant for question-answering tasks.

Use the following documents that is retrieved from the database is relevant \
use it to provide a complete and concise response to the user's query. \
Do not mention references, sources, or citations in your response

If the documents provided are not relevant to the question, use your own knowledge to answer.

Limit your answer to 3-4 sentences.
"""

In [5]:
llm_model = ChatOllama(model="llama3.1", temperature=0)

In [6]:
def get_response(context: str, prompt: str):
    
    llm = llm_model

    prompt_template = ChatPromptTemplate([
        ("system", system_prompt),
        ("user", f"Context: {context}, Question: {prompt}")
    ])

    chain = prompt_template | llm | StrOutputParser()

    return chain.invoke({
        "prompt": prompt, 
        "context": context
    })

In [7]:
def perform_retrieval(query: str, n = 10):
    collection = get_collection()
    retriever = collection.as_retriever(search_type="similarity", search_kwargs={"k": n})
    results = retriever.invoke(query)
    return results


In [8]:
def reranker(prompt: str, documents: list[str]) -> tuple[str,list[int]]:
    document_texts = [doc.page_content for doc in documents if doc.page_content and doc.page_content.strip()]
    
    
    relevant_text = ""
    relevant_text_ids = []
    encoder_model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
    ranks = encoder_model.rank(prompt, document_texts, top_k=5)
    for rank in ranks:
        relevant_text += document_texts[rank["corpus_id"]]
        relevant_text_ids.append(rank["corpus_id"])

    return relevant_text, relevant_text_ids

In [9]:
#Imports for dataset calculation
import pandas as pd

goldenset = pd.read_csv("./ground_truth.csv", index_col=0)

In [10]:
goldenset.head()

Unnamed: 0_level_0,QUESTION,ANSWER,MARKS,CRITERIA
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Define the term ‘adaptation’,Adaptation are the inherited favourable charac...,1,1 mark = correctly definition of adaptation
2,Distinguish between sexual reproduction and as...,"Sexual reproduction, such as meiosis, is the p...",4,1 mark = Define sexual reproduction\r\n\r\n1 m...
3,What is the difference between a somatic cell ...,Somatic cells are all the diploid cells in the...,3,1 mark = Explains the difference between gamet...
4,Where would you find a follicle and what is it...,A follicle is a group of cells in the ovary th...,2,1 mark = Gave the location of the follicle\r\n...
5,"Within a cell cycle, cell replication involves...",The cell cycle involves three phases: interpha...,3,"1 mark = Gives the first phase: interphase, an..."


In [11]:
questions = goldenset['QUESTION'].values.tolist()
reference_answers = goldenset['ANSWER'].values.tolist()

## LLM ANSWER GENERATION PROCESS

### GENERATE AI REPSONSES - MODEL 1: Just LLAMA3.1-8b NO RAG/SELF-KNOWLEDGE

In [12]:
model1_generated_answers = [] 
for question in questions:
    response = get_response(prompt=question, context="No context provided")
    model1_generated_answers.append(response)

In [13]:
len(model1_generated_answers)

30

In [14]:
model1_generated_answers[:5]

['The term "adaptation" refers to a trait or behavior that enhances an organism\'s ability to survive and reproduce in its environment. It is a characteristic that has evolved over time through the process of natural selection, allowing individuals with favorable traits to better cope with their surroundings. Adaptations can be physical, such as the development of wings in birds, or behavioral, like the migration patterns of certain animals.',
 'Sexual reproduction involves the combination of genetic material from two parents to produce offspring with unique characteristics, whereas asexual reproduction involves the production of offspring that are genetically identical to the parent through methods such as budding or binary fission. Sexual reproduction requires two individuals, while asexual reproduction can occur in one individual. This results in greater genetic diversity in sexually reproduced offspring compared to asexually reproduced offspring. Asexual reproduction is often faste

### GENERATE AI RESPONSES - MODEL 2: Just LLAMA3.2-1b NO RAG/SELF-KNOWLEDGE ONLY

In [15]:
llm_model = ChatOllama(model="llama3.2:1b", temperature=0)

In [16]:
model2_generated_answers = [] 
for question in questions:
    response = get_response(prompt=question, context="No context provided")
    model2_generated_answers.append(response)

In [17]:
len(model2_generated_answers)

30

In [18]:
model2_generated_answers[:5]

['Adaptation refers to the process by which an organism or a system changes or adjusts itself in response to environmental factors, such as temperature, light, or chemicals, to ensure its survival and fitness. This can involve physiological changes, behavioral adaptations, or morphological modifications that enable the individual or entity to better interact with its surroundings. Adaptation is a fundamental concept in biology, ecology, and evolutionary theory, highlighting the dynamic and ongoing process of change in living systems. It allows organisms to thrive in diverse environments and maintain their reproductive success.',
 'Sexual reproduction involves the combination of genetic material from two parents to produce offspring with unique characteristics, whereas asexual reproduction involves the production of offspring without the involvement of gametes (sex cells) from another individual. In asexual reproduction, an organism can produce offspring that are genetically identical t

### GENERATE AI RESPONSES - MODEL 1A: LLAMA3.1-8b WITH RAG

In [19]:
llm_model = ChatOllama(model="llama3.1", temperature=0)

In [20]:
model1a_generated_answers = [] 
for user_query in questions:
    results = perform_retrieval(user_query)
    relevant_text, relevant_text_ids = reranker(user_query, results)
    response = get_response(context=relevant_text, prompt=user_query)
    model1a_generated_answers.append(response)

⚠️ It looks like you upgraded from a version below 0.6 and could benefit from vacuuming your database. Run chromadb utils vacuum --help for more information.


In [21]:
len(model1a_generated_answers)

30

In [22]:
model1a_generated_answers[:5]

['An adaptation is a characteristic that an organism has inherited and makes it suited for its environment. It is a result of change that arises via mutation, when a cell divides and replicates during the process of reproduction. Adaptations are traits of a species that help them survive in their environment.',
 'Reproduction ensures the continuity of a species through two main methods: sexual reproduction, which involves combining genetic information from two individuals to produce offspring with unique characteristics; and asexual reproduction, where offspring arise from a single organism, inheriting its genes. Asexual reproduction is faster and requires less energy but can lead to large-scale extinction and limited adaptation to selection pressure. In contrast, sexual reproduction increases genetic diversity and allows species to adapt to changing environments.',
 'A somatic cell is a body cell that contains two copies of each chromosome, whereas a gamete (such as a sperm or egg cel

### GENERATE AI RESPONSES - MODEL 2A: LLAMA3.2-1b WITH RAG

In [23]:
llm_model = ChatOllama(model="llama3.2:1b", temperature=0)

In [24]:
model2a_generated_answers = [] 
for user_query in questions:
    results = perform_retrieval(user_query)
    relevant_text, relevant_text_ids = reranker(user_query, results)
    response = get_response(context=relevant_text, prompt=user_query)
    model2a_generated_answers.append(response)

In [25]:
model2a_generated_answers[:5]

['An adaptation is a characteristic that an organism has inherited and makes it suited for its environment. It can be classified into three types: structural (physical features), behavioral (actions), and physiological (internal or cellular processes). Adaptations help organisms survive in their environment by reducing water loss, such as through leaf size reduction, and also provide other benefits like protection from the sun or wind.',
 'Reproduction ensures the continuity of a species by allowing for the rapid growth and establishment of new individuals. Asexual reproduction, such as budding or binary fission, can produce offspring that are genetically identical to the parent, while sexual reproduction involves the fusion of gametes from two different sexes, resulting in genetic diversity. This difference highlights the importance of both types of reproduction in maintaining species continuity.',
 'A somatic cell is a type of body cell that makes up most of the non-reproductive cell

### COLLATED GENERATION INTO FILE

In [26]:
ground_truth_df = pd.read_csv("./ground_truth.csv", index_col=0)

In [27]:
#check whether the ground_truth was read correctly
ground_truth_df

Unnamed: 0_level_0,QUESTION,ANSWER,MARKS,CRITERIA
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Define the term ‘adaptation’,Adaptation are the inherited favourable charac...,1,1 mark = correctly definition of adaptation
2,Distinguish between sexual reproduction and as...,"Sexual reproduction, such as meiosis, is the p...",4,1 mark = Define sexual reproduction\r\n\r\n1 m...
3,What is the difference between a somatic cell ...,Somatic cells are all the diploid cells in the...,3,1 mark = Explains the difference between gamet...
4,Where would you find a follicle and what is it...,A follicle is a group of cells in the ovary th...,2,1 mark = Gave the location of the follicle\r\n...
5,"Within a cell cycle, cell replication involves...",The cell cycle involves three phases: interpha...,3,"1 mark = Gives the first phase: interphase, an..."
6,Recall the names of the two cell division proc...,Mitosis—produces identical new cells for growt...,4,1 mark = Mentions mitosis \r\n\r\n1 mark = Men...
7,Explain what is meant by a protein becoming 'd...,A protein is said to be denatured when the hyd...,2,1 mark = Defines what 'denatured' means\r\n\r\...
8,What is the Law of Independent Assortment and ...,"Mendel’s second law of inheritance, the Law of...",1,1 mark = States what Law of independent assort...
9,Define 'genetic testing'.,Genetic testing is a medical test used to dete...,1,1 mark = defines 'Genetic testing'
10,Explain why silent mutations are called 'silent'.,Silent mutations are silent because they do no...,2,1 mark = Mentions silent mutations in the answ...


In [28]:
#create a index list the same size as ground_truth
index_list = []
for i in range(1, len(questions)+1):
    index_list.append(i)

len(index_list)

30

In [29]:
print(len(model1_generated_answers))
print(len(model2_generated_answers))
print(len(model1a_generated_answers))
print(len(model2a_generated_answers))


30
30
30
30


In [30]:
#Combine the response generation into one dataframe
generated_df = pd.DataFrame({
    'Model1_GA': model1_generated_answers,
    'Model2_GA': model2_generated_answers,
    'Model1a_GA': model1a_generated_answers,
    'Model2a_GA': model2a_generated_answers
})

#Make the index the same ID as the ground_truth 

In [34]:
#combine the ground_truth and the generated responses dataframe
combined_df = pd.concat([ground_truth_df, generated_df], axis=1)
combined_df.index.name = "ID"
combined_df.head()

Unnamed: 0_level_0,QUESTION,ANSWER,MARKS,CRITERIA,Model1_GA,Model2_GA,Model1a_GA,Model2a_GA
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Define the term ‘adaptation’,Adaptation are the inherited favourable charac...,1.0,1 mark = correctly definition of adaptation,Sexual reproduction involves the combination o...,Sexual reproduction involves the combination o...,Reproduction ensures the continuity of a speci...,Reproduction ensures the continuity of a speci...
2,Distinguish between sexual reproduction and as...,"Sexual reproduction, such as meiosis, is the p...",4.0,1 mark = Define sexual reproduction\r\n\r\n1 m...,A somatic cell is any cell in the body that is...,"A somatic cell, also known as a body cell or n...",A somatic cell is a body cell that contains tw...,A somatic cell is a type of body cell that mak...
3,What is the difference between a somatic cell ...,Somatic cells are all the diploid cells in the...,3.0,1 mark = Explains the difference between gamet...,A follicle is typically found in the skin or o...,"A follicle is a small, usually rounded or oval...","Unfortunately, the provided documents do not m...",A follicle is not mentioned in the provided te...
4,Where would you find a follicle and what is it...,A follicle is a group of cells in the ovary th...,2.0,1 mark = Gave the location of the follicle\r\n...,The three phases of cell replication within a ...,The three phases of cell replication within a ...,The three phases of cell replication within a ...,The three phases of cell replication within th...
5,"Within a cell cycle, cell replication involves...",The cell cycle involves three phases: interpha...,3.0,"1 mark = Gives the first phase: interphase, an...",The two cell division processes are Mitosis an...,The two primary cell division processes are Mi...,The two cell division processes are mitosis an...,Here's a concise response:\n\nThe two main cel...


In [35]:
combined_df.tail()

Unnamed: 0_level_0,QUESTION,ANSWER,MARKS,CRITERIA,Model1_GA,Model2_GA,Model1a_GA,Model2a_GA
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
27,What is the liver's role in homeostasis? Outli...,The liver minimises the greater changes in blo...,2.0,1 mark = defines the function of the liver\r\n...,A nutritional disease can result from an imbal...,A nutritional disease is typically caused by a...,A nutritional disease results from poor or exc...,"Based on the provided information, a nutrition..."
28,What does a nutritional disease result from?,Nutritional diseases can be caused by a lack o...,1.0,1 mark = correctly identifies what 'nutritiona...,"The ear ossicles, also known as the middle ear...",The two main functions of the ear ossicles are...,"The ear ossicles (malleus, incus, and stapes) ...",The two main functions of the ear ossicles are...
29,Describe the two functions of the ear ossicles.,The three ear bones (ossicles) amplify (increa...,2.0,1 mark = defines one function correctly\r\n\r\...,The two hormones responsible for regulating sa...,The two primary hormones involved in regulatin...,The two hormones responsible for regulating sa...,The two hormones responsible for regulating sa...
30,Name the two hormones responsible for regulati...,ADH (antidiuretic hormone)—responds to water l...,2.0,1 mark = mentions one correct hormone\r\n\r\n1...,,,,
0,,,,,"The term ""adaptation"" refers to a trait or beh...",Adaptation refers to the process by which an o...,An adaptation is a characteristic that an orga...,An adaptation is a characteristic that an orga...


In [36]:
#export the dataframe to a csv file
combined_df.to_csv("combined_df.csv")