In [7]:
import os

from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma

try:
    current_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # Fallback for environments where __file__ is not defined
    current_dir = os.getcwd()
    
file_path = os.path.join(current_dir, "books", "OpenStax_Physics_Required.pdf")
db_dir = os.path.join(current_dir, "db")

if not os.path.exists(file_path):
    raise FileNotFoundError(
        f"The file {file_path} does not exist. Please check the path."
    )

# Read the text content from the file
loader = PyPDFLoader(file_path)
documents = loader.load()

# Split the document into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(documents)

def create_vector_store(docs, embeddings, store_name):
    persistent_directory = os.path.join(db_dir, store_name)
    if not os.path.exists(persistent_directory):
        print(f"\n--- Creating vector store {store_name} ---")
        Chroma.from_documents(
            docs, embeddings, persist_directory=persistent_directory)
        print(f"--- Finished creating vector store {store_name} ---")
    else:
        print(
            f"Vector store {store_name} already exists. No need to initialize.")
        

huggingface_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    encode_kwargs={"normalize_embeddings": True},
)

create_vector_store(docs, huggingface_embeddings, "chroma_db_huggingface")

print("Embedding demonstrations for Hugging Face completed.")


--- Creating vector store chroma_db_huggingface ---
--- Finished creating vector store chroma_db_huggingface ---
Embedding demonstrations for Hugging Face completed.


In [61]:
import os
from dotenv import load_dotenv
 
load_dotenv()

TOGETHER_KEY = os.getenv('TOGETHER_API_KEY')
# OPENAI_KEY = os.getenv('OPENAI_API_KEY')

In [94]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import tool
from langchain_together import ChatTogether
 
from typing import List
 
topic_identifier_system = """Analyze user input and identify the physics topic mentioned in the input. Do not return the input text.
For example:
- "Create questions about velocity" -> "velocity"
- "Explain displacement" -> "displacement"
 
Return only the identified physics topic name as given in the input."""
 
topic_check = ChatPromptTemplate.from_messages([
   ("system", topic_identifier_system),
   ("placeholder", "{messages}")
]) | ChatTogether(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo", temperature=0
    )
 
input = "Create 5 questions on displacement."
 
topic = topic_check.invoke({"messages": [("user", input)]}).content

In [95]:
topic

'displacement'

In [96]:
from langchain_core.prompts import ChatPromptTemplate

from langchain_together import ChatTogether
from langchain_openai import ChatOpenAI

import json

def query_vector_store(store_name, query, embedding_function):
    persistent_directory = os.path.join(db_dir, store_name)
    if os.path.exists(persistent_directory):
        print(f"\n--- Querying the Vector Store {store_name} ---")
        db = Chroma(
            persist_directory=persistent_directory,
            embedding_function=embedding_function,
        )
        retriever = db.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3},
        )
        # Display the relevant results with metadata
        return retriever

system_prompt = (
    "Create one multiple choice question for {skill} level of Bloom's taxonomy for a 9th grade Physics student in India on {topic}."
    "Requirements:"
    "Student should only be able to answer if they've mastered the concept"
    "Each distractor must address either: A specific misconception about {topic} or A prerequisite knowledge gap"
    "Language and complexity suitable for 9th grade"
    "Physics context and application"
    "Use {context} for accuracy in creating the questions and distractors."
    "Format as the output as a JSON:"
 
        """{{{{
        "question": "",
        "skill": ""
        "options": {{"a": "", "b": "", "c": "", "d": ""}},
        "correct": "",
        "explanation": {{
            "correct": "",
            "a": "misconception/prerequisite tested",
            "b": "", "c": "", "d": ""
        }}
        }}}}"""
 
    "For {skill} level, ensure:"
    "{skill_requirement}"
    "Make sure there are no additional information being other than the output in the format that is asked for."
)

skill_requirements = {
   "Remember": "Question tests ability to retrieve relevant knowledge from long-term memory.",
   "Understand": "Question tests ability to onstruct meaning from instructional messages, including oral, written, and graphic communication.",
   "Apply": "Question tests ability to carry out or use a procedure in a given situation.",
   "Analyze": "Question tests ability to break material into foundational parts and determine how parts relate to one another and the overall structure or purpose.",
   "Evaluate": "Question tests ability to make judgments based on criteria and standards."
}
 
skills = ["Remember", "Understand", "Apply", "Analyze", "Evaluate"]

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# llm = ChatOpenAI(model="gpt-4o",
#                  api_key=os.getenv("OPENAI_API_KEY"))

# llm = ChatTogether(
#     model="Qwen/Qwen2.5-72B-Instruct-Turbo"
#     )

llm = ChatTogether(
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo"
    )

# llm = ChatTogether(
#     model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
#     )

# llm = ChatOpenAI(model="gpt-4o-mini",
#                  api_key=os.getenv("OPENAI_API_KEY"))

query = {"input": '{input}'}

def generate_assessment(topic, llm):
   responses = {
       "topic": topic,
       "questions": []
   }
   
   for skill in skills:
       
       print(skill)
       retriever = query_vector_store("chroma_db_huggingface", query['input'], huggingface_embeddings)
       documents = retriever.invoke(query['input'])
       
       # Combine the retrieved documents into a single context string
       context = " ".join(doc.page_content for doc in documents)

       prompt = system_prompt.format(
           skill=skill,
           topic=topic,
           context = context,
           skill_requirement=skill_requirements[skill]
       )

       response = llm.invoke(prompt)
       cleaned_content = response.content.strip()
       if cleaned_content.startswith("```json"):
           cleaned_content = cleaned_content[7:-3]
           
       try:
           question_json = json.loads(cleaned_content)
           responses["questions"].append(question_json)
       except json.JSONDecodeError as e:
           print(response)
           print(f"Error parsing {skill} response")
   
   model_name = llm.model_name.split('/')[-1]
   filename = f"RAG_{topic}_{model_name}.json"
   
   with open(filename, "w") as f:
       json.dump(responses, f, indent=2, ensure_ascii=False)
       
   return responses
 
final_assessment = generate_assessment(topic, llm)

Remember

--- Querying the Vector Store chroma_db_huggingface ---
Understand

--- Querying the Vector Store chroma_db_huggingface ---
Apply

--- Querying the Vector Store chroma_db_huggingface ---
Analyze

--- Querying the Vector Store chroma_db_huggingface ---
Evaluate

--- Querying the Vector Store chroma_db_huggingface ---
