In [1]:
# --- Standard library ---
import os
import warnings

# Installed before the remaining imports so the filter is already active
# while those packages load.
warnings.filterwarnings("ignore", category=FutureWarning)

# --- Third-party ---
import openai
from pymilvus import MilvusClient, MilvusException

# --- Project-local modules ---
from config.config import Config
from ingestion.data import DataIngestion
from retriever.document import DocumentRetriever
from generator.llm import rag_module, rag_intendedLO, cot, basic, course, quiz

# Hand the API key from the project configuration to the openai package.
openai.api_key = Config.OPENAI_API_KEY



In [2]:
from pymilvus import MilvusClient

# Open the Milvus Lite database used by the rest of the notebook. The location
# comes from Config.MILVUS_URI (a local DB file for Milvus Lite) so the path is
# configured in exactly one place.
try:
    milvus_client = MilvusClient(Config.MILVUS_URI)
    print(f"Milvus database initialized at {Config.MILVUS_URI}")
except Exception as e:
    print(f"Failed to initialize Milvus Lite: {e}")
    # Re-raise instead of calling exit(1): inside a notebook kernel `exit` does
    # not behave like a script exit code, and later cells would then fail with a
    # confusing NameError on `milvus_client`. Propagating the original exception
    # stops "Run All" at this cell with the full traceback.
    raise

# Backward-compatible alias: this cell previously opened a second client on the
# hardcoded path "milvus_demo.db". Reusing the configured client keeps the name
# available without a duplicate connection or a duplicated path.
client = milvus_client

Milvus database initialized at milvus_demo.db


In [3]:
# The document title is the base of both the input folder and the collection name.
doc_title = "handbook"
txt_folder_path = f"./txt/{doc_title}"
os.environ["OPENAI_API_KEY"] = Config.OPENAI_API_KEY

# Embedding model in use (alternative kept for reference: 'longformer-base-4096').
model_name = 'paraphrase-multilingual-MiniLM-L12-v2'

# Collection name = "<title>-<model>" with every '-' replaced by '_'.
collection_name = f"{doc_title}-{model_name}".replace('-', '_')

# Ingestion helper bound to the Milvus client, target collection and model.
data_ingestion = DataIngestion(milvus_client, collection_name, model_name)

# Ingest only when the check reports that the collection is missing or does
# not match the number of documents in the folder.
collection_is_current = data_ingestion._check_collection(txt_folder_path)
if not collection_is_current:
    data_ingestion.ingest_data(txt_folder_path)

Collection 'handbook_paraphrase_multilingual_MiniLM_L12_v2' already exists and matches the number of new documents.


In [11]:
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from langchain.text_splitter import MarkdownTextSplitter
# --- Build a hybrid (BM25 + FAISS) retriever over the handbook text ---

HANDBOOK_TXT_PATH = './txt/handbook/handbook.txt'
RETRIEVER_TOP_K = 12  # number of chunks each underlying retriever returns

# Read the handbook inside a context manager so the file handle is always
# closed, and decode explicitly as UTF-8 so the result does not depend on the
# platform's default encoding (the handbook contains non-ASCII characters).
with open(HANDBOOK_TXT_PATH, "r", encoding="utf-8") as handbook_file:
    content = handbook_file.read()
os.environ["OPENAI_API_KEY"] = Config.OPENAI_API_KEY

# Split the markdown text into ~2000-character chunks with a 50-character overlap.
splitter = MarkdownTextSplitter(chunk_size=2000, chunk_overlap=50)
md_splitter = splitter.create_documents([content])
md_convert_splitter = [md.page_content for md in md_splitter]

# Sparse (keyword) retriever. Every chunk gets its own metadata dict
# ("source": 1 marks BM25 hits) rather than one dict object shared by all chunks.
bm25_retriever = BM25Retriever.from_texts(
    md_convert_splitter,
    metadatas=[{"source": 1} for _ in md_convert_splitter],
)
bm25_retriever.k = RETRIEVER_TOP_K

# Dense (embedding) retriever backed by an in-memory FAISS index
# ("source": 2 marks FAISS hits).
embedding = OpenAIEmbeddings()
faiss_vectorstore = FAISS.from_texts(
    md_convert_splitter,
    embedding,
    metadatas=[{"source": 2} for _ in md_convert_splitter],
)
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": RETRIEVER_TOP_K})

# Combine both retrievers with equal weight.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5]
)

In [12]:
# Question reused by the later retrieval and generation cells.
learning_outcome_query = (
    "what is the intended learning outcome in "
    "Argumentation, Data Visualization and Communication module?"
)

# Run the hybrid retriever once to inspect what it returns for this question.
# NOTE: despite its name, `retriever` holds the value returned by invoke(),
# not a retriever object.
retriever = ensemble_retriever.invoke(learning_outcome_query)


[Document(metadata={'source': 2}, page_content='These lecture-tutorial combinations provide the students with a general overview about basic concepts and theories in economics and business. In the associated tutorials, students have the opportunity to integrate the material taught in the lectures through discussions of related concepts, policy problems, or scientific studies, and through cases and exercises.\n\nThe remaining CHOICE modules (15 CP) can be selected in the first year of studies according to interest and/or with the aim to allow a change of major until the beginning of the second year, when the major choice becomes fixed. Students can still change to another major at the beginning of their second year of studies, provided they have taken the corresponding mandatory CHOICE modules in their first year of studies. All students must participate in an entry advising session with their Academic Advisors to learn about their major change options and consult their Academic Advisor

In [13]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
# Prompt text: a role description followed by two slots, {context} for the
# retrieved handbook chunks and {question} for the user's query.
PROMPT_TEMPLATE = """
You are a course content planner to help with creating course outline and course content according to handbook information.
Provided with handbook curriculum information and intended learning outcome with other details , list down all subject or module relevant in the document.
Do not hallucinate and add additional information outside of this document

Given the following query and relevant context, please provide a comprehensive and accurate response:

<context>
{context}
</context>

<question>
{question}
</question>

Response:"""

# Chat model created with the library's default settings (no arguments passed).
llm = ChatOpenAI()

# Template object exposing the two placeholders as input variables.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=PROMPT_TEMPLATE,
)

def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in input order, separated by a blank line ("\\n\\n").
        An empty iterable yields an empty string.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

# First stage of the chain: the incoming question is sent to the hybrid
# retriever (whose documents are flattened by format_docs) and is also passed
# through unchanged, producing the two values the prompt expects.
chain_inputs = {
    "context": ensemble_retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: gather inputs -> fill prompt -> call the model -> plain string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

rag_output = rag_chain.invoke(learning_outcome_query)
print(rag_output)



Based on the provided handbook information, the list of modules in this document includes:
1. Applied Calculus
2. Applied Statistics with R
3. Qualitative Research Methods
4. Econometrics
5. Logic (perspective I)
6. Logic (perspective II)
7. Causation and Correlation (perspective I)
8. Causation and Correlation (perspective II)
9. Linear Model and Matrices
10. Complex Problem Solving
11. Argumentation, Data Visualization and Communication (perspective I)
12. Argumentation, Data Visualization and Communication (perspective II)
13. Agency, Leadership, and Accountability
14. Microeconomics
15. Macroeconomics
16. Introduction to International Business
17. Introduction to Finance and Accounting
18. Fundamentals of Earth Sciences
19. Environmental Systems and Global Change
20. General Industrial Engineering
21. General Logistics
22. Community Impact Project
23. Languages
24. Humanities
25. Introduction to Philosophical Ethics

These modules cover a wide range of topics in economics, business

In [8]:
# # Document title becomes collection name
# doc_title = "handbook"
# txt_folder_path = './txt/' + doc_title
# os.environ["OPENAI_API_KEY"] = Config.OPENAI_API_KEY

# # model_name = 'longformer-base-4096'
# model_name = 'paraphrase-multilingual-MiniLM-L12-v2'
# collection_name = (doc_title + '-' + model_name).replace('-', '_')

# # Create DataIngestion instance
# data_ingestion = DataIngestion(milvus_client, collection_name, model_name)

# # Check if collection exists and matches the number of new documents
# if not data_ingestion._check_collection(txt_folder_path):
#     # Ingest data into Milvus if collection does not exist or does not match
#     data_ingestion.ingest_data(txt_folder_path)

In [14]:
k = 5  # Number of results to return

# Query the Milvus collection built during ingestion with the same question
# used for the ensemble retriever.
document_retriever = DocumentRetriever(milvus_client, collection_name, model_name)
# retrieve_documents returns two values; only `search_results` is used below.
# NOTE(review): `bm25` is presumably the keyword-search counterpart; confirm
# against DocumentRetriever.
search_results, bm25 = document_retriever.retrieve_documents(learning_outcome_query, k=k)

# Show the top hit to check the structure of the results. Guard against an
# empty result list or a hit without a 'document' field (the context-building
# step applies the same 'document' check) instead of failing with an
# IndexError/KeyError.
if len(search_results) == 0:
    print("No search results were returned for the query.")
elif 'document' not in search_results[0]:
    print("The top search result has no 'document' field.")
else:
    print(f"{search_results[0]['document']}")

1

# Page Content

## Text
- C ONSTRUCTOR
- C >O NSTRUCTOR
- UNIVERSITY
- UNIVERSITY
- 3,8652
- 45784
- Study
- Program
- Handbook
- 3574
- Global Economics and Management
- Bachelor of Arts
```

### Image Identification and Description
- No images, graphs, or other graphical elements were identified on this p
# Subject-specific Examination Regulations for Global Economics and Management

The subject-specific examination regulations for Global Economics and Management are defined by this program handbook and are valid only in combination with the General Examination Regulations for Undergraduate degree programs (General Examination Regulations = Rahmenprüfungsordnung). This handbook also contains the program-specific Study and Examination Plan (see Chapter 6). Upon graduation, students in this program will receive a Bachelor of Arts (BA) degree with a scope of 180 ECTS (for specifics see Chapter 4 of this handbook).

| Current version | Valid as of | Decision         | Details         

In [15]:
# Collect one "Result: ..." line for every hit that carries a 'document' field.
context_lines = []
for res in search_results:
    if 'document' in res:
        context_lines.append(f"Result: {res['document']}")
context = "\n".join(context_lines)

# Ask the project's RAG helper to answer the question using that context.
llm_response = rag_module(learning_outcome_query, context)
print(llm_response)

Based on the provided handbook curriculum information, the relevant modules in the Global Economics and Management program are as follows:

### Global Economics and Management Modules:
1. Microeconomics
2. Macroeconomics
3. Introduction to International Business
4. Introduction to Finance and Accounting
5. Development Economics
6. Environmental and Resource Economics
7. Comparing Economic Systems
8. International Economics
9. Marketing
10. Organization and Human Resource Management
11. Advanced Econometrics
12. Managing Public and Nonprofit Organizations
13. Information Economics
14. Lean Management
15. Managerial Accounting
16. Financial Data Analytics
17. Contemporary Topics in Marketing
18. Internship / Startup and Career Skills
19. Bachelor Thesis and Seminar

### Constructor Track Modules:
#### Methods Modules:
1. Applied Calculus
2. Applied Statistics with R
3. Qualitative Research Methods
4. Econometrics

#### New Skills Modules:
1. Logic (perspective I)
2. Logic (perspective II