In [24]:
import glob
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_core.documents import Document


In [25]:
# Single Markdown file that contains the course listing to be indexed.
markdown_path = "CourseInfo/OnlineCourses.md"
# Folder where the Chroma vector store is persisted on disk.
persist_directory = r'./chroma_courses_db'
# Loader that reads the Markdown file when .load() is called.
loader = UnstructuredMarkdownLoader(markdown_path)

In [26]:
# Load the Markdown file and sanity-check the result before using it.
data = loader.load()
# The rest of the notebook relies on the loader returning exactly one Document.
assert len(data) == 1
first_document = data[0]
assert isinstance(first_document, Document)
# Keep the raw text around and preview its first 250 characters.
readme_content = first_document.page_content
print(readme_content[:250])


Course Name: 2. Master Filmora: Editing, Motion Graphics, and Color Grading Course Description: Learn professional video editing, motion graphics, and color grading using Filmora. Course Link: https://courze.org/master-filmora-editing-motion-graphics


In [27]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded document into overlapping chunks for embedding
# (chunk_size=1000 with chunk_overlap=200 between consecutive chunks).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(data)


In [28]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Embedding model used for both indexing and querying: a sentence-transformers
# model loaded locally through the Hugging Face wrapper.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)


In [29]:
# Build the Chroma index only when no persisted index exists yet.
# Without explicit ids, Chroma.from_documents adds the chunks to whatever collection
# already lives in persist_directory, so re-running this cell unguarded stores every
# chunk again and the retriever starts returning duplicate passages.
# To rebuild after the source Markdown changes, delete the persist_directory folder first.
from pathlib import Path

_index_dir = Path(persist_directory)
if _index_dir.is_dir() and any(_index_dir.iterdir()):
    # A persisted index is already on disk: re-open it instead of re-adding the chunks.
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
else:
    # First run (or index folder removed): embed the chunks and persist them.
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory=persist_directory)


In [30]:
import glob
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_core.documents import Document


In [31]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import PromptTemplate

In [33]:
# Re-open the Chroma store persisted under persist_directory, embedding queries
# with the same embedding function that was used for indexing.
vectorstore = Chroma(embedding_function=embeddings, persist_directory=persist_directory)


In [34]:
from langchain import hub

# Retriever over the Chroma store, using its default search settings.
retriever = vectorstore.as_retriever()
# Prompt pulled from the LangChain Hub under the id "rlm/rag-prompt".
prompt = hub.pull("rlm/rag-prompt")




In [36]:
# Instruction text asking the model to answer with a JSON "Course" object.
# In this notebook call_rag() prepends this string to the user's question and sends the
# result as the chain's "query"; nothing in the visible code substitutes the {context}
# and {question} placeholders, so they appear to reach the model as literal text.
# NOTE(review): the schema key is spelled "CourseDescrption" while rule 2 refers to
# "CourseDescription" -- confirm which spelling downstream consumers expect before changing it.
custom_template_str = """You are an expert assistant helping users extract structured information about online courses from a knowledge base.

Based on the context provided, extract information and return it in the following JSON format:

Course {    
  "CourseName" : String,
  "CourseDescrption" : String,
  "CourseLink" : String
}
1. Return response only in the JSON format shown above
2. CourseDescription field should be only one sentence
3. CourseLink should be a link to the course

Context:
{context}

Question:
{question}
"""
# Shorter variant of the JSON-only instruction, without placeholders.
# NOTE(review): not referenced anywhere else in the visible part of the notebook; it carries
# the same "CourseDescrption" spelling as the template above.
retformat = """Based on the context provided, extract information and return as a JSON object only as described below! Do not add any notes or additional information in the response:
Course {    
  "CourseName" : String,
  "CourseDescrption" : String,
  "CourseLink" : String
}

"""

In [37]:
def format_docs(docs):
    """Join the text of several documents into one newline-separated string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a single
        newline; an empty string when ``docs`` is empty.
    """
    contents = [item.page_content for item in docs]
    return "\n".join(contents)


In [38]:
from langchain.chat_models import ChatOpenAI

# Read the Groq API key from the environment variable 'Groq_APIKey'.
api_key = os.environ.get('Groq_APIKey')
if not api_key:
    # Fail fast with a clear message: passing None on would leave the client to its own
    # default key lookup and surface a confusing error (or send an unrelated key to Groq).
    raise RuntimeError(
        "Environment variable 'Groq_APIKey' is not set; "
        "define it in your environment or .env file before creating the LLM client."
    )

# The OpenAI-style chat client is pointed at Groq's OpenAI-compatible endpoint.
llm = ChatOpenAI(
    model="llama3-70b-8192",
    openai_api_key=api_key,
    openai_api_base="https://api.groq.com/openai/v1"
)


In [39]:
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# Retriever over the persisted Chroma store (default search settings).
retriever = vectorstore.as_retriever()

# Retrieval QA chain: retrieves documents through the retriever, passes them to the LLM
# with the hub prompt, and also returns the retrieved source documents.
qa_chain_options = {"prompt": prompt}
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=qa_chain_options,
)

In [40]:
def call_rag(question):
    """Run the RAG chain for ``question`` and print the model's answer.

    The instruction text in ``custom_template_str`` is prepended to the question
    and the combined string is sent as the chain's ``query``.

    Args:
        question: The user's question as a string.

    Returns:
        The full response returned by ``rag_chain.invoke``; its ``'result'``
        entry is the text that gets printed.
    """
    # NOTE(review): the whole instruction template travels inside the query, so it is
    # presumably also what the retriever searches with -- confirm this is intended.
    full_query = custom_template_str + question
    response = rag_chain.invoke({"query": full_query})
    answer_text = response['result']
    print('\nResponse from RAG/LLM: \n', answer_text)
    return response

In [41]:
# Example query: ask the chain for a web-development course recommendation.
response = call_rag("Recommend course on Web Dev")



Response from RAG/LLM: 
 {
"CourseName" : "Master Electron: Build Desktop Applications using JavaScript",
"CourseDescrption" : "Build Desktop Applications using JavaScript",
"CourseLink" : "https://courze.org/master-electron-build-desktop-applications-using-javascript/"
}
