In [1]:
!pip install gradio --q

In [2]:
import os
import json

import pandas as pd

import gradio as gr

from langchain_community.document_loaders import CSVLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_chroma import Chroma

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
data = pd.read_csv("https://raw.githubusercontent.com/Bluedata-Consulting/GAAPB01-training-code-base/refs/heads/main/Assignments/assignment2dataset.csv")

In [4]:
data.head()

Unnamed: 0,course_id,title,description
0,C001,Foundations of Machine Learning,Understand foundational machine learning algor...
1,C002,Deep Learning with TensorFlow and Keras,Explore neural network architectures using Ten...
2,C003,Natural Language Processing Fundamentals,Dive into NLP techniques for processing and un...
3,C004,Computer Vision and Image Processing,Learn the principles of computer vision and im...
4,C005,Reinforcement Learning Basics,Get introduced to reinforcement learning parad...


In [5]:
data.shape

(25, 3)

In [6]:
data.to_csv('course_data.csv',index=False)

In [7]:
print(data['description'].iloc[0])

Understand foundational machine learning algorithms including regression, classification, clustering, and dimensionality reduction. This course covers data pre-processing, feature engineering, model selection, hyperparameter tuning, and evaluation metrics. Hands-on labs use scikit-learn and Python to implement end-to-end workflows on real-world datasets, preparing learners for practical machine learning applications with interactive engaging exercises.


In [8]:
embedding_model_name = "models/gemini-embedding-001"
model_name = "gemini-2.0-flash"

In [9]:
csv_loader = CSVLoader(file_path="course_data.csv",
                       csv_args={
                           "delimiter":',',
                           "fieldnames": ['course_id','title','description']
                       })

In [10]:

docs = csv_loader.load()

In [11]:
# docs

In [12]:
# print(docs[2].page_content)

In [13]:
vector_db_path = "Vectordb_chroma_course_recommendation"
os.makedirs(vector_db_path,exist_ok=True)

In [14]:
embeddings = GoogleGenerativeAIEmbeddings(model=embedding_model_name)

In [15]:
vectorstore = Chroma.from_documents(documents=docs, embedding=embeddings,
                                    persist_directory=vector_db_path,collection_name="assignment",
                                    collection_metadata={"use_type":"RECOMMENDATION"})

In [16]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 10})

In [17]:
print("Number of docs dumped into vector DB")
print(len(vectorstore.get()['ids']))

Number of docs dumped into vector DB
78


In [18]:
retrieved_docs = retriever.invoke("""I’ve completed the ‘Python Programming for Data Science’ course and enjoy data
visualization. What should I take next?""")

In [19]:
for doc in retrieved_docs:
    print(doc.page_content)
    print()


course_id: C016
title: Python Programming for Data Science
description: Learn Python fundamentals for data science: variables, control flow, functions, and object-oriented programming. Advance to data handling with pandas, numerical computing with NumPy, and basic plotting with matplotlib. You’ll build reproducible data workflows, clean and transform datasets, and perform exploratory analysis, laying the groundwork for machine learning and statistical modeling projects.

course_id: C016
title: Python Programming for Data Science
description: Learn Python fundamentals for data science: variables, control flow, functions, and object-oriented programming. Advance to data handling with pandas, numerical computing with NumPy, and basic plotting with matplotlib. You’ll build reproducible data workflows, clean and transform datasets, and perform exploratory analysis, laying the groundwork for machine learning and statistical modeling projects.

course_id: C016
title: Python Programming for Da

In [20]:
query = """I’ve completed the ‘Python Programming for Data Science’ course and enjoy data
visualization. What should I take next?"""

In [29]:
def retrieve_docs(query):
    retrieved_docs = retriever.invoke(query)
    results = list(dict.fromkeys([doc.page_content.strip() for doc in retrieved_docs]))
    return '\n\n'.join(results)

In [30]:
# retrieve_docs(query)

In [31]:
with gr.Blocks() as demo:
    gr.Markdown("Course Recommender")
    with gr.Row():
        with gr.Column(scale = 1):
            query_input = gr.Textbox(label = 'Enter your query',lines = 8)
            submit = gr.Button("Search")

        with gr.Column(scale = 4):
            output = gr.Textbox(label = 'Recommended Courses',lines = 20)
    
    submit.click(retrieve_docs,inputs = query_input,outputs = output)

    

In [32]:
demo.launch()

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.




In [33]:
### Output shown in Gradio