In [37]:
# Install necessary libraries
!pip install faiss-cpu
!pip install langchain-community
!pip install sentence-transformers faiss-cpu gradio pandas



In [38]:
import faiss
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer

In [39]:
# Read the course catalogue CSV into a DataFrame.
# NOTE(review): the path points into Google Drive under /content/drive; no mount
# call appears in the cells shown here, so Drive is presumably mounted beforehand.
data = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Colab Notebooks/AnalyticsVidhya.csv"
)

In [40]:
# Show the first five rows as plain text to confirm the file was parsed as expected
print(data.head(n=5))

                                               title  \
0           Frameworks for Effective Problem Solving   
1           Anyone can Build Al Agents - Free Course   
2  A Comprehensive Learning Path to Become a Data...   
3  Reimagining GenAl: Common Mistakes and Best Pr...   
4  Coding a ChatGPT-style Language Model from Scr...   

                                         description  \
0  This course focuses on various problem-solving...   
1  In this free course, learners will explore how...   
2  This course provides a structured roadmap to b...   
3  This course delves into the common pitfalls wh...   
4  This hands-on course teaches how to build a st...   

                                          curriculum  
0  Introduction to Problem Solving Frameworks\nTh...  
1  Introduction to AI Agents and Their Applicatio...  
2  Introduction to Data Analytics and the Data An...  
3  Overview of Generative AI (GenAI)\nCommon Mist...  
4  Introduction to Transformers and Language Mode..

In [41]:
# Check the column names in the dataset; the preprocessing cells that follow
# read the 'title', 'description' and 'curriculum' columns by name.
print(data.columns)

Index(['title', 'description', 'curriculum'], dtype='object')


In [42]:
# Preprocessing: trim leading/trailing whitespace in each of the three text columns
for text_column in ("title", "description", "curriculum"):
    data[text_column] = data[text_column].str.strip()

In [43]:
# Build the text that gets embedded: title, description and curriculum
# joined with single spaces, in that order.
data["content"] = (
    data["title"]
    .add(" ")
    .add(data["description"])
    .add(" ")
    .add(data["curriculum"])
)

In [44]:
# Discard every row in which at least one of the three source text columns is missing
data = data.dropna(
    axis=0,
    how="any",
    subset=["title", "description", "curriculum"],
)

In [45]:
# Load the sentence-embedding model by name (no OpenAI API key required)
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [46]:
# Sanity check: print the first five rows of the combined content column
print(data["content"].head(n=5))

0    Frameworks for Effective Problem Solving This ...
1    Anyone can Build Al Agents - Free Course In th...
2    A Comprehensive Learning Path to Become a Data...
3    Reimagining GenAl: Common Mistakes and Best Pr...
4    Coding a ChatGPT-style Language Model from Scr...
Name: content, dtype: object


In [47]:
# Embed the combined text of every course, showing a progress bar while encoding
embeddings = model.encode(
    data["content"].to_list(),
    show_progress_bar=True,
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [48]:
# Create an (initially empty) FAISS index for the course embeddings.
# The index is sized from the second axis of `embeddings`
# (presumably shaped (n_courses, embedding_dim) - TODO confirm).
embedding_dim = embeddings.shape[1]
# IndexFlatL2 is constructed with the vector dimensionality only; vectors are added in a later cell.
index = faiss.IndexFlatL2(embedding_dim)

In [49]:
# Add the embeddings to the FAISS index.
# reset() empties the index first so that re-running this cell replaces the stored
# vectors instead of appending a second copy, which would produce duplicate hits
# and positions past the end of `data` during search.
index.reset()
index.add(embeddings)

In [50]:
# Function to search for courses based on the query
def search_courses(query, k=5):
    """Return the courses whose embedded text is closest to ``query``.

    The query is embedded with the module-level ``model`` and looked up in the
    module-level FAISS ``index``; each hit is mapped back to a row of ``data``
    by position.

    Args:
        query: Free-text search string.
        k: Maximum number of courses to return. Defaults to 5.

    Returns:
        A list of dicts with the keys "title", "description" and "curriculum",
        in the order returned by the index search. The list is empty when the
        query is blank, when the index holds no vectors, or when ``k`` < 1.
    """
    # A blank query carries no information to rank by.
    if not query or not query.strip():
        return []

    # Never ask for more neighbours than the index holds.
    top_k = min(k, index.ntotal)
    if top_k < 1:
        return []

    query_embedding = model.encode([query])  # one row: the embedded query

    # Only the row positions are needed; the distances are discarded.
    _, indices = index.search(query_embedding, k=top_k)

    output = []
    for idx in indices[0]:
        # FAISS fills missing neighbours with -1; data.iloc[-1] would silently
        # return the last course, so such slots are skipped.
        if idx < 0:
            continue
        course = data.iloc[int(idx)]
        output.append({
            "title": course["title"],
            "description": course["description"],
            "curriculum": course["curriculum"]
        })
    return output

In [51]:
# Gradio callback: run the search and render the hits as one block of text
def gradio_search(query):
    """Format the search results for ``query`` as a single string.

    Each course becomes three lines (title, description, curriculum), and
    consecutive courses are separated by one blank line.
    """
    formatted_courses = []
    for course in search_courses(query):
        formatted_courses.append(
            f"**Title:** {course['title']}\n**Description:** {course['description']}\n**Curriculum:** {course['curriculum']}"
        )
    return "\n\n".join(formatted_courses)

In [52]:
# Assemble the Gradio UI: a text input wired to gradio_search, with a text output
interface = gr.Interface(
    title="Smart Search for Courses",
    description="Search Analytics Vidhya's free courses using smart search.",
    fn=gradio_search,
    inputs="text",
    outputs="text",
)

In [53]:
# Launch the app.
# launch() is called with no arguments; in the recorded Colab run Gradio enabled
# share=True on its own and served the app on a temporary public gradio.live URL.
interface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://393839d3c645fbb695.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


