In [6]:
# Day 07 – Basic RAG Style Search
# AlgoProfessor AI Internship

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

print("Day 07 - Retrieval Based Search System")


# Step 1: Build a tiny in-memory knowledge base
#
# Each entry is one short fact about iris flowers; the search cells below
# look these sentences up by similarity to a user query.

data = [
    "Iris setosa flowers are small with short petals.",
    "Iris versicolor has medium sized petals.",
    "Iris virginica species contains long petals.",
    "Sepal length helps identify iris flower type.",
    "Petal width is useful for classification.",
]

# Single-column frame: one row per sentence, default 0..n-1 index.
df = pd.DataFrame({"description": data})

print("\nDataset Preview:")
print(df.head())



Day 07 - Retrieval Based Search System

Dataset Preview:
                                        description
0  Iris setosa flowers are small with short petals.
1          Iris versicolor has medium sized petals.
2      Iris virginica species contains long petals.
3     Sepal length helps identify iris flower type.
4         Petal width is useful for classification.


In [7]:
# Step 2: Encode every description as a TF-IDF vector
#
# The vectorizer is fitted on the descriptions first and then used to encode
# those same descriptions, giving one row per description in `vectors`.

vectorizer = TfidfVectorizer().fit(df["description"])
vectors = vectorizer.transform(df["description"])

print("\nVectorization completed")
print("Shape:", vectors.shape)



Vectorization completed
Shape: (5, 28)


In [8]:
# Step 3: Search function


def find_similar_text(user_query):
    """Print the stored description that is most similar to ``user_query``.

    The query is encoded with the notebook-level ``vectorizer`` and compared
    against ``vectors`` using cosine similarity. The best-scoring row of
    ``df`` is printed together with its score rounded to three decimals.

    When every similarity score is zero there is nothing meaningful to
    report, so a "no match" message is printed instead of presenting an
    arbitrary row with a score of 0.0.

    Parameters
    ----------
    user_query : str
        Free-text search query.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    query_vector = vectorizer.transform([user_query])

    # Single query -> take the one row of scores (one score per description).
    similarity_scores = cosine_similarity(query_vector, vectors)[0]

    best_index = int(similarity_scores.argmax())
    best_score = float(similarity_scores[best_index])

    print("\nUser Query:", user_query)

    # argmax of an all-zero row is 0, which would wrongly present the first
    # description as the "best match"; report the miss explicitly instead.
    if best_score <= 0.0:
        print("No relevant match found.")
        return

    print("Best Match Found:")
    print(df.iloc[best_index]["description"])
    print("Similarity Score:", round(best_score, 3))

In [9]:
# Step 4: Try the search on a few sample queries
#
# Each query is run through the same search function, in this order.

for query in (
    "flower with long petals",
    "classification feature",
    "short flower",
):
    find_similar_text(query)


User Query: flower with long petals
Best Match Found:
Iris virginica species contains long petals.
Similarity Score: 0.357

User Query: classification feature
Best Match Found:
Petal width is useful for classification.
Similarity Score: 0.408

User Query: short flower
Best Match Found:
Sepal length helps identify iris flower type.
Similarity Score: 0.281
