## Install necessary libraries

In [51]:
import os, sys
os.environ["TOKENIZERS_PARALLELISM"] = 'false'

# Redirect output to devnull
sys.stdout = open(os.devnull, 'w')


# install required dependencies
!pip install faiss-cpu
!pip install sentence-transformers
!pip install pandas

# Reset stdout to default
sys.stdout = sys.__stdout__

## Import required libraries

In [26]:
import pandas as pd
#pd.set_option("diplay.max_colwidth",100)
from sentence_transformers import SentenceTransformer
import faiss

## Load Data from CSV/ List

In [27]:
data = [
    # Health
    ("Fruits are rich in vitamins and antioxidants, promoting better health.", "Health"),
    ("Regular exercise can help maintain a healthy body and mind.", "Health"),
    ("Drinking plenty of water every day is essential for hydration.", "Health"),
    ("Eating a balanced diet with plenty of vegetables supports long-term health.", "Health"),
    ("Getting enough sleep each night is crucial for overall wellness.", "Health"),
    
    # Fashion
    ("The fall collection features oversized jackets and vintage boots.", "Fashion"),
    ("Denim skirts are making a comeback this season in various styles.", "Fashion"),
    ("Layering is the key to styling your wardrobe for colder weather.", "Fashion"),
    ("Street style this year is all about bold prints and oversized silhouettes.", "Fashion"),
    ("Sustainable fashion is growing in popularity as consumers become more eco-conscious.", "Fashion"),
    
    # Events
    ("The annual jazz festival will be held at the city park this weekend.", "Event"),
    ("Join the local community for the winter arts and crafts fair next Saturday.", "Event"),
    ("A new theater production opens this Friday, featuring a cast of emerging talents.", "Event"),
    ("The summer carnival is just around the corner, offering fun rides and food stalls.", "Event"),
    ("The charity auction is scheduled for next month at the convention center.", "Event"),
    
    # Travel
    ("For a tropical escape, you can visit the beautiful beaches of Bali.", "Travel"),
    ("Exploring the ancient ruins of Machu Picchu is a must-do for any traveler.", "Travel"),
    ("Paris is renowned for its art, culture, and iconic landmarks like the Eiffel Tower.", "Travel"),
    ("A road trip along the Pacific Coast Highway offers stunning ocean views.", "Travel"),
    ("New Zealand offers a wide range of outdoor adventures, from hiking to bungee jumping.", "Travel"),
    
    # Food & Dining
    ("Italian cuisine is known for its rich flavors, especially in pasta and pizza dishes.", "Food & Dining"),
    ("A healthy salad with quinoa and fresh vegetables is perfect for lunch.", "Food & Dining"),
    ("The local bakery offers a variety of fresh pastries and sandwiches every morning.", "Food & Dining"),
    ("Seafood lovers can enjoy the freshest catch at the coastal restaurant.", "Food & Dining"),
    ("Gourmet coffee paired with dark chocolate makes a perfect afternoon treat.", "Food & Dining"),
    
    # Technology
    ("The latest smartphone model offers advanced camera features and a longer battery life.", "Technology"),
    ("Virtual reality is revolutionizing the gaming industry with more immersive experiences.", "Technology"),
    ("AI technology is becoming a valuable tool in various industries, including healthcare.", "Technology"),
    ("Smart homes are increasingly common, with devices like smart thermostats and lights.", "Technology"),
    ("The latest software update includes several security enhancements and new features.", "Technology"),
    
    # Health & Fitness
    ("Yoga is a great way to improve flexibility and reduce stress levels.", "Health & Fitness"),
    ("Cardio exercises, like running or cycling, are effective for burning calories.", "Health & Fitness"),
    ("Strength training can help increase muscle mass and improve metabolism.", "Health & Fitness"),
    ("Drinking green tea is known for its antioxidant properties and health benefits.", "Health & Fitness"),
    ("Getting at least 30 minutes of physical activity every day promotes overall health.", "Health & Fitness"),
    
    # Sports
    ("Football is one of the most popular sports worldwide, enjoyed by millions of fans.", "Sports"),
    ("Tennis players often use a combination of agility and power to win matches.", "Sports"),
    ("Basketball games are fast-paced, requiring quick reflexes and teamwork.", "Sports"),
    ("Golf requires precision and focus, with players aiming to get the ball into the hole with as few strokes as possible.", "Sports"),
    ("Swimming is a great full-body workout that improves endurance and strength.", "Sports"),
    
    # Music & Entertainment
    ("The Grammy Awards celebrate outstanding achievements in the music industry.", "Music & Entertainment"),
    ("Rock bands often perform at large stadiums, attracting thousands of fans.", "Music & Entertainment"),
    ("Indie artists are gaining popularity for their unique sound and creative approach to music.", "Music & Entertainment"),
    ("Pop music continues to dominate the charts with catchy tunes and upbeat rhythms.", "Music & Entertainment"),
    ("The film festival showcases the best of international cinema each year.", "Music & Entertainment"),
    
    # Education & Learning
    ("Online learning platforms are providing accessible education to people worldwide.", "Education & Learning"),
    ("Reading books on history can help expand your knowledge of different cultures.", "Education & Learning"),
    ("Learning a second language opens up new opportunities for travel and work.", "Education & Learning"),
    ("Study groups can be a great way to share knowledge and improve exam performance.", "Education & Learning"),
    ("Taking notes during lectures is an effective way to retain information.", "Education & Learning"),
    
    # Finance & Shopping
    ("Shopping online offers convenience and often better deals than in-store shopping.", "Finance & Shopping"),
    ("Using a budget helps you manage your spending and save money for future goals.", "Finance & Shopping"),
    ("Investing in stocks can be a good way to grow your wealth over time.", "Finance & Shopping"),
    ("Sales events, such as Black Friday, provide great opportunities to buy discounted items.", "Finance & Shopping"),
    ("It’s important to compare prices before making any major purchase to get the best deal.", "Finance & Shopping"),
    
    # Lifestyle
    ("Living a minimalist lifestyle can help reduce clutter and create a sense of calm.", "Lifestyle"),
    ("Taking a walk in nature can be a great way to relax and clear your mind.", "Lifestyle"),
    ("Volunteering for a cause you care about is a meaningful way to give back to the community.", "Lifestyle"),
    ("Practicing mindfulness can help reduce stress and improve mental well-being.", "Lifestyle"),
    ("Traveling allows you to experience different cultures and expand your perspective.", "Lifestyle")
]


df = pd.DataFrame(data, columns=['text', 'category'])
# df= pd.read_csv("sample_sentenses.csv")
df.shape

(60, 2)

In [28]:
df

Unnamed: 0,text,category
0,"Fruits are rich in vitamins and antioxidants, ...",Health
1,Regular exercise can help maintain a healthy b...,Health
2,Drinking plenty of water every day is essentia...,Health
3,Eating a balanced diet with plenty of vegetabl...,Health
4,Getting enough sleep each night is crucial for...,Health
5,The fall collection features oversized jackets...,Fashion
6,Denim skirts are making a comeback this season...,Fashion
7,Layering is the key to styling your wardrobe f...,Fashion
8,Street style this year is all about bold print...,Fashion
9,Sustainable fashion is growing in popularity a...,Fashion


## Make vectors for loaded data

In [29]:
encoder = SentenceTransformer("all-mpnet-base-v2")
vectors = encoder.encode(df.text)
vectors.shape


(60, 768)

## Get Vectors shape (Dimension)

In [30]:
dim = vectors.shape[1]
dim

768

## Add vectors to FAISS Index 

In [31]:
index = faiss.IndexFlatL2(dim)
index

<faiss.swigfaiss.IndexFlatL2; proxy of <Swig Object of type 'faiss::IndexFlatL2 *' at 0xffff37e237e0> >

In [32]:
index.add(vectors)

## Convert Search query to Vector

In [33]:
search_query = "I want to buy a good formal shirt for office"
vec = encoder.encode(search_query)
vec.shape

(768,)

## Convert single dimensional array to two dimensional array

In [34]:
import numpy as np
svec = np.array(vec).reshape(1, -1)
svec.shape

(1, 768)

In [35]:
# Here Vec is single array
vec

array([-2.57991031e-02,  3.55879515e-02,  2.27258494e-03,  1.42115420e-02,
       -1.89389195e-02, -4.56868149e-02,  2.11166739e-02,  6.95221797e-02,
       -6.61924388e-03,  4.23663743e-02,  9.82517283e-03, -3.23184766e-02,
        1.69982798e-02,  8.90896022e-02, -4.44575585e-03,  1.31926714e-02,
        2.76567936e-02,  2.50751842e-02,  6.37881011e-02, -1.50687788e-02,
       -2.26934440e-02, -7.65429065e-03, -5.28059027e-04, -1.39976125e-02,
        4.16568518e-02, -2.81298403e-02,  3.22693735e-02,  3.33307050e-02,
       -5.75153045e-02,  3.22678387e-02,  8.38346034e-02, -1.52588333e-03,
       -2.55068298e-02,  3.87023725e-02,  1.17861646e-06,  2.51762290e-02,
        3.66419144e-02, -6.88562617e-02,  1.73385479e-02,  1.25989702e-03,
        2.27525942e-02,  6.48351386e-02, -4.26584929e-02, -3.76666896e-03,
       -8.12522322e-03, -1.32075688e-02, -1.55175775e-02,  4.73976135e-02,
       -5.46760783e-02,  5.09973941e-03, -2.01406069e-02,  1.12968134e-02,
       -2.07993574e-02, -

In [36]:
# Here svec is Array of array
svec

array([[-2.57991031e-02,  3.55879515e-02,  2.27258494e-03,
         1.42115420e-02, -1.89389195e-02, -4.56868149e-02,
         2.11166739e-02,  6.95221797e-02, -6.61924388e-03,
         4.23663743e-02,  9.82517283e-03, -3.23184766e-02,
         1.69982798e-02,  8.90896022e-02, -4.44575585e-03,
         1.31926714e-02,  2.76567936e-02,  2.50751842e-02,
         6.37881011e-02, -1.50687788e-02, -2.26934440e-02,
        -7.65429065e-03, -5.28059027e-04, -1.39976125e-02,
         4.16568518e-02, -2.81298403e-02,  3.22693735e-02,
         3.33307050e-02, -5.75153045e-02,  3.22678387e-02,
         8.38346034e-02, -1.52588333e-03, -2.55068298e-02,
         3.87023725e-02,  1.17861646e-06,  2.51762290e-02,
         3.66419144e-02, -6.88562617e-02,  1.73385479e-02,
         1.25989702e-03,  2.27525942e-02,  6.48351386e-02,
        -4.26584929e-02, -3.76666896e-03, -8.12522322e-03,
        -1.32075688e-02, -1.55175775e-02,  4.73976135e-02,
        -5.46760783e-02,  5.09973941e-03, -2.01406069e-0

## Get three nearest Index from FAISS for given Search query's vector

In [37]:
distance, I = index.search(svec, k=3)
print (distance)
print (I)

[[1.3139373 1.4744456 1.551373 ]]
[[9 5 7]]


## Display Top 3 answers

In [38]:
print (f"Search Query: {search_query}")
print ("-"*40)
print ("Possible 3 answers")
print ("-"*40)
print (f"Answer :{df.loc[I[0]]}")

Search Query: I want to buy a good formal shirt for office
----------------------------------------
Possible 3 answers
----------------------------------------
Answer :                                                text category
9  Sustainable fashion is growing in popularity a...  Fashion
5  The fall collection features oversized jackets...  Fashion
7  Layering is the key to styling your wardrobe f...  Fashion


In [39]:
def get_svec(search_query):
    vec = encoder.encode(search_query)
    vec.shape
    # Convert single dimensional Array to two dimensional Array
    import numpy as np
    svec = np.array(vec).reshape(1, -1)
    svec.shape
    return svec

def get_index_search(svec, k = 2):
    distance, I = index.search(svec, k=k)
    print (distance)
    print (I)
    return distance, I

## Query-1

In [40]:
search_query = "An apple a day keeps the doctor away"
svec = get_svec(search_query)
distance, I = get_index_search(svec, k=1)
df.loc[I[0]]

[[1.0341485]]
[[0]]


Unnamed: 0,text,category
0,"Fruits are rich in vitamins and antioxidants, ...",Health


## Query-2

In [41]:
search_query = "How can physical activity help boost your energy levels?"
svec = get_svec(search_query)
distance, I = get_index_search(svec, k=1)
df.loc[I[0]]

[[0.7457862]]
[[1]]


Unnamed: 0,text,category
1,Regular exercise can help maintain a healthy b...,Health


## Query-3

In [42]:
search_query = "What cultural gatherings are happening around the city this weekend?"
svec = get_svec(search_query)
distance, I = get_index_search(svec, k=1)
df.loc[I[0]]

[[1.0544972]]
[[11]]


Unnamed: 0,text,category
11,Join the local community for the winter arts a...,Event


## Query-4

In [43]:
search_query = "What are some delicious meals with fresh ingredients that are easy to prepare?"
svec = get_svec(search_query)
distance, I = get_index_search(svec, k=1)
df.loc[I[0]]

[[0.99939656]]
[[21]]


Unnamed: 0,text,category
21,A healthy salad with quinoa and fresh vegetabl...,Food & Dining


## Query-5

In [44]:
search_query = "What new gadgets are people excited about this year?"
svec = get_svec(search_query)
distance, I = get_index_search(svec, k=1)
df.loc[I[0]]

[[1.197061]]
[[28]]


Unnamed: 0,text,category
28,"Smart homes are increasingly common, with devi...",Technology


## Multiple queries and Extracted answers

In [45]:
queries = [
    'What are some habits that improve overall well-being?', 
    'How can physical activity help boost your energy levels?', 
    'What are some everyday practices for staying hydrated?', 
    'How does eating a variety of foods benefit your body?', 
    'Why is getting enough rest vital for productivity?', 
    'What are the top trends for the cooler months this year?', 
    'How can I update my wardrobe with a vintage feel?', 
    'How should I dress for unpredictable weather?', 
    'What should I wear to stay comfortable and stylish in cold conditions?', 
    'How can I make environmentally conscious choices with my clothing?', 
    'What cultural gatherings are happening around the city this weekend?', 
    'Where can I find a fun and creative community fair this month?', 
    'What kind of performances are new in the local theater scene?', 
    'What fun activities can I look forward to during the summer?', 
    'How can I participate in a fundraising event next month?', 
    'What destinations offer beautiful beaches and year-round sunshine?', 
    'Where can I visit to experience ancient architecture and history?', 
    'What iconic places should I explore when in Europe?', 
    'How can I enjoy scenic drives with breathtaking views?', 
    'What destinations are perfect for adventure seekers?', 
    'What are some delicious meals with fresh ingredients that are easy to prepare?', 
    'Where can I find light and healthy meal options for lunch?', 
    'What kinds of treats are best enjoyed with your morning coffee?', 
    'What are the top spots for fresh seafood and local dishes?', 
    'Where can I find delicious baked goods in the morning?', 
    'What new gadgets are people excited about this year?', 
    'How is technology enhancing interactive experiences?', 
    'What advances are being made in artificial intelligence?', 
    'What devices make managing a smart home easier?', 
    'How do software updates improve security and functionality?', 
    'What exercises can help with flexibility and reducing tension?', 
    'What are the best workouts for burning calories efficiently?', 
    'How can strength training help with your fitness goals?', 
    'How can I incorporate healthy drinks into my routine?', 
    'How can daily activity contribute to better health?', 
    'Why do so many people love watching football around the world?', 
    'What athletic skills are key to success in individual sports?', 
    'How do team sports foster cooperation and communication?', 
    'What makes golf challenging yet enjoyable for players?', 
    'How does swimming contribute to physical fitness?', 
    'What awards highlight the best achievements in the music world?', 
    'What kind of music events attract large crowds and excitement?', 
    'How is the indie music scene influencing modern culture?', 
    'What music styles are dominating the charts lately?', 
    'What film festivals showcase the best new international films?', 
    'How are virtual classrooms changing the way people learn?', 
    'What books can expand your understanding of the past?', 
    'How can speaking a new language open up career and travel opportunities?', 
    'What are some ways to collaborate with peers to improve academic performance?', 
    'How can writing notes during class help you absorb information better?', 
    'What are the benefits of shopping from the comfort of your home?', 
    'How can managing your expenses help you reach your financial goals?', 
    'How do people grow their savings through investments?'
]

In [50]:
for query in queries:
    svec = get_svec(query)
    distance, I = get_index_search(svec, k=1)
    print ("_"*100)
    print (f"Query : {query}")
    print (f"Answer : {df.loc[I[0]]}")
    print ("_"*100)

[[0.8365221]]
[[1]]
____________________________________________________________________________________________________
Query : What are some habits that improve overall well-being?
Answer :                                                 text category
1  Regular exercise can help maintain a healthy b...   Health
____________________________________________________________________________________________________
[[0.7457862]]
[[1]]
____________________________________________________________________________________________________
Query : How can physical activity help boost your energy levels?
Answer :                                                 text category
1  Regular exercise can help maintain a healthy b...   Health
____________________________________________________________________________________________________
[[0.6435116]]
[[2]]
____________________________________________________________________________________________________
Query : What are some everyday practices f