In [None]:
import os
import pandas as pd
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv

# Pull settings from a local .env file (if one exists) into the process environment.
load_dotenv()

# The key is read from the API_KEY environment variable rather than written into the notebook.
# NOTE(review): os.getenv returns None when API_KEY is unset — confirm it is defined before running.
client = OpenAI(api_key=os.getenv('API_KEY'))

True

In [None]:
# Load the activities DataFrame from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
# The search helpers in this notebook read the 'text' and 'ada_embedding' columns of this frame.
embedding_df = pd.read_pickle("embedded_activities.pkl")
# Show 5 random rows as a quick sanity check (no random_state is set, so the rows differ between runs).
embedding_df.sample(5)

In [3]:

def get_embedding(text, model="text-embedding-ada-002"):
    """Request an embedding vector for ``text`` from the embeddings endpoint.

    Args:
        text: String to embed. Newline characters are replaced with spaces
            before the request is sent.
        model: Name of the embedding model passed to the API.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding

def find_k_nearest_neighbors(df, query_embedding, k=5):
    """Return the texts of the ``k`` rows most similar to ``query_embedding``.

    Similarity is the cosine similarity between ``query_embedding`` and each
    vector stored in the ``ada_embedding`` column of ``df``.

    Args:
        df: DataFrame with an ``ada_embedding`` column (one vector per row)
            and a ``text`` column.
        query_embedding: Sequence of floats; assumed to have the same length
            as the stored vectors.
        k: Maximum number of results to return.

    Returns:
        A list of ``text`` values ordered from most to least similar. The
        list has fewer than ``k`` entries when ``df`` has fewer than ``k``
        rows, and is empty when ``k <= 0`` or ``df`` has no rows.
    """
    # Guard the degenerate cases up front: a slice such as [-0:] selects the
    # whole array, so k == 0 would otherwise return every row, and there is
    # nothing to compare against when the frame is empty.
    if k <= 0 or len(df) == 0:
        return []

    # Compute cosine similarity between the query and every stored embedding.
    similarities = cosine_similarity([query_embedding], list(df['ada_embedding'].values))[0]

    # argsort is ascending, so reverse it and keep the first k positions.
    top_indices = similarities.argsort()[::-1][:k]

    # Map the positional indices back to the corresponding text values.
    return df.iloc[top_indices]['text'].tolist()

def get_top_search(question, n=5):
    """Print the ``n`` entries of ``embedding_df`` closest to ``question``.

    The question is echoed first, then embedded with ``get_embedding`` and
    compared against the module-level ``embedding_df``. Each matching text is
    printed on its own line. Nothing is returned.

    Args:
        question: Free-text search query.
        n: Number of neighbors to request.
    """
    print("Question:", question)

    query_vector = get_embedding(question)
    matches = find_k_nearest_neighbors(embedding_df, query_vector, k=n)

    print("K Nearest Neighbors:")
    for match in matches:
        print(match)

In [None]:
# Misspelled query (extra o's); presumably checks how tolerant the embedding search is to typos.
question = "Zoooomba"

get_top_search(question, n=10)

Question: Zoooomba
K Nearest Neighbors:
A1177 Aqua Zumba 9am Jurong Lake Gardens
A1208 Zumba Lite 7am Bishan Junction 8
A1129 Zumba Gold 9am Tampines Mall
A1223 Zen Stretch 7am MacPherson Park
A3324 Jogging 8am Everton Park
A1165 Groove Dance 9am VivoCity Sky Park
A1178 Dance Fitness Jam 8am AMK Hub
A1173 Brazilian Dance 5pm Eunos Community Centre
A1201 Morning Jog 6am East Coast Park
A1205 Jazzercise 5pm Buona Vista Park


In [13]:
# Single-keyword query for an activity type.
question = "Cardio"

get_top_search(question, n=10)

Question: Cardio
K Nearest Neighbors:
A1182 Light Cardio 10am Pioneer Mall
A1227 Cardio Sculpt 7pm Bedok Central
A1155 Cardio Barre 8am Mount Faber Park
A1136 Full Body Cardio 7am Singapore General Hospital
A1200 Cardio Blitz 9am Tampines ActiveSG Gym
A1184 Core & Cardio 6pm Punggol Settlement
A1164 Cardio Dance Party 8am Sentosa Beach Station
A1135 Dance Cardio 8am Pasir Ris CC
A1196 High Energy Aerobics 8am Hillion Mall
A1205 Jazzercise 5pm Buona Vista Park


In [10]:
# Location-only query; presumably checks matching on the venue part of the activity text.
question = "East Coast"

get_top_search(question, n=5)


Question: East Coast
K Nearest Neighbors:
A1201 Morning Jog 6am East Coast Park
A1134 HIIT Bootcamp 6am East Coast Park
A1131 Tai Chi Basics 7am West Coast Park
A1176 Stretch & Align 7pm Eastpoint Mall
A1158 CrossFit Lite 6am  Downtown East D’Resort


In [None]:
# Compound query combining an activity type with a location.
# NOTE(review): in the recorded output no single entry matches both terms — confirm whether
# such an activity exists in the data before treating this as a ranking weakness.
question = "Cardio in Toa Payoh"

get_top_search(question, n=5)

Question: Cardio in Toa Payoh
K Nearest Neighbors:
A1200 Cardio Blitz 9am Tampines ActiveSG Gym
A1227 Cardio Sculpt 7pm Bedok Central
A1184 Core & Cardio 6pm Punggol Settlement
A1143 Gentle Stretching 9am Toa Payoh Central
A1150 Chair Aerobics 10am Bukit Timah Shopping Centre


In [14]:
# Query by what looks like an activity ID.
# NOTE(review): none of the entries in the recorded output begins with A1128. Either that ID is
# absent from the data or embedding similarity is unreliable for exact-ID lookup — confirm, and
# consider a direct string match on the ID for this kind of query.
question = "A1128"

get_top_search(question, n=5)

Question: A1128
K Nearest Neighbors:
A1133 Barre Fusion 11am Clementi SMC
A1178 Dance Fitness Jam 8am AMK Hub
A1162 Chair Yoga 11am Compass One
A1182 Light Cardio 10am Pioneer Mall
A1196 High Energy Aerobics 8am Hillion Mall
