In [2]:
import os
import pandas as pd
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the process environment
# so the API key does not have to be hardcoded in the notebook.
load_dotenv()

# Build the OpenAI client used by the embedding helper, keyed by the API_KEY
# environment variable.
# NOTE(review): os.getenv returns None when API_KEY is unset — confirm how the
# client behaves in that case before relying on this cell in a fresh environment.
client = OpenAI(api_key=os.getenv('API_KEY'))

In [3]:
# Load the activities table from a pickle file; the preview below shows it
# carries an `ada_embedding` column next to the activity fields and a combined
# `text` column.
# NOTE(review): unpickling can execute arbitrary code — only load this file
# from a trusted source.
embedding_df = pd.read_pickle("data/embedded_activities.pkl")
# Preview 5 random rows. No random_state is passed, so the rows shown will
# differ from run to run.
embedding_df.sample(5)

Unnamed: 0,Event ID,Activity,Time,Location,text,ada_embedding
80,A1207,Guided Stretch,9am,Upper Thomson Park,A1207 Guided Stretch 9am Upper Thomson Park,"[0.006176583468914032, 0.0073325554840266705, ..."
72,A1200,Cardio Blitz,9am,Tampines ActiveSG Gym,A1200 Cardio Blitz 9am Tampines ActiveSG Gym,"[-0.029249368235468864, -0.011362254619598389,..."
57,A1185,Group Functional Training,7pm,Yew Tee Point,A1185 Group Functional Training 7pm Yew Tee Point,"[-0.010567406192421913, -0.01499401405453682, ..."
66,A1194,Strong Nation,7pm,Tanjong Pagar Centre,A1194 Strong Nation 7pm Tanjong Pagar Centre,"[0.008739463053643703, -0.022288653999567032, ..."
18,A1146,Kickboxing Fitness,6pm,Jurong East MRT Plaza,A1146 Kickboxing Fitness 6pm Jurong East MRT P...,"[0.004646248184144497, -0.015833109617233276, ..."


In [4]:

def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text`` from the embeddings endpoint.

    Newlines in ``text`` are replaced with single spaces before the request is
    sent. The request goes through the module-level ``client``.

    Args:
        text: String to embed.
        model: Name of the embedding model to request.

    Returns:
        The ``embedding`` attribute of the first item in the response's ``data``.
    """
    # Flatten the input onto one line (same effect as replacing "\n" with " ").
    single_line = " ".join(text.split("\n"))
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding

def find_k_nearest_neighbors(df, query_embedding, k=5):
    """Return the ``text`` of the ``k`` rows most similar to ``query_embedding``.

    Similarity is the cosine similarity between ``query_embedding`` and each
    entry of the ``ada_embedding`` column.

    Args:
        df: DataFrame with an ``ada_embedding`` column (one vector per row) and
            a ``text`` column.
        query_embedding: Vector to compare against every row's embedding.
        k: Maximum number of rows to return. Values larger than ``len(df)``
            return every row; values ``<= 0`` return an empty list.

    Returns:
        List of ``text`` values, ordered from most to least similar. Empty when
        ``k <= 0`` or ``df`` has no rows.
    """
    # Guard the degenerate cases explicitly. Without it, k == 0 would slice the
    # ranking with ``[-0:]`` (the whole array) and return every row instead of
    # none, and an empty frame would make cosine_similarity raise.
    if k <= 0 or len(df) == 0:
        return []

    # Compute cosine similarity between the query and all stored embeddings;
    # the result has a single row because only one query vector is passed.
    similarities = cosine_similarity([query_embedding], list(df['ada_embedding'].values))[0]

    # argsort is ascending, so reverse it and keep the first k positions.
    top_indices = similarities.argsort()[::-1][:k]

    # Positions (not index labels) are used, hence iloc.
    return df.iloc[top_indices]['text'].tolist()

def get_top_search(question, n=5):
    """Print the ``n`` stored activities most similar to ``question``.

    The question is echoed first, then embedded with ``get_embedding`` and
    ranked against the module-level ``embedding_df`` via
    ``find_k_nearest_neighbors``. Results are printed one per line; nothing is
    returned.

    Args:
        question: Free-text query to search for.
        n: Number of neighbours to request.
    """
    print("Question:", question)

    # Embed the query, then rank the stored embeddings against it.
    matches = find_k_nearest_neighbors(
        embedding_df,
        get_embedding(question),
        k=n,
    )

    print("K Nearest Neighbors:")
    for match in matches:
        print(match)

In [5]:
# Misspelled activity name — presumably a check that the embedding search still
# surfaces the Zumba classes despite the typo (they lead the recorded output).
question = "Zoooomba"

get_top_search(question, n=10)

Question: Zoooomba
K Nearest Neighbors:
A1177 Aqua Zumba 9am Jurong Lake Gardens
A1208 Zumba Lite 7am Bishan Junction 8
A1129 Zumba Gold 9am Tampines Mall
A1223 Zen Stretch 7am MacPherson Park
A3324 Jogging 8am Everton Park
A1165 Groove Dance 9am VivoCity Sky Park
A1178 Dance Fitness Jam 8am AMK Hub
A1173 Brazilian Dance 5pm Eunos Community Centre
A1201 Morning Jog 6am East Coast Park
A1205 Jazzercise 5pm Buona Vista Park


In [6]:
# Single-keyword activity query; the recorded output lists the activities with
# "Cardio" in their name first, followed by aerobics/dance-style classes.
question = "Cardio"

get_top_search(question, n=10)

Question: Cardio
K Nearest Neighbors:
A1182 Light Cardio 10am Pioneer Mall
A1227 Cardio Sculpt 7pm Bedok Central
A1155 Cardio Barre 8am Mount Faber Park
A1136 Full Body Cardio 7am Singapore General Hospital
A1200 Cardio Blitz 9am Tampines ActiveSG Gym
A1184 Core & Cardio 6pm Punggol Settlement
A1164 Cardio Dance Party 8am Sentosa Beach Station
A1135 Dance Cardio 8am Pasir Ris CC
A1196 High Energy Aerobics 8am Hillion Mall
A1205 Jazzercise 5pm Buona Vista Park


In [7]:
# Location-only query; the two East Coast Park activities rank first in the
# recorded output, followed by similarly named places (e.g. West Coast Park).
question = "East Coast"

get_top_search(question, n=10)


Question: East Coast
K Nearest Neighbors:
A1201 Morning Jog 6am East Coast Park
A1134 HIIT Bootcamp 6am East Coast Park
A1131 Tai Chi Basics 7am West Coast Park
A1176 Stretch & Align 7pm Eastpoint Mall
A1158 CrossFit Lite 6am  Downtown East D’Resort
A1217 Energy Barre 6am River Valley
A1145 Yin Yoga 10am Harbourfront Tower
A1198 Street Dance 5pm Great World City
A1219 Aqua Strength 10am Simei Eastpoint
A1154 Line Dancing 11am Chinatown Point


In [10]:
# Combined activity + location query.
# NOTE(review): in the recorded output the only Toa Payoh hit is a stretching
# class and the cardio hits are at other locations — the search ranks on overall
# similarity and does not filter by location.
question = "Cardio in Toa Payoh"

get_top_search(question, n=10)

Question: Cardio in Toa Payoh
K Nearest Neighbors:
A1200 Cardio Blitz 9am Tampines ActiveSG Gym
A1227 Cardio Sculpt 7pm Bedok Central
A1184 Core & Cardio 6pm Punggol Settlement
A1143 Gentle Stretching 9am Toa Payoh Central
A1150 Chair Aerobics 10am Bukit Timah Shopping Centre
A1164 Cardio Dance Party 8am Sentosa Beach Station
A1190 Rhythm Aerobics 8am Clementi MRT Plaza
A1212 Active Seniors Fitness 10am Punggol Plaza
A1135 Dance Cardio 8am Pasir Ris CC
A1136 Full Body Cardio 7am Singapore General Hospital


In [11]:
# Lookup by Event ID.
# NOTE(review): the recorded output contains no row with ID A1128, so embedding
# similarity does not behave like an exact ID match here — confirm whether the
# ID exists in the data, and prefer a direct filter on the `Event ID` column
# for ID lookups.
question = "A1128"

get_top_search(question, n=10)

Question: A1128
K Nearest Neighbors:
A1133 Barre Fusion 11am Clementi SMC
A1178 Dance Fitness Jam 8am AMK Hub
A1162 Chair Yoga 11am Compass One
A1182 Light Cardio 10am Pioneer Mall
A1196 High Energy Aerobics 8am Hillion Mall
A1160 Body Toning 8am Woodlands Central
A1154 Line Dancing 11am Chinatown Point
A1192 Combat Training 6pm Sports Hub
A1221 Dynamic Pilates 8am Anchorpoint Mall
A1136 Full Body Cardio 7am Singapore General Hospital


In [13]:
# Natural-language query that describes an audience and a location rather than
# naming an activity; a wider result set (20) is requested to see how far the
# relevant matches extend.
question = "Light Exercises for Retirees in Toa Payoh"

get_top_search(question, n=20)

Question: Light Exercises for Retirees in Toa Payoh
K Nearest Neighbors:
A1212 Active Seniors Fitness 10am Punggol Plaza
A1143 Gentle Stretching 9am Toa Payoh Central
A1150 Chair Aerobics 10am Bukit Timah Shopping Centre
A1199 Gentle Pilates 7am Kallang ActiveSG Gym
A1161 Mat Pilates 10am Holland Village
A1191 Beginner Yoga 9am Boon Lay Park
A1200 Cardio Blitz 9am Tampines ActiveSG Gym
A1169 Total Body Workout 9am Ang Mo Kio Park
A1136 Aerobics Basics 7pm Punggol Waterway Point
A1171 Power Pilates 10am Paya Lebar Quarter
A1190 Rhythm Aerobics 8am Clementi MRT Plaza
A1193 Slow Flow Yoga 8am Yio Chu Kang Sports Centre
A1140 Resistance Band Sculpt 8am Serangoon Gardens
A1177 Aqua Zumba 9am Jurong Lake Gardens
A1132 Aqua Aerobics 10am Yishun Safra
A1149 Core & Balance 9am Choa Chu Kang Park
A1159 Functional Stretching 7pm Bukit Panjang Plaza
A1227 Cardio Sculpt 7pm Bedok Central
A1129 Zumba Gold 9am Tampines Mall
A1139 Functional Training 6pm Bedok Reservoir
