In [7]:
from getpass import getpass as gp

# Collect credentials interactively so they never appear in the notebook source.
# getpass hides what is typed; the username is not secret, so plain input() is used.
# Surrounding whitespace (common when pasting) is stripped because these values are
# later interpolated verbatim into the git clone URL and handed to the LLM client.
username = input("Enter username: ").strip()
api_key = gp("LLM Api Key: ").strip()
git_key = gp("Git Api Key: ").strip()

Enter username: fahds-eden
LLM Api Key: ··········
Git Api Key: ··········


In [8]:
!git clone https://{username}:{git_key}@github.com/{username}/iNAGO-Eats.git

Cloning into 'iNAGO-Eats'...
remote: Enumerating objects: 55, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 55 (delta 0), reused 0 (delta 0), pack-reused 52 (from 1)[K
Receiving objects: 100% (55/55), 47.84 MiB | 16.26 MiB/s, done.
Resolving deltas: 100% (8/8), done.


In [9]:
import sys

# Make the cloned repository importable from this notebook.
# Guarded so that re-running the cell does not pile up duplicate sys.path entries.
repo_path = '/content/iNAGO-Eats'
if repo_path not in sys.path:
    sys.path.append(repo_path)

In [10]:
from prompts.preference_extraction_v1 import SYSTEM_PROMPT, USER_PROMPT
from sessions.conversation_parser import parse_and_combine_by_user
from config.aspects import ASPECTS
from LLM.openai import OpenAILLM
from LLM.preferance_extractor import extract_preferences_from_conversation
from LLM.suggest_memory_action import suggest_memory_actions
from user_profile.user_profile import UserProfile

In [11]:
# LLM client shared by the preference-extraction and memory-update steps below.
LLM_MODEL_NAME = "gpt-4"
llm = OpenAILLM(api_key, model_name=LLM_MODEL_NAME)

# Setting Up User Profiles
Here we set up two example user profiles. The profile class is currently very basic.

Functions include:


*   Initialization of user_id and memory
*   has_memory: checks if user has previously used application
*   get_memory_view: displays simple interface for user memory
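*   ingest_extracted_preferences: stores the preferences extracted from a conversation in the user's memory
*   apply_actions: applies suggested memory actions (e.g. add, weaken) to the user's memory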







In [12]:
# Registry of demo profiles, keyed by each profile's own user_id.
profiles = {}

# Two example users; ids 100 and 200 match the speaker ids in the conversations below.
user1 = UserProfile(100)
user2 = UserProfile(200)

profiles.update((profile.user_id, profile) for profile in (user1, user2))

profiles

{100: <user_profile.user_profile.UserProfile at 0x7a336d9b25a0>,
 200: <user_profile.user_profile.UserProfile at 0x7a336d9b3bc0>}

# First Conversation

In [13]:
# First demo conversation, written as (speaker id, utterance) pairs.
# The comment under each pair records what the extractor is expected to pick up.
_turns = [
    (100, "I’m pretty hungry — just a heads up, I only eat halal food"),
    # IMPORTANT: hard dietary constraint → halal (mandatory)

    (200, "Good to know. I don’t have strict rules, but I usually avoid pork"),
    # IMPORTANT: dietary constraint → no pork (soft)

    (100, "Cuisine-wise I’m down for Asian food, like noodles or rice dishes"),
    # IMPORTANT: cuisine preference → Asian (soft)

    (200, "Yeah Asian sounds good. I was also thinking Middle Eastern, but Asian might be easier"),
    # IMPORTANT: cuisine preference → Asian or Middle Eastern, slight lean Asian

    (100, "I usually prefer places that are quick since I’ve got stuff to do"),
    # IMPORTANT: service speed preference → fast (initially strong)

    (200, "I don’t really mind waiting as long as the food’s worth it"),
    # IMPORTANT: service speed → flexible / low priority

    (100, "Yeah halal is still non-negotiable for me"),
    # IMPORTANT: reaffirmation → halal remains hard constraint

    (200, "That’s fine with me. As long as there’s no pork, I’m good"),
    # IMPORTANT: confirms compatibility with halal constraint

    (100, "Asian is probably the safest option then"),
    # IMPORTANT: reinforces cuisine → Asian (higher confidence)

    (200, "Agreed. Asian food sounds better than Middle Eastern right now"),
    # IMPORTANT: narrows cuisine preference → Asian preferred

    (100, "Actually, speed doesn’t matter that much if the place is good"),
    # IMPORTANT: service speed downgraded → no longer a hard preference

    (200, "Same here. I’d rather avoid super heavy food though"),
    # IMPORTANT: food type preference → lighter meals

    (100, "That works for me — something lighter with chicken would be ideal"),
    # IMPORTANT: protein preference → chicken

    (200, "Perfect. As long as there are chicken or vegetarian options, I’m happy"),
    # IMPORTANT: protein preference → chicken or vegetarian
]

# Expand the pairs into the message dicts the extractor is given.
conversation = [
    {"user_id": speaker, "text": utterance}
    for speaker, utterance in _turns
]


# Run LLM-based preference extraction over the whole first conversation.
# The result is indexed by user id in the following cells, with each user's
# extracted preferences under the "preferences" key.
results = extract_preferences_from_conversation(llm, conversation)

In [14]:
from pprint import pprint

# Dump the preferences extracted from the first conversation, one user at a time.
for user_id in (100, 200):
    heading = f"\nUser {user_id} preferences:"
    print(heading)
    extracted = results[user_id]["preferences"]
    pprint(extracted)


User 100 preferences:
{'hard_preferences': [{'aspect': 'dietary',
                       'strength': 'strong',
                       'value': 'halal'}],
 'soft_preferences': [{'aspect': 'cuisine',
                       'strength': 'medium',
                       'value': 'Asian'},
                      {'aspect': 'time',
                       'strength': 'medium',
                       'value': 'quick'},
                      {'aspect': 'quality',
                       'strength': 'weak',
                       'value': 'good'},
                      {'aspect': 'cuisine',
                       'strength': 'weak',
                       'value': 'lighter chicken dishes'}]}

User 200 preferences:
{'hard_preferences': [{'aspect': 'dietary',
                       'strength': 'strong',
                       'value': 'no pork'},
                      {'aspect': 'dietary',
                       'strength': 'medium',
                       'value': 'chicken options'},
              

In [15]:
# Seed each profile's memory with what was extracted from the first conversation.
for user_id in (100, 200):
    ingest = profiles[user_id].ingest_extracted_preferences
    ingest(results[user_id]["preferences"])

In [17]:
# Inspect the raw memory stored on each profile after ingestion.
for user_id in (100, 200):
    heading = f"\nUser {user_id} memory:"
    print(heading)
    memory = profiles[user_id].memory
    pprint(memory)


User 100 memory:
{'cuisine': {'asian': {'confidence': 0.6,
                       'evidence': 1,
                       'last_seen': datetime.datetime(2025, 12, 24, 18, 28, 57, 789738, tzinfo=datetime.timezone.utc)},
             'lighter chicken dishes': {'confidence': 0.3,
                                        'evidence': 1,
                                        'last_seen': datetime.datetime(2025, 12, 24, 18, 28, 57, 789750, tzinfo=datetime.timezone.utc)}},
 'dietary': {'halal': {'confidence': 0.9,
                       'evidence': 1,
                       'last_seen': datetime.datetime(2025, 12, 24, 18, 28, 57, 789728, tzinfo=datetime.timezone.utc)}},
 'quality': {'good': {'confidence': 0.3,
                      'evidence': 1,
                      'last_seen': datetime.datetime(2025, 12, 24, 18, 28, 57, 789746, tzinfo=datetime.timezone.utc)}},
 'time': {'quick': {'confidence': 0.6,
                    'evidence': 1,
                    'last_seen': datetime.datetime(2025, 

# Second Conversation

In [18]:
# Follow-up conversation between the same two users, as (speaker id, utterance) pairs.
# The comment under each pair records what the extractor is expected to pick up.
_turns_2 = [
    (100, "I’m thinking Asian food tonight"),
    # IMPORTANT: cuisine preference → Asian (implicit continuation of past constraints)

    (200, "Asian works for me. We should still make sure it fits your restrictions though"),
    # IMPORTANT: references remembered constraint (halal) without restating it

    (100, "Yeah exactly — something light would be nice"),
    # IMPORTANT: food style preference → lighter food (implicit halal assumed)

    (200, "Cool. I’m fine with chicken or veggie options"),
    # IMPORTANT: protein preference → chicken / vegetarian (soft)

    (100, "Also no rush, I’m not in a hurry anymore"),
    # IMPORTANT: time preference downgraded → speed not important

    (200, "Perfect, then we can prioritize quality over speed"),
    # IMPORTANT: quality preference → worth waiting
]

# Expand the pairs into the message dicts the extractor is given.
conversation_2 = [
    {"user_id": speaker, "text": utterance}
    for speaker, utterance in _turns_2
]



# Run the same extraction on the follow-up conversation. The result is kept
# separate from `results` and is indexed by user id, then "preferences", below.
results_2 = extract_preferences_from_conversation(llm, conversation_2)

In [19]:
# Dump the preferences extracted from the second conversation, one user at a time.
for user_id in (100, 200):
    heading = f"\nUser {user_id} preferences:"
    print(heading)
    extracted = results_2[user_id]["preferences"]
    pprint(extracted)


User 100 preferences:
{'hard_preferences': [],
 'soft_preferences': [{'aspect': 'cuisine',
                       'strength': 'strong',
                       'value': 'Asian'},
                      {'aspect': 'quality',
                       'strength': 'medium',
                       'value': 'light'},
                      {'aspect': 'time',
                       'strength': 'medium',
                       'value': 'not in a hurry'}]}

User 200 preferences:
{'hard_preferences': [],
 'soft_preferences': [{'aspect': 'cuisine',
                       'strength': 'weak',
                       'value': 'Asian'},
                      {'aspect': 'dietary',
                       'strength': 'medium',
                       'value': 'chicken or veggie options'},
                      {'aspect': 'quality',
                       'strength': 'strong',
                       'value': 'high'}]}


# Updating User Memory Using Conversation Data

In [22]:
# For each user: ask the LLM which memory actions the new preferences imply,
# show them, apply them to the profile, then show the resulting memory.
for user_id in (100, 200):
    profile = profiles[user_id]
    memory_view = profile.get_memory_view()
    new_preferences = results_2[user_id]["preferences"]
    actions = suggest_memory_actions(llm, memory_view, new_preferences)

    print(f"User {user_id} actions:")
    pprint(actions)

    profile.apply_actions(actions)

    print(f"User {user_id} updated memory:")
    pprint(profile.memory)

User 100 actions:
[{'action': 'add', 'aspect': 'cuisine', 'strength': 'strong', 'value': 'Asian'},
 {'action': 'weaken',
  'aspect': 'quality',
  'strength': 'medium',
  'value': 'good'},
 {'action': 'weaken', 'aspect': 'time', 'strength': 'medium', 'value': 'quick'}]
User 100 updated memory:
{'cuisine': {'asian': {'confidence': 0.9,
                       'evidence': 1,
                       'last_seen': datetime.datetime(2025, 12, 24, 18, 33, 54, 227218, tzinfo=datetime.timezone.utc)},
             'lighter chicken dishes': {'confidence': 0.3,
                                        'evidence': 1,
                                        'last_seen': datetime.datetime(2025, 12, 24, 18, 28, 57, 789750, tzinfo=datetime.timezone.utc)}},
 'dietary': {'halal': {'confidence': 0.9,
                       'evidence': 1,
                       'last_seen': datetime.datetime(2025, 12, 24, 18, 28, 57, 789728, tzinfo=datetime.timezone.utc)}},
 'quality': {'good': {'confidence': 0.18,
           

# Testing Embedder

In [23]:
from embeddings.embedder import ReviewEmbedder
from embeddings.index import EmbeddingIndex
from embeddings.search import EmbeddingSearch

In [24]:
import pandas as pd

# Review CSV that lives inside the cloned repository.
reviews_csv = "/content/iNAGO-Eats/data/yelp_restaurants_toronto_reviews.csv"
reviews = pd.read_csv(reviews_csv)
print(f"Loaded {len(reviews)} reviews")

Loaded 29277 reviews


In [26]:
# Preview the first five rows to check the columns that were loaded.
reviews.head(n=5)

Unnamed: 0,place_id,rating,date,text,language,user_name,user_location,useful,funny,cool,photo
0,mZRKH9ngRY92bI_irrHq6w,5,2025-08-20T00:51:10Z,We went for the Six Course Chef's Tasting Menu...,en,Eugene L.,"North York, Toronto, Canada",0,0,0,https://s3-media0.fl.yelpcdn.com/bphoto/v0UN8t...
1,mZRKH9ngRY92bI_irrHq6w,4,2025-12-16T01:32:17Z,Richmond Station has been on my list for years...,en,Sean K.,"Toronto, Canada",0,0,0,https://s3-media0.fl.yelpcdn.com/bphoto/vdLsel...
2,mZRKH9ngRY92bI_irrHq6w,4,2025-11-28T02:54:23Z,"If Richmond Station were a person, it would be...",en,Jack A.,"Los Angeles, CA",0,0,0,https://s3-media0.fl.yelpcdn.com/bphoto/YgIXQs...
3,mZRKH9ngRY92bI_irrHq6w,4,2025-09-06T16:40:17Z,Visited here for work with a client for anothe...,en,Jo-Anne L.,"Toronto, Canada",2,0,1,https://s3-media0.fl.yelpcdn.com/bphoto/STJMms...
4,mZRKH9ngRY92bI_irrHq6w,5,2025-09-23T17:12:51Z,Very nice spot! A service included restaurant ...,en,Boris N.,"North York, Toronto, Canada",0,0,0,https://s3-media0.fl.yelpcdn.com/bphoto/83Zt_q...


In [31]:
# Review bodies as a plain Python list, in the same row order as `reviews`.
text_column = reviews["text"]
texts = text_column.to_list()

# Default embedder. Per the original note, a model name can be passed here to use
# a different embedding model (constructor signature not visible in this notebook).
embedder = ReviewEmbedder()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Embed every review text; this is the expensive step of the notebook.
embeddings = embedder.embed(texts)

embeddings_shape = embeddings.shape
print("Embeddings shape:", embeddings_shape)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/229 [00:00<?, ?it/s]

Embeddings shape: (29277, 384)


In [None]:
# One metadata record per review row, carrying only the restaurant's place_id.
place_id_frame = reviews[["place_id"]]
metadata = place_id_frame.to_dict("records")

# Build a fresh index from the embeddings and their per-row metadata.
index = EmbeddingIndex()
index.build(embeddings, metadata)

In [None]:
# Persist the freshly built index.
# NOTE(review): this path is the repository root, while the load cell that follows
# reads "/content/iNAGO-Eats/embeddings/all-MiniLM-L6-v2/embeddings.npy" — confirm
# which location is intended, since a file saved here is not the one loaded next.
index_save_path = "/content/iNAGO-Eats/embeddings.npy"
index.save(index_save_path)

In [27]:
# Load a previously saved index, replacing any `index` built earlier in the session.
# NOTE(review): this path differs from the one used by the save cell above
# ("/content/iNAGO-Eats/embeddings.npy") — confirm the two are meant to differ.
index_load_path = "/content/iNAGO-Eats/embeddings/all-MiniLM-L6-v2/embeddings.npy"
index = EmbeddingIndex()
index.load(index_load_path)

In [29]:
from embeddings.rank_restaurants import rank_restaurants

In [35]:
# Free-text request to match against the review embeddings.
query = "looking for comfort food that isnt too greasy. Prices should be pretty cheap please."

# embed() takes a list of texts, so wrap the query and take the single resulting vector.
query_batch = embedder.embed([query])
query_vec = query_batch[0]

top_restaurants = rank_restaurants(
    index,
    query_vec,
    top_k=5,
    top_reviews_per_rest=10,
)

# Each result is a (place_id, score) pair; scores are shown to three decimals.
for place_id, score in top_restaurants:
    print(place_id, round(score, 3))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

7PK9PIdWdPtAp0gB31hO7Q 0.566
2SyYW8GiDZsqtPb-aCbosg 0.551
WEm23ZUFuENSJj-bk0Ugkg 0.542
7gnYWanhD6O7qOF-NfPNhA 0.541
eD-XLSo-j8uIoTnIlrG8YA 0.534
