In [1]:
# Enable IPython's autoreload extension so edits to imported modules are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from huggingface_hub import login
# Authenticate with the Hugging Face Hub before any checkpoint is downloaded.
# NOTE(review): new_session=False presumably reuses an already-saved token instead
# of prompting again — confirm against the huggingface_hub documentation.
login(new_session=False)

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab-specific module).
drive.mount('/content/drive')

In [None]:
%cd "drive/MyDrive/summer"

In [5]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Mon Jul 21 08:32:24 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   30C    P0             45W /  400W |       5MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
!pip install -U bitsandbytes
!pip install -U transformers
!pip install -q torch accelerate sentence-transformers faiss-cpu nltk

import pandas as pd
import numpy as np
import torch
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
)
from sentence_transformers import SentenceTransformer
import faiss

In [None]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Load the review table and replace missing values with sentinels:
# "<no_text>" for absent review text and 0 for absent star ratings.
# NOTE(review): the 0 sentinel takes part in any later average of `stars` —
# confirm that is intended rather than leaving the value missing.
data = pd.read_csv("cleaned_restaurants.csv").assign(
    text=lambda frame: frame["text"].fillna("<no_text>"),
    stars=lambda frame: frame["stars"].fillna(0),
)

# VADER needs its lexicon downloaded before the analyzer can be created.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()


def _compound_sentiment(text):
    """Return the VADER "compound" polarity score for a single review text."""
    return sia.polarity_scores(str(text))["compound"]


# One sentiment value per review row.
data["sentiment"] = data["text"].map(_compound_sentiment)

In [9]:
# Weights of the three components of the composite quality score.
w_sent, w_stars, w_count = 0.3, 0.5, 0.2

# Collapse the review-level table to one row per (title, categoryName) and derive
# the normalised features plus the weighted score in a single chain.
# NOTE(review): stars and review count are rescaled to a 0-1 range below, while
# avg_sentiment is used as-is; VADER compound scores are presumably in [-1, 1],
# so the three terms are not on the same scale — confirm this is intended.
quality_df = (
    data
    .groupby(["title", "categoryName"], as_index=False)
    .agg(
        avg_sentiment=("sentiment", "mean"),
        stars=("stars", "mean"),
        true_review_count=("text", "count"),
        reviewsCount=("reviewsCount", "max"),
        url=("url", "first"),
        website=("website", "first"),
    )
    .assign(
        # Star ratings are divided by 5, the assumed top of the scale.
        stars_norm=lambda agg: agg["stars"] / 5.0,
        # Review volume relative to the most-reviewed restaurant.
        count_norm=lambda agg: agg["true_review_count"] / agg["true_review_count"].max(),
        score=lambda agg: (
              w_sent  * agg["avg_sentiment"]
            + w_stars * agg["stars_norm"]
            + w_count * agg["count_norm"]
        ),
    )
)

In [10]:
def make_chunk(row):
    """Render one restaurant record as a single line of retrieval text.

    Args:
        row: Any object exposing the attributes ``title``, ``categoryName``,
            ``score``, ``stars``, ``true_review_count``, ``website`` and ``url``.

    Returns:
        A string holding the name and category, the score (two decimals), the
        star rating (one decimal), the review count and both links.
    """
    headline = f"{row.title} ({row.categoryName})"
    stats = f"score {row.score:.2f}, {row.stars:.1f}★, {row.true_review_count} reviews."
    links = f"Website: {row.website} – Maps: {row.url}"
    return f"{headline} — {stats} {links}"

# One text chunk per row of quality_df, in row order. The retrieval code relies on
# this alignment when it maps FAISS hit positions back to rows via quality_df.iloc.
chunks = quality_df.apply(make_chunk, axis=1).tolist()

In [None]:
# Sentence encoder used both for the restaurant chunks and, later, for user queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# One embedding row per chunk, in the same order as `chunks`.
embeddings = embedder.encode(chunks, convert_to_numpy=True)

# Flat L2 index sized to the embedding width; vector i corresponds to chunks[i].
_, embedding_dim = embeddings.shape
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

In [None]:
model_id = "meta-llama/Llama-3.1-8B-Instruct"

# 4-bit NF4 quantisation with double quantisation; compute dtype is bfloat16.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb,
)

# Default sampling settings baked into the generation pipeline.
generation_defaults = {
    "do_sample": True,
    "max_new_tokens": 512,
    "temperature": 0.7,
    "top_p": 0.95,
}
text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_defaults,
)

In [13]:
import re

# System prompt prepended to every request. Adjacent string literals are
# concatenated verbatim, so each sentence except the last carries its own trailing
# space; without it the model sees "needs.You speak ...".
SYSTEM = (
    "You are a knowledgable person who can recommend best restaurants to visit based on user's needs. "
    "You speak warmly and politely, like a real person. "
    "If the guest asks for a recommendation, pull in context and suggest top options. "
    "Make sure to skip a line every sentence so your response is more readable for user."
)

# Case-insensitive detector for "the user wants a recommendation". The `\w*`
# suffixes let inflected forms such as "recommendation(s)", "recommended" or
# "suggestions" match; a bare `\brecommend\b` misses them because of the trailing
# word boundary.
recommend_intent = re.compile(r"\b(recommend\w*|suggest\w*|find|want)\b", flags=re.I)

def recommend(query, k=5):
    """Generate the assistant's reply to a single user message.

    When ``query`` matches ``recommend_intent``, up to ``k`` nearest restaurant
    chunks are retrieved from the FAISS ``index``, ordered by ``score`` (highest
    first) and injected into the prompt as a ``[Context]`` block. Otherwise the
    model is prompted with the system text and the query only.

    Args:
        query: The user's message.
        k: Maximum number of restaurants to retrieve as context.

    Returns:
        The generated text up to (not including) the first ``"\\nUser:"`` marker,
        with surrounding whitespace stripped.
    """
    rag_block = ""
    if recommend_intent.search(query):
        # Embed the query once; the result is only needed for the index lookup.
        query_vec = embedder.encode([query])
        _, ids = index.search(query_vec, k)

        # FAISS pads the id array with -1 when fewer than k vectors are available.
        # Drop those, otherwise iloc[-1] would silently return the last restaurant.
        hit_ids = [int(i) for i in ids[0] if i >= 0]

        rows = quality_df.iloc[hit_ids].sort_values("score", ascending=False)
        if not rows.empty:
            context = "\n".join(make_chunk(r) for _, r in rows.iterrows())
            rag_block = f"\n[Context]\n{context}\n"

    prompt = (
        SYSTEM
        + rag_block
        + f"\nUser: {query}\nAssistant: "
    )

    raw = text_generator(
        prompt,
        return_full_text=False,
        do_sample=True,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95
    )[0]["generated_text"]

    # The model may keep writing an imaginary next user turn; keep only its own reply.
    reply = raw.split("\nUser:")[0].strip()
    return reply


# Simple console chat loop. Runs until the user types "exit"/"quit" or interrupts
# the cell.
if __name__ == "__main__":
    print("Welcome to the Atlanta Restaurant Chat! (type ‘exit’ to quit)\n")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Interrupting the cell or closing stdin ends the chat cleanly
            # instead of dumping a traceback.
            print("👋 Goodbye!")
            break

        if user_input.lower() in ("exit", "quit"):
            print("👋 Goodbye!")
            break

        if not user_input:
            # Nothing typed: ask again rather than sending an empty prompt to the model.
            continue

        bot_reply = recommend(user_input)
        print(f"Assistant: {bot_reply}\n")

Welcome to the Atlanta Restaurant Chat! (type ‘exit’ to quit)

You: Hello!
Assistant: Welcome! What brings you here today?

You: I need some recommendation for my dinner. 
Assistant: Ah, dinner plans, how exciting! 

I'd be happy to help you find a great spot to dine. Could you please tell me what type of cuisine you're in the mood for? Are you looking for something specific, like Italian, Mexican, or maybe something a bit more exotic?

Also, do you have a specific price range in mind or any dietary restrictions I should keep in mind? 

This will help me give you the most tailored recommendations.

You: I want korean bbq place with at least 4 stars and 50 reviews. I want price range to be under $60 per person. 
Assistant: I'd be delighted to help you find the perfect Korean BBQ spot!

Considering your requirements, I'd recommend Q Korean Steakhouse. It has an impressive 4.6-star rating and 213 reviews, which is fantastic! The price range is around $30-$40 per person, which fits your bu

In [17]:
!pip install gradio --quiet

import gradio as gr

def recommend_wrapper(query, chat_history=None):
    """Answer ``query`` and return the chat history extended by the new exchange.

    Args:
        query: The user's message.
        chat_history: Existing list of ``(user_message, assistant_reply)`` pairs.
            ``None`` (or an empty list) starts a fresh history.

    Returns:
        A new list containing the previous pairs followed by
        ``(query, reply)``; the list passed in is not modified.
    """
    reply = recommend(query)
    # Copy so the caller's list is left untouched, and tolerate a missing history
    # (None + [...] would raise TypeError).
    history = list(chat_history) if chat_history else []
    history.append((query, reply))
    return history

def _on_message(msg, hist):
    """Shared handler for Enter and the Send button.

    Returns an empty string (clearing the textbox) together with the chat history
    extended by the new exchange.
    """
    return "", recommend_wrapper(msg, hist)


# Build the chat UI: a header, the conversation pane, an input row and a clear button.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="teal")) as grid:
    gr.Markdown(
        "# 🍽️ Atlanta Restaurant Recommender"
        "\n---"
        "\nAsk our friendly waiter for the perfect spot around town!"
    )

    # NOTE(review): the saved cell output shows a warning pointing at this line;
    # presumably it concerns the default message format of gr.Chatbot — confirm
    # against the installed Gradio version.
    chatbot = gr.Chatbot(label="Chat")

    with gr.Row():
        txt = gr.Textbox(
            lines=1,
            show_label=False,
            placeholder="Type your message here and press enter...",
        )
        send = gr.Button("Send")

    # Pressing Enter and clicking Send are wired identically.
    txt.submit(_on_message, [txt, chatbot], [txt, chatbot])
    send.click(_on_message, [txt, chatbot], [txt, chatbot])

    # Reset the conversation pane to an empty history.
    clear_btn = gr.Button("Clear Chat")
    clear_btn.click(lambda: [], None, chatbot)

grid.launch()

  chatbot = gr.Chatbot(label="Chat")


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9d8ab406e35ad27085.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


