In [1]:
# Mount Google Drive; the Streamlit app written later in this notebook reads
# its models and data from paths under /content/drive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
pip install streamlit unsloth sentence-transformers pyngrok beautifulsoup4 selenium streamlit-extras

Collecting streamlit
  Downloading streamlit-1.48.0-py3-none-any.whl.metadata (9.5 kB)
Collecting unsloth
  Downloading unsloth-2025.8.4-py3-none-any.whl.metadata (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.4/47.4 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Collecting selenium
  Downloading selenium-4.34.2-py3-none-any.whl.metadata (7.5 kB)
Collecting streamlit-extras
  Downloading streamlit_extras-0.7.5-py3-none-any.whl.metadata (4.2 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting unsloth_zoo>=2025.8.3 (from unsloth)
  Downloading unsloth_zoo-2025.8.3

In [3]:
# Refresh the apt package index, then install Chromium and its chromedriver so
# Selenium can drive a headless browser from the Streamlit app.
# The app's scraper sets opts.binary_location to /usr/bin/chromium-browser.
!apt-get update -y
!apt-get install -y chromium-browser chromium-chromedriver

Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]
Get:8 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,923 kB]
Get:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease [24.3 kB]
Get:10 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:12 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy/main amd64 Packages [32.9 kB]
Get:13 https://ppa.la

In [18]:
%%writefile streamlit_app.py

import streamlit as st
from streamlit_extras.let_it_rain import rain

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import plotly.express as px
import plotly.graph_objects as go

import os
import time
import random
import re
import requests
import sys
import pathlib
import datetime
from pathlib import Path

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import torch
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
from sentence_transformers import SentenceTransformer, util
from transformers import TextStreamer
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

#--------------------------LOAD IN ALL MODELS----------------------------
@st.cache_resource
def load_model():
    """Load the explanation LLM and its chat-templated tokenizer from Google Drive.

    The model is loaded in 4-bit with a 2048-token context window. The result is
    wrapped in ``st.cache_resource`` so it is built once and reused.

    Returns:
        tuple: ``(model, tokenizer)``, where the tokenizer carries the
        "llama-3.1" chat template.
    """
    model_dir = "/content/drive/MyDrive/Colab Notebooks/totallymakescents/llama-model/"
    load_kwargs = dict(
        model_name=model_dir,
        max_seq_length=2048,
        dtype=None,
        load_in_4bit=True,
    )
    llm, base_tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)
    chat_tokenizer = get_chat_template(base_tokenizer, chat_template="llama-3.1")
    # Put the Unsloth model into its inference configuration before serving.
    FastLanguageModel.for_inference(llm)
    return llm, chat_tokenizer

@st.cache_resource
def load_tag_model():
    """Load the tag-generation causal LM and its tokenizer from Google Drive.

    The weights are loaded with a 4-bit NF4 bitsandbytes configuration (double
    quantisation, float16 compute) and placed with ``device_map="auto"``.

    Returns:
        tuple: ``(tag_model, tag_tokenizer)``.
    """
    adapters_dir = '/content/drive/MyDrive/Colab Notebooks/totallymakescents/perfume_mistral_cpt_fine_tune_adapters/'

    four_bit_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    causal_lm = AutoModelForCausalLM.from_pretrained(
        adapters_dir,
        device_map="auto",
        quantization_config=four_bit_config,
    )
    causal_tokenizer = AutoTokenizer.from_pretrained(adapters_dir)
    return causal_lm, causal_tokenizer

@st.cache_resource
def load_sbert():
    """Build the sentence-embedding model used to encode generated tags.

    Returns:
        SentenceTransformer: the "all-MiniLM-L6-v2" model.
    """
    sbert_name = "all-MiniLM-L6-v2"
    encoder = SentenceTransformer(sbert_name)
    return encoder

@st.cache_resource
def load_embeddings():
    """Load the perfume embeddings saved on Google Drive.

    Returns:
        Whatever object was saved in ``perfume_embeddings.pt``; the app later
        calls ``.to(device)`` on it, so presumably a tensor — verify.
    """
    # NOTE(review): torch.load unpickles the file; only load files you trust.
    embeddings_path = "/content/drive/MyDrive/Colab Notebooks/totallymakescents/perfume_embeddings.pt"
    stored_embeddings = torch.load(embeddings_path)
    return stored_embeddings

@st.cache_resource
def load_dataframe():
    """Read the combined perfume/review table from its parquet file on Google Drive.

    Returns:
        pandas.DataFrame: the table stored in
        ``combined_df_classify_reviews.parquet``.
    """
    parquet_path = "/content/drive/MyDrive/Colab Notebooks/totallymakescents/data/combined_df_classify_reviews.parquet"
    perfume_table = pd.read_parquet(parquet_path)
    return perfume_table

@st.cache_resource
def load_notes_accords():
    """Read the note lookup table (``notes.csv``) from Google Drive.

    Returns:
        pandas.DataFrame: the parsed CSV; the app later selects its
        ``note_group`` and ``note`` columns.
    """
    notes_csv_path = '/content/drive/MyDrive/Colab Notebooks/totallymakescents/data/notes.csv'
    notes_table = pd.read_csv(notes_csv_path)
    return notes_table


#--------------------------SPINNERS FOR MODEL LOADING----------------------------
def _load_or_stop(loader, failure_label):
    """Call ``loader``; on any exception show an error banner and halt the script.

    Args:
        loader: zero-argument callable that returns the resource.
        failure_label: noun inserted into the "Error loading ..." message.

    Returns:
        Whatever ``loader`` returns.
    """
    try:
        return loader()
    except Exception as load_error:
        st.error(f"❌ Error loading {failure_label}: {load_error}")
        st.stop()


# Load every resource up front, in the same order as before; the first failure
# stops the script run after reporting the error.
model, tokenizer = _load_or_stop(load_model, "model")
tag_model, tag_tokenizer = _load_or_stop(load_tag_model, "model")
sbert_model = _load_or_stop(load_sbert, "SBERT")
perfume_embeddings = _load_or_stop(load_embeddings, "embeddings")
combined_df_classify_reviews = _load_or_stop(load_dataframe, "data")
df_notes_accords = _load_or_stop(load_notes_accords, "data")

#--------------------------FUNCTIONS---------------------------------------------
def generate_tags(prompt, max_new_tokens=128):
    """Generate descriptive tags for a user prompt with the tag model.

    The prompt is wrapped in an "### Instruction / ### Response" template,
    sampled from ``tag_model``, decoded, and stripped of chat markers. If the
    decoded text contains "### Response:", only the text after its last
    occurrence is returned.

    Args:
        prompt: free-text description supplied by the user.
        max_new_tokens: upper bound on the number of generated tokens.

    Returns:
        str: the cleaned model response.
    """
    tag_model.eval()
    instruction_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"
    encoded = tag_tokenizer(instruction_prompt, return_tensors="pt").to(tag_model.device)

    sampling_kwargs = dict(
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        repetition_penalty=1.2,
        pad_token_id=tag_tokenizer.eos_token_id,
    )
    with torch.no_grad():
        generated = tag_model.generate(**encoded, **sampling_kwargs)

    text = tag_tokenizer.decode(generated[0], skip_special_tokens=True)
    # Drop chat-markup remnants and stray role labels left in the decoded text.
    for marker in ("<|im_start|>", "<|im_end|>"):
        text = text.replace(marker, "")
    text = text.strip()
    text = re.sub(r"\b(nassistant\n|assistant\n|user\n):?", "", text, flags=re.IGNORECASE)

    response_marker = "### Response:"
    if response_marker not in text:
        return text
    return text.split(response_marker)[-1].strip()

#input format for LLM
def format_for_explanation_with_tags(user_query, perfume_row, tags):
    """Build the single user chat message sent to the explanation LLM.

    Args:
        user_query: the text the user typed.
        perfume_row: mapping-like row providing 'Top', 'Middle', 'Base',
            'Perfume' and 'Brand' by key, and 'mainaccord1'..'mainaccord5'
            via ``.get`` (missing accords become empty strings).
        tags: tag text generated for the query.

    Returns:
        dict: ``{"role": "user", "content": <prompt text>}``.
    """
    accord_values = [str(perfume_row.get(f'mainaccord{i}', '')) for i in range(1, 6)]
    main_accords = ', '.join(accord_values)

    note_summary = "".join([
        f"Top Notes: {perfume_row['Top']}. ",
        f"Middle Notes: {perfume_row['Middle']}. ",
        f"Base Notes: {perfume_row['Base']}. ",
        f"Main Accords: {main_accords}.",
    ])

    prompt_lines = [
        f"User query: {user_query}",
        f"Perfume returned: {perfume_row['Perfume']} by {perfume_row['Brand']}",
        f"Notes: {note_summary}",
        f"Tags: {tags}",
        "Please explain why this perfume fits the request, using both the tags and the notes.",
    ]
    return {"role": "user", "content": "\n".join(prompt_lines)}


#--------------------------SCRAPING---------------------------------------------
# st.cache_data(show_spinner=False)
def scrape_perfume(website):
    """Scrape rating, vote distributions and season/day-night data from a perfume page.

    The page is loaded in headless Chromium via Selenium and parsed with
    BeautifulSoup. Every extraction step is best-effort: if a section cannot be
    parsed, or yields fewer values than expected, it falls back to zeros so the
    caller always receives arrays of the documented length.

    Args:
        website: URL of the perfume page.

    Returns:
        tuple: ``(accord_data, rating, longevity, sillage, fem_masc, price, environment)``
            - accord_data: always an empty list (kept for interface compatibility).
            - rating: float, 0.0 when unavailable.
            - longevity, fem_masc, price: length-5 arrays of vote fractions.
            - sillage: length-4 array of vote fractions.
            - environment: array of at least 6 percentages; the caller reads the
              first four as seasons and the next two as day/night.

    Raises:
        Exceptions from Selenium (browser start-up, navigation, or the 40 s
        page-load wait) are not caught here.
    """
    def _vote_shares(votes, start, stop):
        # Normalise one group of votes to fractions. A truncated group or one
        # with no votes yields zeros of the expected length.
        width = stop - start
        group = votes[start:stop]
        total = group.sum() if group.size == width else 0
        if total == 0:
            return np.zeros(width)
        return group / total

    # CHROME SCRAPING OPTIONS
    opts = Options()
    opts.add_argument("--headless")
    opts.add_argument("--no-sandbox")
    opts.add_argument("--disable-dev-shm-usage")
    opts.add_argument("--disable-gpu")
    opts.add_argument("--remote-debugging-port=9222")
    opts.add_argument("--window-size=1920,1080")
    # Mild "stealth":
    opts.add_argument("--disable-blink-features=AutomationControlled")
    opts.add_experimental_option("excludeSwitches", ["enable-automation"])
    opts.add_experimental_option('useAutomationExtension', False)
    opts.add_argument("--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126 Safari/537.36")
    # In some Colab images, the binary path is needed explicitly:
    opts.binary_location = "/usr/bin/chromium-browser"  # or "/usr/bin/chromium"
    driver = webdriver.Chrome(options=opts)

    try:
        driver.get(website)
        WebDriverWait(driver, 40).until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )
        perfume_soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # Always release the browser, even when navigation fails.
        driver.quit()

    pattern = r'([\d.]+)%'  # captures the number in front of a percent sign

    accord_data = []

    # Rating (out of 5). Parsed on its own so that an unparsable vote count
    # can no longer reset a valid rating to 0.
    try:
        rating = float(perfume_soup.find('span', itemprop='ratingValue').text)
    except Exception:
        rating = 0.0

    # User votes (longevity, sillage, gender, price). The [11:30] slice
    # presumably matches the page's current layout — verify if the site changes.
    try:
        votes = np.array(
            [int(x.text) for x in perfume_soup.find_all('span', class_="vote-button-legend")[11:30]],
            dtype=float,
        )
    except Exception:
        votes = np.zeros(0)
    longevity = _vote_shares(votes, 0, 5)
    sillage = _vote_shares(votes, 5, 9)
    fem_masc = _vote_shares(votes, 9, 14)
    price = _vote_shares(votes, 14, 19)

    # Environment (seasons, day/night). The selector and the [8:] offset are
    # tied to the page's inline styles — presumably fragile, verify.
    try:
        bars = perfume_soup.find_all('div', style="width: 100%; height: 0.3rem; border-radius: 0.2rem; background: rgba(204, 224, 239, 0.4);")[8:]
        environment = np.array([float(re.search(pattern, str(bar.div)).group(1)) for bar in bars])
    except Exception:
        environment = np.zeros(6)
    if environment.size < 6:
        # Fewer bars than the caller indexes: treat as "no data".
        environment = np.zeros(6)

    return accord_data,rating,longevity,sillage,fem_masc,price,environment


# -------------------------------------------------------------------------
# Radar Chart for Perfume Stats
# -------------------------------------------------------------------------
def radar_chart(longevity_score,sillage_score,gender_score,price_score,rating_score):
    """Render a five-axis Plotly radar chart of perfume scores in Streamlit.

    Each argument is plotted on a 0-100 radial axis, in the order Longevity,
    Sillage, Gender, Affordability, Rating. The chart is written to the current
    Streamlit container; nothing is returned.
    """
    axis_labels = ["Longevity", "Sillage", "Gender", "Affordability", "Rating"]
    axis_scores = [longevity_score, sillage_score, gender_score, price_score, rating_score]

    # Repeat the first point at the end so outlines close into a polygon.
    closed_labels = axis_labels + [axis_labels[0]]
    closed_scores = axis_scores + [axis_scores[0]]

    chart = go.Figure()

    # Reference rings: thin light-gray at 20/40/60/80, thicker black at 100.
    ring_styles = [
        (20, "lightgray", 1),
        (40, "lightgray", 1),
        (60, "lightgray", 1),
        (80, "lightgray", 1),
        (100, "black", 2),
    ]
    for radius, ring_color, ring_width in ring_styles:
        ring = go.Scatterpolar(
            r=[radius] * len(closed_labels),
            theta=closed_labels,
            mode="lines",
            line=dict(color=ring_color, width=ring_width),
            hoverinfo="none",
        )
        chart.add_trace(ring)

    score_labels = [f"{label}: {score}%" for label, score in zip(axis_labels, axis_scores)]

    # Filled orange polygon of the actual scores.
    chart.add_trace(go.Scatterpolar(
        r=closed_scores,
        theta=closed_labels,
        mode="markers+lines",
        line=dict(color="orange"),
        marker=dict(size=8, color="orange"),
        hoverinfo="text",
        fill="toself",
        hovertext=score_labels + [""],  # blank for the closing point
    ))

    # Large invisible markers over each score to enlarge the hover target.
    chart.add_trace(go.Scatterpolar(
        r=axis_scores,
        theta=axis_labels,
        mode="markers",
        marker=dict(size=40, color="rgba(0,0,0,0)"),
        hoverinfo="text",
        hovertext=score_labels,
        showlegend=False,
    ))

    # Invisible markers on the outer ring that explain each axis's scale.
    scale_help = [
        '0=short-lasting to 100=long-lasting',
        '0=intimate radius to 100=large radius',
        '0=feminine to 100=masculine',
        '0=overpriced to 100=great value',
        '0=poor ratings to 100=excellent ratings',
    ]
    chart.add_trace(go.Scatterpolar(
        r=[100] * len(axis_labels),
        theta=axis_labels,
        mode="markers",
        marker=dict(size=20, color="rgba(0,0,0,0)"),
        hoverinfo="text",
        hovertext=scale_help,
        showlegend=False,
    ))

    polar_layout = dict(
        radialaxis=dict(visible=False, range=[0, 100]),
        angularaxis=dict(rotation=-90, direction="clockwise"),
    )
    chart.update_layout(
        polar=polar_layout,
        showlegend=False,
        margin=dict(l=20, r=20, t=20, b=20),
    )

    st.plotly_chart(chart, use_container_width=True)

# -------------------------------------------------------------------------
# Environment Pie Charts
# -------------------------------------------------------------------------
def environment_chart(environment):
    """Render the day/night slider and the seasonality treemap in Streamlit.

    Args:
        environment: sequence whose first four entries are the Winter, Spring,
            Summer and Fall values and whose next two are the day and night
            values. Fewer than six entries is treated as "no data".

    When the day/night pair or the season values sum to zero (for example the
    all-zero fallback produced when scraping fails), a short caption is shown
    instead of dividing by zero or drawing an empty chart. Nothing is returned.
    """
    if st.context.theme.type == 'dark':
        sys_bg_color = '#0e1117'
        sys_fr_color = '#FFFFFF'
    else:
        sys_bg_color = '#FFFFFF'
        sys_fr_color = '#0e1117'
    season_colors = {
        "Winter": "#20A2D5",
        "Spring": "#229F56",
        "Summer": "#E1BB23",
        "Fall":   "#BB392B",
    }
    season_labels = ["Winter", "Spring", "Summer", "Fall"]

    # Coerce to a flat float array; NaNs become 0 so they count as "no votes".
    env = np.nan_to_num(np.asarray(environment, dtype=float).ravel())
    if env.size < 6:
        env = np.zeros(6)
    season_values = env[:4]
    day_pct, night_pct = env[4], env[5]

    # --- Day/night slider -------------------------------------------------
    day_night_total = day_pct + night_pct
    if day_night_total > 0:
        ratio = night_pct / day_night_total

        fig, ax = plt.subplots(figsize=(2, 1), facecolor=sys_bg_color)
        ax.set_facecolor(sys_bg_color)
        # Gradient colormap from gold (day) to blue (night)
        cmap = LinearSegmentedColormap.from_list('daynight', ['#FFC300', "#0B4CD0"])
        gradient = np.linspace(0, 1, 256)
        gradient = np.vstack((gradient, gradient))
        ax.imshow(gradient, aspect='auto', cmap=cmap, extent=[0, 100, 0, 1])
        # Slider handle at the night share of the day+night total
        ax.axvline(ratio * 100, color=sys_fr_color, linewidth=3)
        ax.text(0, 1.5, 'Day', color=sys_fr_color, ha='left', va='center', fontsize=8)
        ax.text(100, 1.5, 'Night', color=sys_fr_color, ha='right', va='center', fontsize=8)
        ax.set_xlim(0, 100)
        ax.set_ylim(0, 1)
        ax.axis('off')

        plt.tight_layout()
        st.pyplot(fig, use_container_width=True)
        # Release the figure; otherwise pyplot keeps one alive per rendered perfume.
        plt.close(fig)
    else:
        st.caption("Day/night data unavailable.")

    # --- Seasonality treemap ------------------------------------------------
    if season_values.sum() > 0:
        df = pd.DataFrame({
            "Season": season_labels,
            "Value":  season_values,
        })
        treemap = px.treemap(
            df,
            path=["Season"],
            values="Value",
            color="Season",
            color_discrete_map=season_colors,
        )
        treemap.update_traces(
            textinfo="label+percent entry",
            textfont=dict(color="white", size=24),
        )
        treemap.update_layout(
            autosize=False,
            width=380, height=300,
            margin=dict(t=30, l=10, r=10, b=10),
            paper_bgcolor=sys_bg_color,
            plot_bgcolor=sys_bg_color,
        )
        st.plotly_chart(treemap, use_container_width=True)
    else:
        st.caption("Seasonality data unavailable.")

# -------------------------------------------------------------------------
# Accord Colorizer and Plot
# -------------------------------------------------------------------------
# Hex colour for each accord / note-group label. Keys are lower-case.
accord_colors = {
    "honey":     "#FFC300",
    "lemon":     "#FFF44F",
    "citrus":    "#FFD966",
    "orange":    "#FFA500",
    "berry":     "#8E4585",
    "grapefruit":"#FF5F1F",
    "rose":      "#FFC0CB",
    "jasmine":   "#F8F4FF",
    "tuberose":  "#F2E7FE",
    "floral":    "#E8C1D1",
    "white floral":"#F5F5F5",
    "yellow floral":"#FFF8DC",
    "woody":     "#8B5E3C",
    "cedarwood": "#A0522D",
    "patchouli": "#70543E",
    "earthy":    "#6B4226",
    "moss":      "#556B2F",
    "mossy":     "#556B2F",
    "green":     "#228B22",
    "herbal":    "#6B8E23",
    "lavender":  "#967BB6",
    "violet":    "#7F00FF",
    "iris":      "#5A4FCF",
    "powdery":   "#EDE3E0",
    "vanilla":   "#F3E5AB",
    "coconut":   "#FFF5E1",
    "almond":    "#EED5B7",
    "caramel":   "#C68E17",
    "chocolate": "#5C4033",
    "cacao":     "#3B1F1F",
    "coffee":    "#4B3621",
    "tobacco":   "#5D3A1A",
    "smoke":     "#4F4F4F",
    "smoky":     "#4F4F4F",
    "leather":   "#59351F",
    "amber":     "#FFBF00",
    "warm spicy":"#C1440E",
    "spicy":     "#D2691E",
    "cinnamon":  "#A0522D",
    "fresh spicy":"#FF7F50",
    "sweet":     "#FFB6C1",
    "fruity":    "#FF6EB4",
    "tropical":  "#FFA07A",
    "aquatic":   "#00CED1",
    "marine":    "#4682B4",
    "ozonic":    "#E0FFFF",
    "metallic":  "#B0C4DE",
    "mineral":   "#A9A9A9",
    "rubber":    "#2F4F4F",
    "vinyl":     "#708090",
    "plastic":   "#A9A9A9",
    "balsamic":  "#8B4513",
    "oriental":  "#DEB887",
    "gourmand":  "#D2B48C",
    "animalic":  "#8B0000",
    "lactonic":  "#FFF8E7",
    "flowers":    "#E8C1D1",
    "natural and synthetic, popular and weird":  "#B0C4DE",
    "greens, herbs and fougeres":"#228B22",
    "fruits, vegetables and nuts":"#FF6EB4",
    "sweets and gourmand smells":"#D2B48C",
    "woods and mosses":"#556B2F",
    "beverages": "#FFB6C1",
    "spices":    "#D2691E",
    "musk, amber, animalic smells":"#8B0000",
    "citrus smells":"#FFF44F",
    "resins and balsams":"#FFBF00",
    "white flowers":"#F5F5F5",
    "uncategorized":"#E0FFFF",
    }

def get_accord_color(accord):
    """Return the hex colour for an accord label, or neutral gray if unknown.

    The exact label is tried first; if that misses, the label is retried with
    surrounding whitespace removed and lower-cased, so "Rose " and "FLOWERS"
    resolve to the same colours as "rose" and "flowers".

    Args:
        accord: accord or note-group label. Non-string values (e.g. NaN, None)
            are treated as unknown.

    Returns:
        str: a "#RRGGBB" colour; "#888888" when the accord is not in
        ``accord_colors``.
    """
    fallback = "#888888"
    if not isinstance(accord, str):
        return fallback
    exact = accord_colors.get(accord)
    if exact is not None:
        return exact
    return accord_colors.get(accord.strip().lower(), fallback)

def get_text_color(bg_hex):
    """Pick a readable text colour for a given background colour.

    Args:
        bg_hex: six-digit hex colour, with or without leading '#'.

    Returns:
        str: 'black' when the weighted RGB brightness exceeds 186, else 'white'.
    """
    hex_digits = bg_hex.lstrip('#')
    red, green, blue = (int(hex_digits[start:start + 2], 16) for start in (0, 2, 4))
    brightness = 0.299*red + 0.587*green + 0.114*blue
    if brightness > 186:
        return 'black'
    return 'white'

def display_accords(data):
    """Draw up to five accords as a horizontal bar chart in Streamlit.

    Args:
        data: pandas Series whose index holds accord labels and whose values
            hold their weights (the caller passes ``value_counts()`` output).
            Only the first five entries are shown.

    Bars at least half as long as the longest bar carry their label inside the
    bar; shorter bars carry it in the space to the right of the bar. An empty
    Series produces a caption instead of a blank figure. Nothing is returned.
    """
    accords = data.index.to_list()[:5]
    values = data.values[:5]
    if len(accords) == 0:
        st.caption("No accord data available.")
        return

    colors = [get_accord_color(a) for a in accords]
    if st.context.theme.type == 'dark':
        sys_bg_color = '#0e1117'
        sys_fr_color = '#FFFFFF'
    else:
        sys_bg_color = '#FFFFFF'
        sys_fr_color = '#0e1117'

    fig, ax = plt.subplots(figsize=(6, 3), facecolor=sys_bg_color)
    ax.set_facecolor(sys_bg_color)

    y_pos = np.arange(len(accords))
    ax.barh(y_pos, values, color=colors, height=1)

    # Use the true maximum rather than assuming the first entry is the longest.
    longest = max(values)
    for row, (accord, val, bg) in enumerate(zip(accords, values, colors)):
        if val >= longest / 2:
            # Label fits inside the bar: contrast against the bar colour.
            ax.text(val / 2, row, accord, va='center', ha='center',
                    color=get_text_color(bg), fontsize=10)
        else:
            # Label sits on the page background to the right of the bar.
            ax.text((longest + val) / 2, row, accord, va='center', ha='center',
                    color=sys_fr_color, fontsize=10)
    ax.invert_yaxis()
    ax.axis('off')
    plt.tight_layout()
    st.pyplot(fig, use_container_width=True)
    # Release the figure; otherwise pyplot keeps one alive per rendered perfume.
    plt.close(fig)

def get_img_fragrantica(input_url):
    """Build the thumbnail image URL for a perfume page URL.

    The perfume id is taken as the text after the last '-' in ``input_url`` up
    to the first '.' that follows it.

    Returns:
        str: ``https://fimgs.net/mdimg/perfume-thumbs/375x500.<id>.jpg``.
    """
    tail = input_url.rsplit('-', 1)[-1]
    perfume_id, _, _ = tail.partition('.')
    return 'https://fimgs.net/mdimg/perfume-thumbs/375x500.' + perfume_id + '.jpg'


#--------------------------HOME PAGE---------------------------------------------
# Page-level settings: browser-tab title and full-width layout.
st.set_page_config(
    page_title='TotallyMakeScents',
    layout="wide",                # ← enables full-width mode
    initial_sidebar_state="auto"  # optional
)

# Two-column header: logo on the left, title / rotating prompts / input on the right.
col1, col2 = st.columns([2, 4])

with col1:
    # Logo
    # -------------------------------------------------------------------------
    st.image(image = '/content/drive/MyDrive/Colab Notebooks/totallymakescents/app/tms-logo.png',
            width = 320,
            use_container_width = False)

with col2:
    # Title
    # -------------------------------------------------------------------------
    st.header('TotallyMakeScents')

    # Rotating Subheaders
    # -------------------------------------------------------------------------
    subheaders = ['What does it smell like in the rain, at the end of a hiking trail full of blossoms?',
                "What fragrance would a wizard wear in a magical world?",
                "I'm looking for a bittersweet scent for a farewell party.",
                "What perfumes smell like a lake-side restaurant?",
                "What fragrances embody the smell of castle ruins?",
                "What olfactory notes would capture the feeling of an airy apothecary?",
                "Which perfumes best embody a romantic getaway?"]

    # One full animation cycle shows every sentence once, each in its own slot.
    per_sentence_duration = 6  # seconds per sentence
    total_duration = len(subheaders) * per_sentence_duration

    # Keyframe offsets as percentages of the full cycle. As written, a sentence
    # is opaque from half of fade_in_pct_end until fade_in_pct_end (the
    # (per_sentence_duration - 3) second mark) and is transparent again at
    # hold_end (the (per_sentence_duration - 1) second mark).
    # NOTE(review): the keyframes below declare opacity 1 and then opacity 0 for
    # the same hold_end offset; confirm the intended fade-in / hold / fade-out split.
    fade_in_pct_end = 100 * (per_sentence_duration - 3) / total_duration
    hold_start = fade_in_pct_end
    hold_end = 100 * (per_sentence_duration - 1) / total_duration
    fade_out_pct_start = hold_end

    # Inline CSS + HTML: every <span> shares one animation, staggered by
    # per-child animation-delay so the sentences appear one after another.
    css = f"""
    <style>
    .rotator {{
    position: relative;
    height: 2em;
    overflow: hidden;
    }}
    .rotator span {{
    position: absolute;
    width: 100%;
    opacity: 0;
    color: grey;
    text-align: center;
    animation: rotate {total_duration}s ease-in-out infinite;
    }}
    {"".join(
        f".rotator span:nth-child({i+1}) {{ animation-delay: {i * per_sentence_duration}s; }}"
        for i in range(len(subheaders))
    )}
    @keyframes rotate {{
    0%, {fade_out_pct_start}%, 100% {{ opacity: 0; }}
    {(fade_in_pct_end/2):.3f}%, {fade_in_pct_end:.3f}% {{ opacity: 1; }}
    {hold_start:.3f}%, {hold_end:.3f}% {{ opacity: 1; }}
    {hold_end:.3f}%, {fade_out_pct_start:.3f}% {{ opacity: 0; }}
    }}
    </style>
    <div class="rotator">
    {''.join(f'<span>{s}</span>' for s in subheaders)}
    </div>
    """

    st.markdown(css, unsafe_allow_html=True)

    # USER INPUT GOES HERE
    st.markdown('<p style="margin:0 0 4px 0; font-size:1.1em;">Tell us your story, and we will...</p>',
            unsafe_allow_html=True
    )
    # Streamlit discourages empty widget labels; give the widget a real label
    # and keep it hidden, so the layout is unchanged.
    user_input = st.text_area(label='Scent inspiration',
                            placeholder='Enter your scent inspiration here...',
                            label_visibility='hidden',
                            height=68)
    col3, col4 = st.columns([4, 2])
    with col3:
        top_k = st.slider("Number of Recommendations", min_value=1, max_value=5, value=3)
    with col4:
        generate_recommendations = st.button('MakeScents')

# top_k = st.slider("Number of Recommendations", min_value=1, max_value=5, value=3)


def _weighted_score(distribution):
    """Collapse a vote distribution over ordered categories into a 0-100 score.

    Category ``i`` is weighted ``i`` and the result is divided by the largest
    weight (``n - 1``), so all votes in the last category give 100 and all
    votes in the first give 0. An all-zero distribution gives 0.
    """
    shares = np.asarray(distribution, dtype=float)
    top_weight = shares.size - 1
    if top_weight < 1:
        return 0.0
    return round(float(100 * np.dot(shares, np.arange(shares.size)) / top_weight), 2)


if generate_recommendations:
    if user_input:
        st.markdown(' ')
        make_progress_bar = st.progress(10, text='Making Scents...')
        st.markdown(' ')
        rain(emoji="👃",font_size=54,falling_speed=3,animation_length="2")
        st.markdown(' ')

        # 1) Turn the free-text request into tags, then embed the tags.
        tags_text = generate_tags(user_input)
        query_embedding = sbert_model.encode(tags_text, convert_to_tensor=True)
        scent_tensor = perfume_embeddings.to(query_embedding.device)
        similarities = util.cos_sim(query_embedding, scent_tensor)[0]

        # 2) Re-weight by review sentiment: x1.2 for perfumes flagged positive,
        #    x0.8 otherwise. Rows are matched to embeddings by position, the
        #    same way the perfume row is fetched with .iloc below.
        positive_mask = torch.as_tensor(
            combined_df_classify_reviews['is_positive'].eq(1).fillna(False).to_numpy(dtype=bool),
            device=similarities.device,
        )
        sentiment_boost = torch.full_like(similarities, 0.8)
        sentiment_boost[positive_mask] = 1.2
        adjusted_scores = similarities * sentiment_boost

        # 3) Rank on the adjusted scores (ranking the raw similarities would
        #    discard the boost) and never ask for more results than exist.
        result_count = min(top_k, adjusted_scores.numel())
        top_results = torch.topk(adjusted_scores, k=result_count)

        for position, idx in enumerate(top_results.indices.tolist(), start=1):
            make_progress_bar.progress(
                int((position / float(result_count)) * 90. + 10.),
                text=f'Making Scent {position}',
            )
            perfume = combined_df_classify_reviews.iloc[idx]
            message = format_for_explanation_with_tags(user_input, perfume, tags_text)

            # Live stats from the perfume's web page (zeros when unavailable).
            _, rating, longevity, sillage, fem_masc, price, environment = scrape_perfume(perfume['url'])

            # Map each listed note to its note group and count groups. Missing
            # note fields (non-strings such as NaN) are skipped instead of
            # breaking the string concatenation.
            note_fields = (perfume['Top'], perfume['Middle'], perfume['Base'])
            potd_notes = [
                note.strip()
                for field in note_fields if isinstance(field, str)
                for note in field.split(',') if note.strip()
            ]
            potd_all_notes_series = pd.Series(potd_notes, name='note', dtype=object)
            df_potd_notes_accords = pd.merge(
                left=potd_all_notes_series,
                right=df_notes_accords[['note_group', 'note']],
                how='left',
                on='note',
            )
            potd_accords = df_potd_notes_accords['note_group'].value_counts()

            # Radar scores on a true 0-100 scale.
            longevity_score = _weighted_score(longevity)
            sillage_score   = _weighted_score(sillage)
            gender_score    = _weighted_score(fem_masc)
            price_score     = _weighted_score(price)
            rating_score    = round(100.0*rating/5,2)

            inputs = tokenizer.apply_chat_template(
                [message],
                tokenize=True,
                add_generation_prompt=True,
                return_tensors="pt",
            ).to("cuda")

            # Streams generated text to stdout (the server log) while generating.
            text_streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

            # Header row: thumbnail, name/brand, outbound link.
            image_col, title_col = st.columns([1,10])
            with image_col:
                img = get_img_fragrantica(perfume['url'])
                st.image(image = img,
                        width = 100,
                        use_container_width = False)
            with title_col:
                st.header(':material/fragrance: {name} by {brand}'.format(name=perfume['Perfume'],brand=perfume['Brand']))
                st.link_button(
                    'Find more details and Reviews at Fragrantica.com :material/arrow_outward: ',
                    f"{perfume['url']}",
                )

            radar_col, notes_col, environment_col = st.columns([2, 2, 2])

            # Radar Chart
            with radar_col:
                radar_chart(longevity_score, sillage_score, gender_score, price_score, rating_score)

            # Accords and Notes
            with notes_col:
                st.subheader('𓏊 Notes')
                st.write(f":material/clock_loader_10: Top: {perfume['Top']}")
                st.write(f":material/clock_loader_40: Middle: {perfume['Middle']}")
                st.write(f":material/clock_loader_90: Base: {perfume['Base']}")
                st.subheader(':material/ent: Accords')
                display_accords(potd_accords)

            # Environment Information
            with environment_col:
                environment_chart(environment)

            # LLM Explanation
            with st.spinner('Making sense...'):
                output_llm = model.generate(
                    input_ids=inputs,
                    max_new_tokens=256,
                    use_cache=True,
                    temperature=1.5,
                    min_p=0.1,
                    streamer=text_streamer
                )

                full_output_llm = tokenizer.decode(output_llm[0], skip_special_tokens=True)

                # The decoded text still contains the prompt; keep only what
                # follows the assistant header, or strip the prompt text.
                assistant_prefix = "assistant\n"
                if assistant_prefix in full_output_llm:
                    llm_explanation = full_output_llm.split(assistant_prefix, 1)[-1].strip()
                else:
                    llm_explanation = full_output_llm.replace(message["content"], "").strip()

                st.markdown("**Explanation:**")
                st.markdown(llm_explanation)
        make_progress_bar.progress(100, text='We Made the Scents for You')

        # Leave the completed bar visible briefly before removing it.
        time.sleep(1)
        make_progress_bar.empty()
    else:
        st.warning('Please enter a prompt.')



Overwriting streamlit_app.py


In [19]:
# Expose the Streamlit app through an ngrok tunnel, then start the app.
from google.colab import userdata
from pyngrok import conf, ngrok
# Close any tunnels left over from a previous run of this cell.
ngrok.kill()  # reset tunnels

# Read the ngrok auth token from the Colab secret named 'ngrok_KEY'.
ngrok_token = userdata.get('ngrok_KEY') # needs key from ngrok

conf.get_default().auth_token = ngrok_token

# Open an HTTP tunnel to local port 8501, the port Streamlit is started on below.
public_url = ngrok.connect(addr=8501, proto="http")
print("Visit the app in the first link, not the local link:\n", public_url)

# NOTE(review): CORS and XSRF protection are switched off here, presumably so the
# app works behind the ngrok proxy — confirm this is acceptable for a public URL.
!streamlit run streamlit_app.py --server.enableCORS false --server.enableXsrfProtection false --server.port 8501 &

Visit the app in the first link, not the local link:
 NgrokTunnel: "https://04c7ba8b234e.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.138.121.180:8501[0m
[0m
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
2025-08-10 09:44:04.178262: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754819044.232885   33180 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754819044.249526   33180 cuda_blas.cc:1407] Unable to reg