In [1]:
# -*- coding: utf-8 -*-
# Build scores_full.csv from Smart Grocery List Optimizer datasets
# Requires: pandas, scikit-learn, tqdm

import re
from pathlib import Path
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# =========================
# CONFIG
# =========================
# Project folder; the input and output locations below are derived from it.
BASE_DIR = r"C:\Users\sagni\Downloads\Smart Grocery List Optimizer"
# Recipe dump that is read as input.
RAW_RECIPES = str(Path(BASE_DIR).joinpath("archive", "RAW_recipes.csv"))
# Destination of the per-recipe score table.
SCORES_OUT = str(Path(BASE_DIR).joinpath("scores_full.csv"))

# Free-text description of the desired meal plan (edit as needed).
TARGET_TEXT = (
    "Looking for healthy weekly meals with rice, chicken, vegetables, "
    "lentils, and some Indian spices. Prefer easy cooking recipes."
)

# =========================
# Helpers
# =========================
def clean_text(text: str) -> str:
    """Normalize a raw text value into a lower-cased, single-spaced string.

    Values that ``pd.isna`` reports as missing (e.g. ``None`` or NaN) yield
    the empty string.  Anything else is converted with ``str``, lower-cased,
    has every run of whitespace collapsed to one space, and is stripped of
    leading and trailing whitespace.
    """
    if not pd.isna(text):
        collapsed = re.sub(r"\s+", " ", str(text).lower())
        return collapsed.strip()
    return ""

# =========================
# Load dataset
# =========================
print("[INFO] Loading recipes...")
df = pd.read_csv(RAW_RECIPES)

# Keep only the identifier and the text-bearing columns.
df = df[["id", "name", "ingredients", "steps"]].copy()

# Combine the text fields into one document per recipe.
# Missing values are replaced with "" BEFORE the string conversion:
# astype(str) on its own turns NaN into the literal text "nan", which would
# end up in the corpus as if it were a real word of the recipe.
df["text_raw"] = (
    df["name"].fillna("").astype(str) + " "
    + df["ingredients"].fillna("").astype(str) + " "
    + df["steps"].fillna("").astype(str)
)

# Lower-case and whitespace-normalize each combined document.
df["text"] = df["text_raw"].map(clean_text)

# =========================
# Compute TF-IDF scores
# =========================
print("[INFO] Computing similarity scores...")

# Corpus: one document per recipe, with the cleaned target text appended as
# the final entry so recipes and target are vectorized with one vocabulary.
docs = df["text"].tolist()
docs.append(clean_text(TARGET_TEXT))

vec = TfidfVectorizer(
    stop_words="english",
    ngram_range=(1, 2),
    max_features=50000,
)
X = vec.fit_transform(docs)

# The last row is the target; every row before it is a recipe.
X_target = X[-1]
X_recipes = X[:-1]

# cosine_similarity yields one column (recipes x target); flatten it to 1-D.
scores = cosine_similarity(X_recipes, X_target).ravel()
df["score"] = scores

# =========================
# Save scores_full.csv
# =========================
# Export only the identifying columns together with the computed score.
out = df.loc[:, ["id", "name", "score"]].copy()
out.to_csv(SCORES_OUT, encoding="utf-8", index=False)
print(f"[OK] Saved {len(out)} recipe scores → {SCORES_OUT}")
# Show the first rows as a quick sanity check.
print(out.head())


[INFO] Loading recipes...
[INFO] Computing similarity scores...
[OK] Saved 231637 recipe scores → C:\Users\sagni\Downloads\Smart Grocery List Optimizer\scores_full.csv
       id                                        name  score
0  137739  arriba   baked winter squash mexican style    0.0
1   31490            a bit different  breakfast pizza    0.0
2  112140                   all in the kitchen  chili    0.0
3   59389                          alouette  potatoes    0.0
4   44061          amish  tomato ketchup  for canning    0.0
