In [117]:
import json
import numpy as np
import pandas as pd

In [118]:
photo_datalist = []
with open('../data/photos.json', 'r') as file:
    for line in file:
        data = json.loads(line)
        photo_datalist.append(data)

restaurant_datalist = []
with open('../data/yelp_academic_dataset_business.json', 'r') as file:
    for line in file:
        data = json.loads(line)
        restaurant_datalist.append(data)
        


In [119]:
# clean photos
# create dataframe
picture_df = pd.DataFrame(photo_datalist)
restaurant_df = pd.DataFrame(restaurant_datalist)
michelin_df = pd.read_csv('../data/michelin.csv')
print(restaurant_df['attributes'])

0                             {'ByAppointmentOnly': 'True'}
1                    {'BusinessAcceptsCreditCards': 'True'}
2         {'BikeParking': 'True', 'BusinessAcceptsCredit...
3         {'RestaurantsDelivery': 'False', 'OutdoorSeati...
4         {'BusinessAcceptsCreditCards': 'True', 'Wheelc...
                                ...                        
150341    {'ByAppointmentOnly': 'False', 'RestaurantsPri...
150342    {'BusinessAcceptsCreditCards': 'True', 'Restau...
150343    {'RestaurantsPriceRange2': '1', 'BusinessAccep...
150344    {'BusinessParking': '{'garage': False, 'street...
150345    {'WheelchairAccessible': 'True', 'BusinessAcce...
Name: attributes, Length: 150346, dtype: object


In [120]:
# clean michelin
michelin_df = michelin_df.drop(columns=['PhoneNumber', 'Url', 'WebsiteUrl', 'Award', 'GreenStar', 'Description', 'Location'])
# encode prices based on number of characters
michelin_df["Price"] = michelin_df["Price"].apply(lambda x: str(len(str(x))))
facilities_and_services = michelin_df["FacilitiesAndServices"].apply(lambda x: x.split(",") if isinstance(x, str) else [])
michelin_df["attributes"] = facilities_and_services.apply(lambda x: {item.strip(): True for item in x})
# Process cuisine properly - use apply with a function that has access to row index
for idx, row in michelin_df.iterrows():
    cuisine_list = row["Cuisine"].split(",") if isinstance(row["Cuisine"], str) else []
    cuisine_clean = [c.strip() for c in cuisine_list]
    price = row["Price"]
    # Update attributes for this specific row
    michelin_df.at[idx, "attributes"] = {**michelin_df.at[idx, "attributes"], 
                                         "Cuisine": cuisine_clean,
                                         "RestaurantsPriceRange2": price.strip()}
michelin_df['is_michelin'] = True
restaurant_df['is_michelin'] = False
# generate random high ratings for michelin restaurants
michelin_df['stars'] = 0
# generate business_ids for michelin
michelin_df['business_id'] = np.arange(len(michelin_df))

In [121]:
# align michelin_df and restaurant_df
restaurant_df = restaurant_df.drop(columns=['hours', 'review_count', 'is_open'])
michelin_df = michelin_df.drop(columns=['FacilitiesAndServices', 'Cuisine', 'Price'])
michelin_df = michelin_df.rename(columns={
    "Name": "name",
    "Address": "address",
    "Longitude": "longitude",
    "Latitude": "latitude",
})
# combine address, city, state, postal_code fields into one column
restaurant_df["address"] = restaurant_df["address"] + ", " + restaurant_df["city"] + ", " + restaurant_df["state"] + " " + restaurant_df["postal_code"]
restaurant_df = restaurant_df.drop(columns=['city', 'state', 'postal_code'])

In [122]:
print("\nSample categories in original dataset:")
print(restaurant_df['categories'].unique())
non_food_terms = '|'.join(['Salon', 'Barber', 'Gym', 'Spa', 'Theater', 'Nightlife', 'Beauty', 'Barbershop', "Active Life",
    "Automotive",
    "Beauty & Spas",
    "Home Services",
    "Health & Medical",
    "Hotels & Travel",
    "Local Services",
    "Professional Services",
    "Public Services & Government",
    "Real Estate",
    "Religious Organizations",
    "Shopping & Retail",
    "Transportation",
    "Arts & Entertainment",
    "Event Planning & Services",
    "Education",
    "Financial Services",
    "Nightlife",
    "Pets & Animal Services",
    "Sports & Recreation",
    "Miscellaneous Services",
    "Shopping", "Women's Clothing", "Fashion"
])

# Filter out non-food related businesses
restaurant_df = restaurant_df[~restaurant_df['categories'].str.contains(non_food_terms, case=False, na=False)]
picture_df = picture_df[picture_df['label'] != 'inside']
picture_df = picture_df[picture_df['label'] != 'outside']
picture_df = picture_df[picture_df['label'] != 'menu']
# Keep only photos that belong to the filtered restaurants
picture_df = picture_df[picture_df['business_id'].isin(restaurant_df['business_id'])]

# Print statistics
print("Number of food establishments after filtering:", len(restaurant_df))
print("Number of photos for food establishments after filtering:", len(picture_df))
print("\nSample categories in filtered dataset:")
print(restaurant_df['categories'].unique())

restaurant_df = restaurant_df.drop(columns=["categories"])



Sample categories in original dataset:
['Doctors, Traditional Chinese Medicine, Naturopathic/Holistic, Acupuncture, Health & Medical, Nutritionists'
 'Shipping Centers, Local Services, Notaries, Mailbox Centers, Printing Services'
 'Department Stores, Shopping, Fashion, Home & Garden, Electronics, Furniture Stores'
 ...
 'Shopping, Jewelry, Piercing, Toy Stores, Beauty & Spas, Accessories, Fashion'
 'Fitness/Exercise Equipment, Eyewear & Opticians, Shopping, Sporting Goods, Bikes'
 'Beauty & Spas, Permanent Makeup, Piercing, Tattoo']
Number of food establishments after filtering: 51036
Number of photos for food establishments after filtering: 81941

Sample categories in filtered dataset:
['Restaurants, Food, Bubble Tea, Coffee & Tea, Bakeries'
 'Brewpubs, Breweries, Food'
 'Burgers, Fast Food, Sandwiches, Food, Ice Cream & Frozen Yogurt, Restaurants'
 ... 'Restaurants, Sandwiches, Convenience Stores, Coffee & Tea, Food'
 'Cafes, Juice Bars & Smoothies, Coffee & Tea, Restaurants, Food'

In [123]:
# union michelin_df and restaurant_df based on business_id as index
michelin_df = michelin_df.set_index('business_id')
restaurant_df = restaurant_df.set_index('business_id')
combined_df = pd.concat([michelin_df, restaurant_df], axis=0)
combined_df = combined_df.reset_index()
print("Number of restaurants in combined dataset:", len(combined_df))
combined_df.to_csv('../data/combined_restaurants.csv', index=False)

Number of restaurants in combined dataset: 68782


In [124]:
import ast
import json
import itertools
from collections import defaultdict
from typing import List, Dict, Tuple

def _coerce(value):
    """
    Normalise raw values to Python scalars that are easy to encode.
    • 'True'/'False'/'None' → bool / None
    • strings that look like dicts/lists  → parsed via ast.literal_eval
    • lists are returned unchanged (they will be one‑hot encoded later)
    """
    if isinstance(value, str):
        v = value.strip()
        if v.lower() in {"true", "false", "none"}:
            return None if v.lower() == "none" else v.lower() == "true"
        if (v.startswith("{") and v.endswith("}")) or (v.startswith("[") and v.endswith("]")):
            try:
                return ast.literal_eval(v)
            except (SyntaxError, ValueError):
                pass
    return value  # already a scalar/list/dict

def _safe_to_dict(raw):
    """best‑effort convert str → dict, else return {}"""
    if isinstance(raw, str):
        raw = raw.strip()
        # try JSON first
        try:
            return json.loads(raw)
        except Exception:
            pass
        # then Python‑literal (handles single quotes, etc.)
        try:
            return ast.literal_eval(raw)
        except Exception:
            pass
    return {}          # couldn’t parse

def _flatten(attr_dict, prefix=""):
    """
    Same as before, but now:
    • None / NaN / str → quietly skipped or parsed
    """
    # ---- guard clauses ---------------------------------
    if attr_dict is None:
        return {}
    if isinstance(attr_dict, float) and math.isnan(attr_dict):
        return {}
    if not isinstance(attr_dict, dict):
        # try to turn a string into dict, else bail
        attr_dict = _safe_to_dict(attr_dict)
        if not isinstance(attr_dict, dict):
            return {}
    # ---- normal flattening -----------------------------
    flat = {}
    for k, v in attr_dict.items():
        key = f"{prefix}{k}"
        v = _coerce(v)            # uses the existing helper
        if isinstance(v, dict):
            flat.update(_flatten(v, prefix=f"{key}_"))
        else:
            flat[key] = v
    return flat


def build_feature_map(attr_column: List[Dict]) -> Tuple[Dict[str, int], Dict[str, Dict[str, int]]]:
    """
    Scans the whole dataset once to decide:
    • numeric / boolean features  → single dimension
    • categorical features (string/enum) → one dimension per category value
    • list‑type features (e.g. Cuisine) → one dimension per possible item
    Returns:
        num_features   – mapping key → column index
        cat_features   – mapping key → {category → column index}
    """
    num_features, cat_features = {}, {}
    next_idx = 0

    for raw in attr_column:
        flat = _flatten(raw)
        for k, v in flat.items():
            if isinstance(v, (bool, int, float)) or v is None:
                if k not in num_features:
                    num_features[k] = next_idx
                    next_idx += 1
            elif isinstance(v, list):
                for item in v:
                    if k not in cat_features:
                        cat_features[k] = {}
                    if item not in cat_features[k]:
                        cat_features[k][item] = next_idx
                        next_idx += 1
            else:  # string categorical
                if k not in cat_features:
                    cat_features[k] = {}
                if v not in cat_features[k]:
                    cat_features[k][v] = next_idx
                    next_idx += 1
    return num_features, cat_features

def encode(attr_dict: Dict,
           num_features: Dict[str, int],
           cat_features: Dict[str, Dict[str, int]],
           dim: int) -> List[float]:
    """
    Encodes a single restaurant’s attributes into a fixed‑length vector.
    Missing / None values are encoded as 0.
    """
    vec = [0.0] * dim
    flat = _flatten(attr_dict)

    # numeric / boolean
    for k, idx in num_features.items():
        v = flat.get(k)
        if isinstance(v, bool):
            vec[idx] = 1.0 if v else 0.0
        elif v is not None:
            # everything else numeric is cast to float
            try:
                vec[idx] = float(v)
            except (TypeError, ValueError):
                pass  # leave as 0

    # categorical
    for k, cat_map in cat_features.items():
        v = flat.get(k)
        if isinstance(v, list):
            for item in v:
                idx = cat_map.get(item)
                if idx is not None:
                    vec[idx] = 1.0
        else:
            idx = cat_map.get(v)
            if idx is not None:
                vec[idx] = 1.0

    return vec


In [125]:
import numpy as np
import pandas as pd

# 1) Build the master feature map on the full column
attr_series = combined_df["attributes"]

num_map, cat_map = build_feature_map(attr_series)

dim = len(num_map) + sum(len(v) for v in cat_map.values())
print(f"Vector dim = {dim:,}  "
      f"({len(num_map)} numeric/bool  +  {dim-len(num_map)} one‑hot)")

# 2) Pre‑compute a list of feature names in index order --------------------------------
feature_names = [None] * dim
for k, idx in num_map.items():
    feature_names[idx] = k                   # e.g.  'RestaurantsPriceRange2'
for k, cmap in cat_map.items():
    for val, idx in cmap.items():
        feature_names[idx] = f"{k}_{val}"    # e.g.  'Cuisine_Sushi'

# 3) Encode every row ------------------------------------------------------------------
attr_matrix = np.vstack(
    attr_series.apply(lambda d: encode(d, num_map, cat_map, dim))
)

attr_df = pd.DataFrame(attr_matrix, columns=feature_names, dtype=np.float32)
print(f"attr_df shape : {attr_df.shape}")

# 4) Add to combined_df ---------------------------------------------------------------
combined_df = (
    combined_df
      .drop(columns=["attributes"])          # we’ve replaced it with numerics
      .reset_index(drop=True)
      .join(attr_df)                         # ← new columns for Annoy
)

import json, pathlib
pathlib.Path("../data").mkdir(exist_ok=True)
with open("../data/attribute_feature_map.json", "w") as f:
    json.dump({"num": num_map, "cat": cat_map}, f)

print("combined_df final shape:", combined_df.shape)


Vector dim = 418  (96 numeric/bool  +  322 one‑hot)
attr_df shape : (68782, 418)
combined_df final shape: (68782, 425)


In [126]:
# check output based on column datatypes
print(combined_df.dtypes)
print(combined_df.shape)
print(combined_df.columns.to_list())
import re
# Identify boolean columns (including those potentially created from attributes)
bool_cols = combined_df.select_dtypes(include=['bool']).columns
print(f"Boolean columns found: {list(bool_cols)}")
for col in bool_cols:
    combined_df[col] = combined_df[col].astype(int)

# Convert is_michelin specifically if it wasn't bool type already
if 'is_michelin' in combined_df.columns and combined_df['is_michelin'].dtype != 'int':
    # Ensure it's treated as boolean before converting to int
    combined_df['is_michelin'] = combined_df['is_michelin'].astype(bool).astype(int)

obj_bool_cols = [
    "Air conditioning",
    "Interesting wine list",
    "Valet parking",
    "Wheelchair access",
    "Garden or park",
    "Restaurant offering vegetarian menus",
    "Car park",
    "Great view",
    "Terrace",
    "Counter dining",
    "Notable sake list",
    "Shoes must be removed",
    "Cash only",
    "Brunch",
    "Credit cards not accepted",
    "Bring your own bottle",
    "Cash only - lunch",
    "Foreign credit cards not accepted"
]

# Clean column names in obj_bool_cols list to match flattened names
obj_bool_cols = [re.sub(r'[^a-zA-Z0-9_]', '', col.replace(' ', '_')) for col in obj_bool_cols]

# Filter list to columns that actually exist in the dataframe after flattening
obj_bool_cols = [col for col in obj_bool_cols if col in combined_df.columns]

# Check which of these are still object type (meaning initial mapping might have missed some cases)
obj_cols_needing_map = [col for col in obj_bool_cols if combined_df[col].dtype == 'object']

print(f"Object columns to check/map for boolean-like values: {obj_cols_needing_map}")

map_dict = {'True': 1, 'true': 1, '1': 1, True: 1,
            'False': 0, 'false': 0, '0': 0, False: 0,
            'None': 0, 'none': 0, None: 0, 'null': 0, '': 0,
            'yes': 1, 'no': 0}

for col in obj_cols_needing_map:
    # Apply mapping, coerce others to NaN
    combined_df[col] = combined_df[col].map(map_dict)
    # Convert to numeric, errors='coerce' handles original non-mappable values -> NaN
    combined_df[col] = pd.to_numeric(combined_df[col], errors='coerce')

# fill missing values with 0 (redundant if done in function, but safe)
numeric_cols = combined_df.select_dtypes(include=np.number).columns
combined_df[numeric_cols] = combined_df[numeric_cols].fillna(0)

# Ensure final integer type for all boolean-like columns
# Combine original bool cols, obj_bool_cols, and is_michelin
final_bool_cols = list(bool_cols) + obj_bool_cols + ['is_michelin']
# Ensure unique and existing columns
final_bool_cols = list(set([col for col in final_bool_cols if col in combined_df.columns]))

for col in final_bool_cols:
     # Check if column exists and is numeric but not integer
     if col in combined_df.columns and pd.api.types.is_numeric_dtype(combined_df[col]) and not pd.api.types.is_integer_dtype(combined_df[col]):
         # Check if conversion to int is safe (no NaNs, all are whole numbers)
         if combined_df[col].isnull().sum() == 0 and (combined_df[col] % 1 == 0).all():
             combined_df[col] = combined_df[col].astype(int)
         else:
             # Handle cases with NaNs or non-integers if necessary, e.g., fillna then convert
             print(f"Warning: Column {col} could not be safely converted to int. Contains NaNs or non-integers.")
             # Apply fillna(0) and convert, accepting potential data change
             combined_df[col] = combined_df[col].fillna(0).astype(int)

# check output based on column datatypes
print(combined_df.shape)
print("Checking dtypes of boolean-like columns after conversion:")
print(combined_df[final_bool_cols].head())
print(combined_df[final_bool_cols].dtypes)

business_id             object
name                    object
address                 object
longitude              float64
latitude               float64
                        ...   
Smoking_'no'           float32
DietaryRestrictions    float32
AcceptsInsurance       float32
Smoking                float32
Smoking_u'yes'         float32
Length: 425, dtype: object
(68782, 425)
['business_id', 'name', 'address', 'longitude', 'latitude', 'is_michelin', 'stars', 'Air conditioning', 'Interesting wine list', 'Valet parking', 'Wheelchair access', 'Cuisine_Korean', 'Cuisine_Korean Contemporary', 'RestaurantsPriceRange2_4', 'Garden or park', 'Restaurant offering vegetarian menus', 'Cuisine_Creative British', 'Cuisine_French', 'Cuisine_Modern Cuisine', 'Car park', 'Cuisine_Creative', 'Great view', 'Cuisine_Classic French', 'Cuisine_Modern French', 'Cuisine_Modern British', 'Terrace', 'Cuisine_Contemporary', 'Cuisine_Seafood', 'Cuisine_Vegan', 'Cuisine_Innovative', 'Counter dining', 'Notable sa

In [127]:
# New cell for validation
# Check if all pictures link to valid restaurants
# get business_ids of all restaurants in combined_df
all_valid_business_ids = set(combined_df['business_id'])
invalid_pictures = picture_df[~picture_df['business_id'].isin(all_valid_business_ids)]

print("Validation Results:")
print(f"Total pictures: {len(picture_df)}")
print(f"Pictures with valid restaurant links: {len(picture_df) - len(invalid_pictures)}")
print(f"Pictures with invalid restaurant links: {len(invalid_pictures)}")

if len(invalid_pictures) > 0:
    print("\nSample of invalid picture entries:")
    print(invalid_pictures.head())
else:
    print("\nAll pictures are linked to valid restaurants!")

# Optional: Remove any invalid pictures if found
if len(invalid_pictures) > 0:
    picture_df = picture_df[picture_df['business_id'].isin(all_valid_business_ids)]
    print(f"\nCleaned dataset now contains {len(picture_df)} valid pictures")




Validation Results:
Total pictures: 81941
Pictures with valid restaurant links: 81941
Pictures with invalid restaurant links: 0

All pictures are linked to valid restaurants!


In [128]:
# parse photos folder and remove any photos that are not in the cleaned dataset
import os

# Create a set of valid photo IDs
valid_photo_ids = set(picture_df['photo_id'])

# Define the path to the photos folder
photos_folder = '../data/photos'

# Iterate over all files in the photos folder
if os.path.exists(photos_folder):
    for filename in os.listdir(photos_folder):
        # Extract the photo ID from the filename
        photo_id = filename.split('.')[0]
        
        # Check if the photo ID is not in the valid set
        if photo_id not in valid_photo_ids:
            # Construct the full path to the file
            file_path = os.path.join(photos_folder, filename)
            
            # Remove the file
            try:
                os.remove(file_path)
                print(f"Removed invalid photo: {filename}")
            except OSError as e:
                print(f"Error removing file {file_path}: {e}")
                
    # Check the number of photos in the folder
    print(f"Remaining photos in the folder: {len(os.listdir(photos_folder))}")
else:
    print(f"Photos folder not found at: {photos_folder}")

Photos folder not found at: ../data/photos


In [None]:
# save restaurants to json for db import and csv for model training
restaurant_df_model_training = combined_df.copy().drop(columns=['name', 'address', 'Air conditioning', 'Wheelchair access', 'Interesting wine list', 'Valet parking', 'Garden or park', 'Restaurant offering vegetarian menus', 'Car park', 'Great view', 'Terrace', 'Counter dining', 'Notable sake list', 'Shoes must be removed', 'Cash only', 'Brunch', 'Credit cards not accepted', 'Bring your own bottle', 'Cash only - lunch', 'Foreign credit cards not accepted'])
picture_df.to_csv('../data/cleaned_photos.csv', index=False)
# keep name, address, stars, latitude, longitude, is_michelin, business_id, RestaurantsPriceRange2
# Select important columns for database
columns_to_keep = ['name', 'address', 'stars', 'latitude', 'longitude', 'is_michelin', 'business_id']
print(combined_df["RestaurantsPriceRange2"].unique())
# Find the RestaurantsPriceRange2 column (it might be extracted from attributes)
# Ensure the price column name is cleaned if necessary
price_col_name = 'RestaurantsPriceRange2'
if price_col_name not in combined_df.columns:
    # Attempt to find a similarly named column if cleaning changed it
    potential_price_cols = [col for col in combined_df.columns if 'RestaurantsPriceRange' in col]
    if potential_price_cols:
        price_col_name = potential_price_cols[0]
    else:
        print("Warning: RestaurantsPriceRange2 column not found. Price will not be included in DB export.")
        price_col_name = None

if price_col_name and price_col_name in combined_df.columns:
    # convert price_col to string for JSON export if it's numeric
    if pd.api.types.is_numeric_dtype(combined_df[price_col_name]):
         combined_df[price_col_name] = combined_df[price_col_name].astype(str)
    columns_to_keep.append(price_col_name)
else:
    price_col_name = None # Ensure it's None if not added

# Create database version with selected columns
restaurant_df_database = combined_df[columns_to_keep].copy()
picture_df.to_json('../data/cleaned_photos.json', orient='records', lines=True)

print("Columns for model training:", restaurant_df_model_training.columns.to_list())
print("Columns for database:", restaurant_df_database.columns.to_list())
print(restaurant_df_model_training.head())
restaurant_df_database.to_json('../data/cleaned_restaurants.json', orient='records', lines=True)
restaurant_df_model_training.to_pickle('../data/cleaned_restaurants.pkl')

[4. 1. 3. 2. 0.]
Columns for model training: ['business_id', 'longitude', 'latitude', 'is_michelin', 'stars', 'Cuisine_Korean', 'Cuisine_Korean Contemporary', 'RestaurantsPriceRange2_4', 'Cuisine_Creative British', 'Cuisine_French', 'Cuisine_Modern Cuisine', 'Cuisine_Creative', 'Cuisine_Classic French', 'Cuisine_Modern French', 'Cuisine_Modern British', 'Cuisine_Contemporary', 'Cuisine_Seafood', 'Cuisine_Vegan', 'Cuisine_Innovative', 'Cuisine_Japanese', 'Cuisine_Sushi', 'Cuisine_American', 'Cuisine_Noodles', 'RestaurantsPriceRange2_1', 'Cuisine_Naengmyeon', 'Cuisine_Gomtang', 'Cuisine_Dwaeji-gukbap', 'Cuisine_Southern Thai', 'Cuisine_Asturian', 'Cuisine_Traditional Cuisine', 'Cuisine_Italian Contemporary', 'Cuisine_Alpine', 'Cuisine_Mediterranean Cuisine', 'Cuisine_Seasonal Cuisine', 'Cuisine_Country cooking', 'Cuisine_Farm to table', 'Cuisine_French Contemporary', 'Cuisine_Chinese', 'Cuisine_Taizhou', 'Cuisine_Chao Zhou', 'Cuisine_Taiwanese contemporary', 'Cuisine_Singaporean', 'Cuisi

In [130]:
print(restaurant_df_model_training.columns.to_list())

['business_id', 'longitude', 'latitude', 'is_michelin', 'stars', 'Cuisine_Korean', 'Cuisine_Korean Contemporary', 'RestaurantsPriceRange2_4', 'Cuisine_Creative British', 'Cuisine_French', 'Cuisine_Modern Cuisine', 'Cuisine_Creative', 'Cuisine_Classic French', 'Cuisine_Modern French', 'Cuisine_Modern British', 'Cuisine_Contemporary', 'Cuisine_Seafood', 'Cuisine_Vegan', 'Cuisine_Innovative', 'Cuisine_Japanese', 'Cuisine_Sushi', 'Cuisine_American', 'Cuisine_Noodles', 'RestaurantsPriceRange2_1', 'Cuisine_Naengmyeon', 'Cuisine_Gomtang', 'Cuisine_Dwaeji-gukbap', 'Cuisine_Southern Thai', 'Cuisine_Asturian', 'Cuisine_Traditional Cuisine', 'Cuisine_Italian Contemporary', 'Cuisine_Alpine', 'Cuisine_Mediterranean Cuisine', 'Cuisine_Seasonal Cuisine', 'Cuisine_Country cooking', 'Cuisine_Farm to table', 'Cuisine_French Contemporary', 'Cuisine_Chinese', 'Cuisine_Taizhou', 'Cuisine_Chao Zhou', 'Cuisine_Taiwanese contemporary', 'Cuisine_Singaporean', 'Cuisine_Cantonese', 'Cuisine_Californian', 'Cuisin