In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.metrics import classification_report, accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

In [3]:
# Load the source dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("categorized_large_dataset.csv")

In [4]:
# Work on a copy so later column additions do not touch the frame loaded from CSV.
df_processed = df.copy()

In [5]:
# Rebuild the model-ready frame from the untouched `df` so this cell can be
# re-run safely. Mutating the previous `df_processed` made a second run fail:
# 'Craving_Category' had already been consumed by get_dummies (KeyError) and
# the price_range_* dummies would have been appended twice.
df_processed = df.copy()

# Price buckets: four equal-frequency (quartile) bins over the item price.
df_processed['price_range'] = pd.qcut(
    df_processed['Restaurant_categories_food_item_price'],
    4,
    labels=['budget', 'moderate', 'premium', 'luxury'],
)

# Location-based feature: one integer code per distinct location
# (pandas category codes use -1 for missing values).
df_processed['location_encoded'] = df_processed['Restaurant_location'].astype('category').cat.codes

# One-hot encode both categorical columns in a single call. Listing
# 'price_range' first keeps the resulting column order unchanged:
# price_range_* dummies, then Craving_Category_* dummies.
df_processed = pd.get_dummies(df_processed, columns=['price_range', 'Craving_Category'])

In [6]:
# Preview the first rows, including the newly added one-hot columns.
df_processed.head()

Unnamed: 0,Restaurant_name,Restaurant_url,Restaurant_location,Restaurant_minimum_order,Restaurant_additional_service_charge,Restaurant_additional_VAT,Restaurant_categories_name,Restaurant_categories_food_item_ingredient,Restaurant_categories_food_item_price,Cluster,...,price_range_premium,price_range_luxury,Craving_Category_Cheesy,Craving_Category_Comforting,Craving_Category_Heart,Craving_Category_Light,Craving_Category_Salty,Craving_Category_Spicy,Craving_Category_Uncategorized,Craving_Category_Veggie
0,Fire And Ice Pizzeria,https://foodmandu.com/Restaurant/Details/269,Thamel,500,10.0,Soup,Soup,Cream of pumpkin soup with focaccia bread.,445.0,2,...,False,False,False,True,False,False,False,False,False,False
1,Fire And Ice Pizzeria,https://foodmandu.com/Restaurant/Details/269,Thamel,500,10.0,Soup,Soup,Traditional minestrone soup served with focacc...,460.0,2,...,False,False,False,True,False,False,False,False,False,False
2,Fire And Ice Pizzeria,https://foodmandu.com/Restaurant/Details/269,Thamel,500,10.0,Soup,Soup,Traditional Italian cream of tomato soup with ...,445.0,2,...,False,False,False,True,False,False,False,False,False,False
3,Fire And Ice Pizzeria,https://foodmandu.com/Restaurant/Details/269,Thamel,500,10.0,Soup,Organic Salad,"Mixed lettuce with boiled eggs, homemade dress...",380.0,2,...,False,False,False,False,False,True,False,False,False,False
4,Fire And Ice Pizzeria,https://foodmandu.com/Restaurant/Details/269,Thamel,500,10.0,Soup,Organic Salad,"Mixed lettuce with a boiled egg, homemade dres...",585.0,2,...,True,False,False,False,False,True,False,False,False,False


In [7]:
# Model inputs in a fixed order: craving dummies first, then price-range
# dummies, then the integer location code.
features = [
    column
    for prefix in ('Craving_', 'price_range_')
    for column in df_processed.columns
    if column.startswith(prefix)
]
features.append('location_encoded')

# Standardize the selected columns over the full frame.
# NOTE(review): `df_scaled` is not referenced by any later cell visible here
# (training goes through a Pipeline with its own StandardScaler) — confirm
# whether this standalone scaling is still needed.
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df_processed[features])


In [8]:
# Show the final feature names in the order they are fed to the model.
print(features)

['Craving_Category_Cheesy', 'Craving_Category_Comforting', 'Craving_Category_Heart', 'Craving_Category_Light', 'Craving_Category_Salty', 'Craving_Category_Spicy', 'Craving_Category_Uncategorized', 'Craving_Category_Veggie', 'price_range_budget', 'price_range_moderate', 'price_range_premium', 'price_range_luxury', 'location_encoded']


In [9]:
# Target encoding: map each restaurant category name to an integer class id.
# The fitted encoder is kept so predictions can be decoded back to names.
label_encoder_category = LabelEncoder()
df_processed['restaurant_category_encoded'] = label_encoder_category.fit_transform(
    df_processed['Restaurant_categories_name']
)

In [10]:
# Preview the frame again; `restaurant_category_encoded` is now the last column.
df_processed.head()

Unnamed: 0,Restaurant_name,Restaurant_url,Restaurant_location,Restaurant_minimum_order,Restaurant_additional_service_charge,Restaurant_additional_VAT,Restaurant_categories_name,Restaurant_categories_food_item_ingredient,Restaurant_categories_food_item_price,Cluster,...,price_range_luxury,Craving_Category_Cheesy,Craving_Category_Comforting,Craving_Category_Heart,Craving_Category_Light,Craving_Category_Salty,Craving_Category_Spicy,Craving_Category_Uncategorized,Craving_Category_Veggie,restaurant_category_encoded
0,Fire And Ice Pizzeria,https://foodmandu.com/Restaurant/Details/269,Thamel,500,10.0,Soup,Soup,Cream of pumpkin soup with focaccia bread.,445.0,2,...,False,False,True,False,False,False,False,False,False,53
1,Fire And Ice Pizzeria,https://foodmandu.com/Restaurant/Details/269,Thamel,500,10.0,Soup,Soup,Traditional minestrone soup served with focacc...,460.0,2,...,False,False,True,False,False,False,False,False,False,53
2,Fire And Ice Pizzeria,https://foodmandu.com/Restaurant/Details/269,Thamel,500,10.0,Soup,Soup,Traditional Italian cream of tomato soup with ...,445.0,2,...,False,False,True,False,False,False,False,False,False,53
3,Fire And Ice Pizzeria,https://foodmandu.com/Restaurant/Details/269,Thamel,500,10.0,Soup,Organic Salad,"Mixed lettuce with boiled eggs, homemade dress...",380.0,2,...,False,False,False,False,True,False,False,False,False,36
4,Fire And Ice Pizzeria,https://foodmandu.com/Restaurant/Details/269,Thamel,500,10.0,Soup,Organic Salad,"Mixed lettuce with a boiled egg, homemade dres...",585.0,2,...,False,False,False,False,True,False,False,False,False,36


In [11]:
# Split the processed frame into model inputs (X) and the encoded target (y).
X, y = df_processed[features], df_processed['restaurant_category_encoded']

In [12]:
# Inspect the last rows of the feature matrix.
X.tail()

Unnamed: 0,Craving_Category_Cheesy,Craving_Category_Comforting,Craving_Category_Heart,Craving_Category_Light,Craving_Category_Salty,Craving_Category_Spicy,Craving_Category_Uncategorized,Craving_Category_Veggie,price_range_budget,price_range_moderate,price_range_premium,price_range_luxury,location_encoded
670,False,False,False,False,False,False,True,False,False,False,True,False,0
671,False,False,False,False,False,False,True,False,False,True,False,False,0
672,False,False,False,False,False,False,True,False,False,False,True,False,0
673,False,False,False,False,False,False,True,False,False,True,False,False,0
674,False,False,False,False,False,False,True,False,False,False,True,False,0


In [13]:
# Inspect the first encoded target values.
y.head()

0    53
1    53
2    53
3    36
4    36
Name: restaurant_category_encoded, dtype: int64

In [14]:
# Hold out 25% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [15]:
# Scaling lives inside the pipeline, so the scaler is fitted only on whatever
# data is passed to fit(). The step names ('scaler', 'knn') are what the
# 'knn__*' keys of the search grid refer to.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('knn', KNeighborsClassifier(n_neighbors=9)),
    ]
)

In [16]:
# Hyper-parameter grid for the 'knn' pipeline step.
# 'minkowski' was dropped from the metrics: with KNeighborsClassifier's default
# p=2 it is the same distance as 'euclidean', so it only repeated a third of
# the fits without adding a new candidate.
param_grid = {
    'knn__n_neighbors': range(1, 21),  # k values from 1 to 20 inclusive
    'knn__weights': ['uniform', 'distance'],  # equal votes vs. inverse-distance votes
    'knn__metric': ['euclidean', 'manhattan'],  # L2 and L1 distances
}

In [17]:
# Exhaustive 5-fold cross-validated search over `param_grid`, ranked by accuracy.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)

In [18]:
# Run the cross-validated search on the training split only.
grid_search.fit(X_train, y_train)

# Print the best results
# best_score_ is the mean cross-validated accuracy of the winning configuration.
print(f"Best Accuracy: {grid_search.best_score_:.2f}")
print(f"Best Parameters: {grid_search.best_params_}")

# Also report accuracy on the held-out split, which the cells shown here never
# evaluated. score() uses the refitted best estimator and the search's
# scoring metric (accuracy).
print(f"Test Accuracy: {grid_search.score(X_test, y_test):.2f}")



Best Accuracy: 0.46
Best Parameters: {'knn__metric': 'euclidean', 'knn__n_neighbors': 15, 'knn__weights': 'distance'}


In [19]:
import pickle

In [3]:
# Load a previously pickled model from the parent directory.
# NOTE(review): the save cell in this notebook writes "food_recommendation_knn.pkl"
# to the current directory, not "../" — confirm this is the intended file.
# NOTE(review): `knn_model` is not used by the cells shown here; the prediction
# cell calls `grid_search` directly — confirm whether this load is still needed.
# SECURITY: pickle.load can execute arbitrary code from the file; only load
# pickles from a trusted source.
with open("../food_recommendation_knn.pkl", 'rb') as model_file:
    knn_model = pickle.load(model_file)


In [26]:
# List the training feature columns; prediction inputs must use this same layout.
print(X.columns)

Index(['Craving_Category_Cheesy', 'Craving_Category_Comforting',
       'Craving_Category_Heart', 'Craving_Category_Light',
       'Craving_Category_Salty', 'Craving_Category_Spicy',
       'Craving_Category_Uncategorized', 'Craving_Category_Veggie',
       'price_range_budget', 'price_range_moderate', 'price_range_premium',
       'price_range_luxury', 'location_encoded'],
      dtype='object')


In [25]:
# Example query: a single row laid out like the training features.
# NOTE(review): the craving columns come from one-hot encoding a single
# 'Craving_Category' column, so training rows presumably carry one craving flag
# each; this example sets two (Comforting and Salty) — confirm that is intended.
dummy_input = pd.DataFrame([{
    'Craving_Category_Cheesy': 0,
    'Craving_Category_Comforting': 1,  # Example: craving comforting food
    'Craving_Category_Heart': 0,
    'Craving_Category_Light': 0,
    'Craving_Category_Salty': 1,  # Example: craving salty food
    'Craving_Category_Spicy': 0,
    'Craving_Category_Uncategorized': 0,
    'Craving_Category_Veggie': 0,
    'price_range_budget': 0,
    'price_range_moderate': 1,  # Example: moderate pricing
    'price_range_premium': 0,
    'price_range_luxury': 0,
    'location_encoded': 2,  # Example location encoding
}]).reindex(columns=X.columns, fill_value=0)  # enforce the training column order

# Predict the encoded class, then map it back to the category name.
prediction = grid_search.predict(dummy_input)
decoded_label = label_encoder_category.inverse_transform(prediction)
print("Predicted Restaurant Category:", decoded_label[0])

Predicted Restaurant Category: Lockdown Menu


In [22]:

import pickle


# Persist the fitted search object and the label encoder next to the notebook,
# so predictions can later be made and decoded without retraining.
for pickle_path, artifact in (
    ("food_recommendation_knn.pkl", grid_search),
    ("label_encoder.pkl", label_encoder_category),
):
    with open(pickle_path, "wb") as out_file:
        pickle.dump(artifact, out_file)
