## Import libraries

In [22]:
import pandas as pd
import numpy as np
import joblib
import ast
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from itertools import chain
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

# Ignore warnings (note: this filter silences every warning category, not only FutureWarning)
import warnings
warnings.filterwarnings('ignore')

## Load data

In [23]:
# Read the recipes CSV that the classifier is trained on and preview its first rows
labelled_recipes_csv = '../data/RAW_recipes_cleaned_w.csv'
cuisine_df = pd.read_csv(labelled_recipes_csv)
cuisine_df.head()

Unnamed: 0,id,name,Cuisine_Tags_str,replaced_ingredients_str,steps_str,tags_str,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,Cuisine_Tags,replaced_ingredients
0,137739,arriba baked winter squash mexican style,mexican,winter squash mexican seasoning mixed spice ho...,make a choice and proceed with recipe dependin...,60-minutes-or-less time-to-make course main-in...,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,['mexican'],"['winter squash', 'mexican seasoning', 'mixed ..."
1,31490,a bit different breakfast pizza,northeastern-united-states,pizza crust sausage egg milk salt and pepper c...,preheat oven to 425 degrees f press dough into...,30-minutes-or-less time-to-make course main-in...,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,['northeastern-united-states'],"['pizza crust', 'sausage', 'egg', 'milk', 'sal..."
2,44061,amish tomato ketchup for canning,northeastern-united-states,tomato juice apple cider vinegar sugar salt pe...,"mix all ingredients& boil for 2 1 / 2 hours , ...",weeknight time-to-make course main-ingredient ...,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,['northeastern-united-states'],"['tomato juice', 'apple cider vinegar', 'sugar..."
3,25274,aww marinated olives,canadian,fennel seed green olive ripe olive garlic pepp...,toast the fennel seeds and lightly crush them ...,15-minutes-or-less time-to-make course main-in...,15,21730,2002-04-14,"['15-minutes-or-less', 'time-to-make', 'course...","[380.7, 53.0, 7.0, 24.0, 6.0, 24.0, 6.0]",4,['toast the fennel seeds and lightly crush the...,my italian mil was thoroughly impressed by my ...,"['fennel seed', 'green olive', 'ripe olive', '...",9,['canadian'],"['fennel seed', 'green olive', 'ripe olive', '..."
4,43026,chile rellenos,southwestern-united-states,egg roll wrap whole green chili cheese cornsta...,drain green chiles sprinkle cornstarch on shee...,60-minutes-or-less time-to-make course main-in...,45,52268,2002-10-14,"['60-minutes-or-less', 'time-to-make', 'course...","[94.0, 10.0, 0.0, 11.0, 11.0, 21.0, 0.0]",9,"['drain green chiles', 'sprinkle cornstarch on...",a favorite from a local restaurant no longer i...,"['egg roll wrap', 'whole green chili', 'cheese...",5,['southwestern-united-states'],"['egg roll wrap', 'whole green chili', 'cheese..."


## Data preprocessing

In [24]:
# Build the TF-IDF feature matrix (X) from the ingredient text and take the
# cuisine tag string as the target (y).
# NOTE(review): the vectorizer is fitted on every row before the train/test
# split performed later in the notebook, so the IDF weights are computed with
# the test rows included — consider fitting on the training rows only.
ingredient_text = cuisine_df['replaced_ingredients_str']
cuisine_labels = cuisine_df['Cuisine_Tags_str']

vectorizer = TfidfVectorizer()
X, y = vectorizer.fit_transform(ingredient_text), cuisine_labels

In [25]:
# Save the fitted TF-IDF vectorizer to disk so the same vocabulary and IDF
# weights can be reloaded later in the notebook via vectorizer_joblib_file
vectorizer_joblib_file = '../model/tfidf_vectorizer_0529.sav'
joblib.dump(vectorizer, vectorizer_joblib_file)
print(f"TF-IDF vectorizer saved to {vectorizer_joblib_file}")

TF-IDF vectorizer saved to ../model/tfidf_vectorizer_0529.sav


## Model Training

In [26]:
# Split data into training and test sets: 20% of the rows are held out for
# testing, and random_state fixes the shuffle so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [27]:
# Train a linear SVM classifier on the TF-IDF training features.
# random_state seeds the random number generation used by the dual
# coordinate-descent solver, so re-running the notebook reproduces the same
# fitted model (it has no effect when the primal solver is selected).
# The seed matches the one used for the train/test split.
svc = LinearSVC(random_state=42)
svc.fit(X_train, y_train)

In [28]:
# Mean accuracy of the fitted classifier on the held-out test split
svc.score(X_test, y_test)

0.5566468253968254

In [29]:
# # Hyperparameter tuning and cross-validation (left disabled: the tuned model did not score better than the default LinearSVC above)
# # Define parameter grid
# param_grid = {
#     'C': [0.1, 1, 10, 100],
#     'max_iter': [1000, 5000, 10000, 20000]
# }

# # Perform hyperparameter tuning using GridSearchCV
# grid_search = GridSearchCV(LinearSVC(), param_grid, cv=5)
# grid_search.fit(X_train, y_train)

# # Output best parameters
# print("Best parameters: ", grid_search.best_params_)

# # Use the best model to make predictions
# best_model = grid_search.best_estimator_
# print("Test accuracy: ", best_model.score(X_test, y_test))

# # Evaluate the model using cross-validation
# cv_scores = cross_val_score(best_model, X, y, cv=5)
# print("Cross-validation scores: ", cv_scores)
# print("Mean cross-validation score: ", np.mean(cv_scores))

In [30]:
# Predict on the held-out split, then report overall accuracy and per-class metrics
y_pred = svc.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("Classification Report:")
print(per_class_report)

Accuracy: 0.5566468253968254
Classification Report:
                            precision    recall  f1-score   support

                australian       0.52      0.50      0.51       452
                  austrian       0.00      0.00      0.00        26
                   belgian       0.00      0.00      0.00        19
                 brazilian       0.41      0.14      0.21        49
                  canadian       0.37      0.35      0.36       810
                   chilean       0.50      0.06      0.10        18
                   chinese       0.61      0.82      0.70       330
                 colombian       0.50      0.11      0.18         9
                     cuban       0.43      0.13      0.20        46
                     czech       0.00      0.00      0.00        18
                    danish       0.20      0.05      0.08        43
                     dutch       0.33      0.06      0.11        48
                  egyptian       0.36      0.16      0.22      

## Test the model

In [31]:
# Prediction function
def cuisine_recommender(ingredients, model, tfidf_vectorizer=None):
    """Predict the cuisine label for one ingredient string.

    Parameters
    ----------
    ingredients : str
        Ingredients as a single string, e.g. 'apple cheese grapes sausage'.
    model : object
        Fitted estimator exposing ``predict``.
    tfidf_vectorizer : object, optional
        Fitted vectorizer exposing ``transform``. When omitted, the
        notebook-level ``vectorizer`` is used, which keeps existing
        two-argument calls working unchanged.

    Returns
    -------
    The value returned by ``model.predict`` for the single-row feature matrix.
    """
    if tfidf_vectorizer is None:
        # Backward-compatible fallback to the notebook-global vectorizer.
        tfidf_vectorizer = vectorizer
    # transform() expects an iterable of documents, so wrap the single string.
    ingredient_vector = tfidf_vectorizer.transform([ingredients])
    return model.predict(ingredient_vector)

# Try the recommender on a couple of hand-written ingredient strings
for sample_ingredients in ('apple cheese grapes sausage', 'cumin chicken wheat salt'):
    print(cuisine_recommender(sample_ingredients, svc))

['italian']
['moroccan']


## Save the model

In [32]:
# Persist the trained classifier to disk; joblib.dump returns the list of
# file names it wrote, which is what this cell displays
model_joblib_file = '../model/classification_model_SVC_0530.sav'
joblib.dump(svc, model_joblib_file)

['../model/classification_model_SVC_0530.sav']

In [33]:
# Round-trip check: reload the classifier that was just saved and predict on the test split.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted source.
loaded_model = joblib.load(model_joblib_file)

y_preds = loaded_model.predict(X_test)
y_preds

array(['mexican', 'italian', 'italian', ..., 'greek',
       'southern-united-states', 'moroccan'], dtype=object)

In [34]:
# Accuracy of the reloaded model on the held-out test split
score = loaded_model.score(X_test, y_test)
print(f'Model Score: {score}')

Model Score: 0.5566468253968254


## Predict cuisines for the dataset without cuisine tags

In [35]:
# Load the recipes that still need a cuisine label
# (Cuisine_Tags_str is empty in the rows previewed by head())
RAW_recipes_with_no_cuisine = pd.read_csv('../data/RAW_recipes_cleaned_wo.csv')
RAW_recipes_with_no_cuisine.head()

Unnamed: 0,id,name,Cuisine_Tags_str,replaced_ingredients_str,steps_str,tags_str,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,Cuisine_Tags,replaced_ingredients
0,112140,all in the kitchen chili,,ground beef yellow onion diced tomato tomato p...,brown ground beef in large pot add chopped oni...,time-to-make course preparation main-dish chil...,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onion', 'diced tomato'...",13,[],"['ground beef', 'yellow onion', 'diced tomato'..."
1,59389,alouette potatoes,,cheese new potato shallot parsley tarragon oli...,place potatoes in a large pot of lightly salte...,60-minutes-or-less time-to-make course main-in...,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,[],"['cheese', 'new potato', 'shallot', 'parsley',..."
2,5289,apple a day milk shake,,milk vanilla ice cream frozen apple juice conc...,combine ingredients in blender cover and blend...,15-minutes-or-less time-to-make course main-in...,0,1533,1999-12-06,"['15-minutes-or-less', 'time-to-make', 'course...","[160.2, 10.0, 55.0, 3.0, 9.0, 20.0, 7.0]",4,"['combine ingredients in blender', 'cover and ...",,"['milk', 'vanilla ice cream', 'frozen apple ju...",4,[],"['milk', 'vanilla ice cream', 'frozen apple ju..."
3,67888,backyard style barbecued ribs,,pork sparerib soy sauce fresh garlic fresh gin...,in a medium saucepan combine all the ingredien...,weeknight time-to-make course main-ingredient ...,120,10404,2003-07-30,"['weeknight', 'time-to-make', 'course', 'main-...","[1109.5, 83.0, 378.0, 275.0, 96.0, 86.0, 36.0]",10,['in a medium saucepan combine all the ingredi...,this recipe is posted by request and was origi...,"['pork sparerib', 'soy sauce', 'fresh garlic',...",22,[],"['pork sparerib', 'soy sauce', 'fresh garlic',..."
4,70971,bananas 4 ice cream pie,,chocolate sandwich style cookie chocolate syru...,"crumble cookies into a 9-inch pie plate , or c...",weeknight time-to-make course main-ingredient ...,180,102353,2003-09-10,"['weeknight', 'time-to-make', 'course', 'main-...","[4270.8, 254.0, 1306.0, 111.0, 127.0, 431.0, 2...",8,"['crumble cookies into a 9-inch pie plate , or...",,"['chocolate sandwich style cookie', 'chocolate...",6,[],"['chocolate sandwich style cookie', 'chocolate..."


In [36]:
# Reload the persisted vectorizer so the untagged recipes are encoded with the
# same vocabulary and IDF weights that were saved alongside the classifier.
loaded_vectorizer = joblib.load(vectorizer_joblib_file)

# Convert text data to TF-IDF features.
# The result gets its own name so the training matrix `X` is not overwritten
# by a matrix with a different number of rows (which would break a re-run of
# the train/test split cell).
X_no_cuisine = loaded_vectorizer.transform(RAW_recipes_with_no_cuisine['replaced_ingredients_str'])

# Predict one cuisine label per recipe
predictions = loaded_model.predict(X_no_cuisine)

# Store each prediction twice: as a plain string in Cuisine_Tags_str and as a
# single-element list in Cuisine_Tags (the [] format used by that column).
RAW_recipes_with_no_cuisine['Cuisine_Tags_str'] = predictions
RAW_recipes_with_no_cuisine['Cuisine_Tags'] = [[label] for label in predictions]

In [37]:
# Preview the frame with Cuisine_Tags_str / Cuisine_Tags filled in from the model's predictions
RAW_recipes_with_no_cuisine.head()

Unnamed: 0,id,name,Cuisine_Tags_str,replaced_ingredients_str,steps_str,tags_str,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,Cuisine_Tags,replaced_ingredients
0,112140,all in the kitchen chili,mexican,ground beef yellow onion diced tomato tomato p...,brown ground beef in large pot add chopped oni...,time-to-make course preparation main-dish chil...,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onion', 'diced tomato'...",13,[mexican],"['ground beef', 'yellow onion', 'diced tomato'..."
1,59389,alouette potatoes,french,cheese new potato shallot parsley tarragon oli...,place potatoes in a large pot of lightly salte...,60-minutes-or-less time-to-make course main-in...,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,[french],"['cheese', 'new potato', 'shallot', 'parsley',..."
2,5289,apple a day milk shake,canadian,milk vanilla ice cream frozen apple juice conc...,combine ingredients in blender cover and blend...,15-minutes-or-less time-to-make course main-in...,0,1533,1999-12-06,"['15-minutes-or-less', 'time-to-make', 'course...","[160.2, 10.0, 55.0, 3.0, 9.0, 20.0, 7.0]",4,"['combine ingredients in blender', 'cover and ...",,"['milk', 'vanilla ice cream', 'frozen apple ju...",4,[canadian],"['milk', 'vanilla ice cream', 'frozen apple ju..."
3,67888,backyard style barbecued ribs,mexican,pork sparerib soy sauce fresh garlic fresh gin...,in a medium saucepan combine all the ingredien...,weeknight time-to-make course main-ingredient ...,120,10404,2003-07-30,"['weeknight', 'time-to-make', 'course', 'main-...","[1109.5, 83.0, 378.0, 275.0, 96.0, 86.0, 36.0]",10,['in a medium saucepan combine all the ingredi...,this recipe is posted by request and was origi...,"['pork sparerib', 'soy sauce', 'fresh garlic',...",22,[mexican],"['pork sparerib', 'soy sauce', 'fresh garlic',..."
4,70971,bananas 4 ice cream pie,southern-united-states,chocolate sandwich style cookie chocolate syru...,"crumble cookies into a 9-inch pie plate , or c...",weeknight time-to-make course main-ingredient ...,180,102353,2003-09-10,"['weeknight', 'time-to-make', 'course', 'main-...","[4270.8, 254.0, 1306.0, 111.0, 127.0, 431.0, 2...",8,"['crumble cookies into a 9-inch pie plate , or...",,"['chocolate sandwich style cookie', 'chocolate...",6,[southern-united-states],"['chocolate sandwich style cookie', 'chocolate..."


## Combine the two datasets into one

In [38]:
# Load the other dataset: the recipes that already have a cuisine tag
# (same file that was read into cuisine_df at the top of the notebook)
RAW_recipes_with_one_cuisine = pd.read_csv('../data/RAW_recipes_cleaned_w.csv')
RAW_recipes_with_one_cuisine.head()

Unnamed: 0,id,name,Cuisine_Tags_str,replaced_ingredients_str,steps_str,tags_str,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,Cuisine_Tags,replaced_ingredients
0,137739,arriba baked winter squash mexican style,mexican,winter squash mexican seasoning mixed spice ho...,make a choice and proceed with recipe dependin...,60-minutes-or-less time-to-make course main-in...,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,['mexican'],"['winter squash', 'mexican seasoning', 'mixed ..."
1,31490,a bit different breakfast pizza,northeastern-united-states,pizza crust sausage egg milk salt and pepper c...,preheat oven to 425 degrees f press dough into...,30-minutes-or-less time-to-make course main-in...,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,['northeastern-united-states'],"['pizza crust', 'sausage', 'egg', 'milk', 'sal..."
2,44061,amish tomato ketchup for canning,northeastern-united-states,tomato juice apple cider vinegar sugar salt pe...,"mix all ingredients& boil for 2 1 / 2 hours , ...",weeknight time-to-make course main-ingredient ...,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,['northeastern-united-states'],"['tomato juice', 'apple cider vinegar', 'sugar..."
3,25274,aww marinated olives,canadian,fennel seed green olive ripe olive garlic pepp...,toast the fennel seeds and lightly crush them ...,15-minutes-or-less time-to-make course main-in...,15,21730,2002-04-14,"['15-minutes-or-less', 'time-to-make', 'course...","[380.7, 53.0, 7.0, 24.0, 6.0, 24.0, 6.0]",4,['toast the fennel seeds and lightly crush the...,my italian mil was thoroughly impressed by my ...,"['fennel seed', 'green olive', 'ripe olive', '...",9,['canadian'],"['fennel seed', 'green olive', 'ripe olive', '..."
4,43026,chile rellenos,southwestern-united-states,egg roll wrap whole green chili cheese cornsta...,drain green chiles sprinkle cornstarch on shee...,60-minutes-or-less time-to-make course main-in...,45,52268,2002-10-14,"['60-minutes-or-less', 'time-to-make', 'course...","[94.0, 10.0, 0.0, 11.0, 11.0, 21.0, 0.0]",9,"['drain green chiles', 'sprinkle cornstarch on...",a favorite from a local restaurant no longer i...,"['egg roll wrap', 'whole green chili', 'cheese...",5,['southwestern-united-states'],"['egg roll wrap', 'whole green chili', 'cheese..."


In [39]:
# Row counts of the predicted-label frame and the tagged frame, in that order
len(RAW_recipes_with_no_cuisine), len(RAW_recipes_with_one_cuisine)

(181238, 50399)

In [40]:
# Combine the two datasets
RAW_recipes_combined = pd.concat([RAW_recipes_with_one_cuisine, RAW_recipes_with_no_cuisine], axis=0)
RAW_recipes_combined.head()

Unnamed: 0,id,name,Cuisine_Tags_str,replaced_ingredients_str,steps_str,tags_str,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,Cuisine_Tags,replaced_ingredients
0,137739,arriba baked winter squash mexican style,mexican,winter squash mexican seasoning mixed spice ho...,make a choice and proceed with recipe dependin...,60-minutes-or-less time-to-make course main-in...,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,['mexican'],"['winter squash', 'mexican seasoning', 'mixed ..."
1,31490,a bit different breakfast pizza,northeastern-united-states,pizza crust sausage egg milk salt and pepper c...,preheat oven to 425 degrees f press dough into...,30-minutes-or-less time-to-make course main-in...,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,['northeastern-united-states'],"['pizza crust', 'sausage', 'egg', 'milk', 'sal..."
2,44061,amish tomato ketchup for canning,northeastern-united-states,tomato juice apple cider vinegar sugar salt pe...,"mix all ingredients& boil for 2 1 / 2 hours , ...",weeknight time-to-make course main-ingredient ...,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,['northeastern-united-states'],"['tomato juice', 'apple cider vinegar', 'sugar..."
3,25274,aww marinated olives,canadian,fennel seed green olive ripe olive garlic pepp...,toast the fennel seeds and lightly crush them ...,15-minutes-or-less time-to-make course main-in...,15,21730,2002-04-14,"['15-minutes-or-less', 'time-to-make', 'course...","[380.7, 53.0, 7.0, 24.0, 6.0, 24.0, 6.0]",4,['toast the fennel seeds and lightly crush the...,my italian mil was thoroughly impressed by my ...,"['fennel seed', 'green olive', 'ripe olive', '...",9,['canadian'],"['fennel seed', 'green olive', 'ripe olive', '..."
4,43026,chile rellenos,southwestern-united-states,egg roll wrap whole green chili cheese cornsta...,drain green chiles sprinkle cornstarch on shee...,60-minutes-or-less time-to-make course main-in...,45,52268,2002-10-14,"['60-minutes-or-less', 'time-to-make', 'course...","[94.0, 10.0, 0.0, 11.0, 11.0, 21.0, 0.0]",9,"['drain green chiles', 'sprinkle cornstarch on...",a favorite from a local restaurant no longer i...,"['egg roll wrap', 'whole green chili', 'cheese...",5,['southwestern-united-states'],"['egg roll wrap', 'whole green chili', 'cheese..."


In [41]:
# Total row count after concatenation; expected to equal the sum of the two frames' row counts
len(RAW_recipes_combined)

231637

## Save to CSV

In [42]:
# Save the combined dataframe. index=False leaves the DataFrame index out of the file.
# NOTE(review): the 'Unnamed: 0' column visible in the previews is written as a
# regular column — confirm it is wanted downstream.
RAW_recipes_combined.to_csv('../data/RAW_recipes_cleaned_combined.csv', index=False)