# Predict Cuisine from Ingredients

In [1]:
import csv

import numpy as np
import pandas as pd

# importing the dataset; only its `cuisine` and `ingredients` columns are read here
dataset = pd.read_json('../dataset/train.json')
y = dataset.cuisine

# creating an ingredients_list of all distinct ingredients in the dataset,
# kept in first-seen order; dict.fromkeys de-duplicates in a single linear pass
# instead of rescanning a growing list for every ingredient
ingredients_list = list(
    dict.fromkeys(i for ingredient in dataset.ingredients for i in ingredient)
)

# creating a dataframe from the dataset for each recipe
# where the columns are binary values indicating presence of an ingredient
# the 0/1 matrix is filled in a plain NumPy buffer and wrapped in a DataFrame
# once, which avoids writing through `X[col].values` (read-only when pandas
# Copy-on-Write is active) and avoids per-cell `.at` writes on an object frame
column_of = {name: position for position, name in enumerate(ingredients_list)}
presence = np.zeros((len(y), len(ingredients_list)), dtype=np.int8)
for row, ingredient in enumerate(dataset.ingredients):
    for i in ingredient:
        presence[row, column_of[i]] = 1
X = pd.DataFrame(presence, columns=ingredients_list, index=range(0, len(y)))

In [2]:
from sklearn.model_selection import train_test_split

# hold out a quarter of the recipes as the test set; the fixed seed keeps the
# split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

## Comparing the Classifiers

### CatBoostClassifier

In [3]:
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score

# CatBoost model: 300 estimators, tree depth capped at 7, training log silenced
classifier = CatBoostClassifier(
    silent=True,
    max_depth=7,
    n_estimators=300,
)

# fit on the training split
classifier.fit(X_train, y_train)

# predict the test split and express the accuracy as a percentage
y_pred = classifier.predict(X_test)
catboost_accuracy = 100 * accuracy_score(y_test, y_pred)

### Linear SVC

In [4]:
from sklearn import svm

# initializing the LinearSVC; its dual solver shuffles the data with a
# pseudo-random generator, so the seed is pinned (same value as the train/test
# split) to keep the reported accuracy repeatable across runs
lin_svc_classifier = svm.LinearSVC(random_state=0)

# training the LinearSVC on the training set
lin_svc_classifier.fit(X_train, y_train)

# making predictions on the test_set and scoring them as a percentage
y_pred = lin_svc_classifier.predict(X_test)
lin_svc_classifier_accuracy = accuracy_score(y_test, y_pred) * 100

### Decision Tree Classifier

In [5]:
from sklearn.tree import DecisionTreeClassifier

# initializing the decision tree; scikit-learn permutes the candidate features
# at random at each split, so the seed is pinned (same value as the train/test
# split) to keep the reported accuracy repeatable across runs
tree_classifier = DecisionTreeClassifier(random_state=0)

# training the decision tree on the training set
tree_classifier.fit(X_train, y_train)

# making predictions on the test_set and scoring them as a percentage
y_pred = tree_classifier.predict(X_test)
tree_classifier_accuracy = accuracy_score(y_test, y_pred) * 100

### Comparison 

In [6]:
# report each model's test-set accuracy (in percent), one line per model
for label, accuracy in (
    ("CatBoost Classifier Accuracy:", catboost_accuracy),
    ("SVC Classifer Accuracy:", lin_svc_classifier_accuracy),
    ("Decision Tree Classifier Accuracy:", tree_classifier_accuracy),
):
    print(label, accuracy)

CatBoost Classifier Accuracy: 72.49597747385359
SVC Classifer Accuracy: 77.11182622687048
Decision Tree Classifier Accuracy: 61.48431214802896


As is evident, the Linear SVC classifier achieves a higher accuracy on the test set than the other two models.

# Predicting the Cuisine from Recipe

In [7]:
# sample recipe ("parsena") as one comma-separated string of ingredient lines;
# the adjacent literals below are concatenated into a single string
parsena = (
    "3 boneless skinless chicken breasts (about 1 ¼ lbs), cut in half lengthwise, "
    "Kosher salt, "
    "Freshly ground black pepper, "
    "1/2 all purpose flour, "
    "2 large eggs, "
    "1/2 tbsp. garlic powder, "
    "1/4 c. canola or peanut Oil, for frying, "
    "5 tbsp. unsalted butter, cut in tablespoons, "
    "1/2 c. dry white wine, "
    "1/2 c. low-sodium chicken stock, "
    "1/2 lemon, zest and juice, "
    "1/2 lemon, sliced, "
    "Chopped fresh parsley, for serving"
)

In [8]:
# function for transforming the input recipe
# returns a dataframe where the columns are binary values indicating presence of an ingredient
def transform_input(x, ingredients=None):
    """Encode a recipe as a single-row 0/1 ingredient-presence dataframe.

    Args:
        x: The recipe. Every known ingredient is tested with ``name in x``, so
            a string is matched by case-sensitive substring, while a list or
            set of ingredient names is matched by exact membership.
        ingredients: Iterable of ingredient names to use as columns, in order.
            Defaults to the notebook-level ``ingredients_list``.

    Returns:
        A ``pd.DataFrame`` with index ``[0]`` and one column per ingredient,
        holding 1 where the ingredient was found in ``x`` and 0 otherwise.
    """
    vocabulary = ingredients_list if ingredients is None else list(ingredients)
    # Build the whole row first and construct the frame once: this avoids
    # writing through `test[col].values`, which is read-only when pandas
    # Copy-on-Write is active, and replaces two writes per column with one pass.
    flags = [1 if name in x else 0 for name in vocabulary]
    return pd.DataFrame([flags], columns=vocabulary, index=[0])

In [9]:
# function for predicting the cuisine from input 'x' using the classifier 'model'
def predict_cuisine(x, model):
    """Return the cuisine category that ``model`` predicts for recipe ``x``.

    The recipe is first encoded with ``transform_input``; the resulting
    single-row frame is passed to ``model.predict`` and the first (only)
    prediction is returned.
    """
    return model.predict(transform_input(x))[0]

In [10]:
# predicting the cuisine of the sample recipe with the Linear SVC model,
# which scored the highest accuracy in the comparison above
predict_cuisine(parsena, lin_svc_classifier)

'italian'