Ingredients and cuisine dataset from https://www.kaggle.com/competitions/whats-cooking-kernels-only/data?select=train.json

In [1]:
import json
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt #plot diagram
import re #regular expression
import nltk #natural language processing
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
!pip install unidecode
import unidecode #change from unicode to ASCII
from tqdm import tqdm
tqdm.pandas()

from sklearn.preprocessing import LabelEncoder #change text to label
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from tensorflow.keras.preprocessing.sequence import pad_sequences #making all the vectors in same length
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional,Dropout
from tensorflow.keras.preprocessing.text import Tokenizer #text processing
from tensorflow.keras.models import Sequential #sequential data -- linear model
from tensorflow.keras.optimizers import Adam,RMSprop #optimizer
from tensorflow.keras.utils import to_categorical #transfer label to binary matrix
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint #checkpoint

np.random.seed(4)
tf.random.set_seed(4)

[nltk_data] Downloading package wordnet to /root/nltk_data...


Collecting unidecode
  Downloading Unidecode-1.3.7-py3-none-any.whl (235 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.3.7


In [2]:
train = pd.read_json("/content/drive/MyDrive/Analytics/GCPLP/Data/train.json")

In [3]:
train.head(20)

Unnamed: 0,id,cuisine,ingredients
0,10259,greek,"[romaine lettuce, black olives, grape tomatoes..."
1,25693,southern_us,"[plain flour, ground pepper, salt, tomatoes, g..."
2,20130,filipino,"[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,22213,indian,"[water, vegetable oil, wheat, salt]"
4,13162,indian,"[black pepper, shallots, cornflour, cayenne pe..."
5,6602,jamaican,"[plain flour, sugar, butter, eggs, fresh ginge..."
6,42779,spanish,"[olive oil, salt, medium shrimp, pepper, garli..."
7,3735,italian,"[sugar, pistachio nuts, white almond bark, flo..."
8,16903,mexican,"[olive oil, purple onion, fresh pineapple, por..."
9,12734,italian,"[chopped tomatoes, fresh basil, garlic, extra-..."


In [4]:
train.shape

(39774, 3)

In [5]:
# check for null columns
train.isnull().sum()

id             0
cuisine        0
ingredients    0
dtype: int64

In [6]:
# Clean up the ingredients text
lemmatizer = WordNetLemmatizer()
def preprocess(ingredients):
    ingredients = " ".join(ingredients)
    ingredients = re.sub("[^a-zA-Z]"," ",ingredients) # remove all non-letters
    ingredients = ingredients.lower() # convert to lowercase
    words = []
    for word in ingredients.split():
        word = re.sub("[0-9]"," ",word) # remove numbers
        word = re.sub((r"\b(oz|ounc|ounce|pound|lb|inch|inches|kg|g|to|cup|tsp)\b"), " ", word) # remove units
        if len(word) <= 2: continue
        word = unidecode.unidecode(word)
        word = lemmatizer.lemmatize(word)
        if len(word) > 0: words.append(word)
    return " ".join(words)

In [7]:
train['x'] = train['ingredients'].progress_apply(preprocess)

100%|██████████| 39774/39774 [00:15<00:00, 2530.99it/s]


In [8]:
train.head(30)

Unnamed: 0,id,cuisine,ingredients,x
0,10259,greek,"[romaine lettuce, black olives, grape tomatoes...",romaine lettuce black olive grape tomato garli...
1,25693,southern_us,"[plain flour, ground pepper, salt, tomatoes, g...",plain flour ground pepper salt tomato ground b...
2,20130,filipino,"[eggs, pepper, salt, mayonaise, cooking oil, g...",egg pepper salt mayonaise cooking oil green ch...
3,22213,indian,"[water, vegetable oil, wheat, salt]",water vegetable oil wheat salt
4,13162,indian,"[black pepper, shallots, cornflour, cayenne pe...",black pepper shallot cornflour cayenne pepper ...
5,6602,jamaican,"[plain flour, sugar, butter, eggs, fresh ginge...",plain flour sugar butter egg fresh ginger root...
6,42779,spanish,"[olive oil, salt, medium shrimp, pepper, garli...",olive oil salt medium shrimp pepper garlic cho...
7,3735,italian,"[sugar, pistachio nuts, white almond bark, flo...",sugar pistachio nut white almond bark flour va...
8,16903,mexican,"[olive oil, purple onion, fresh pineapple, por...",olive oil purple onion fresh pineapple pork po...
9,12734,italian,"[chopped tomatoes, fresh basil, garlic, extra-...",chopped tomato fresh basil garlic extra virgin...


In [9]:
# vectorize
vectorizer = TfidfVectorizer(sublinear_tf=True)

X_train = vectorizer.fit_transform(train['x'].values)
X_train.sort_indices()

label_encoder = LabelEncoder()
Y_train = label_encoder.fit_transform(train['cuisine'].values)

In [10]:
 #split data into train and test (80-20)
from sklearn.model_selection import train_test_split
X, X_val, Y, Y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=4)

In [11]:
classifier = SVC(kernel='rbf',random_state=0)

In [12]:
%%time
model = OneVsRestClassifier(classifier, n_jobs=4)
model.fit(X, Y)

CPU times: user 7.68 s, sys: 1.64 s, total: 9.32 s
Wall time: 13min 51s


In [13]:
Y_test = model.predict(X_val)
Y_pred = label_encoder.inverse_transform(Y_test)

In [14]:
print("Accuracy Score:", accuracy_score(Y_val, Y_test))

Accuracy Score: 0.8194846008799497


In [15]:
Y_pred[:20]

array(['mexican', 'french', 'moroccan', 'southern_us', 'italian', 'greek',
       'indian', 'chinese', 'french', 'indian', 'italian', 'thai',
       'filipino', 'indian', 'italian', 'british', 'french', 'italian',
       'mexican', 'southern_us'], dtype=object)

In [16]:
# read in the pre-processed data
recipe_df = pd.read_csv('/content/drive/MyDrive/Analytics/GCPLP/Data/cleaned_recipes.csv')

In [17]:
recipe_df

Unnamed: 0,name,steps,ingredients,combined_steps,combined_ingredients,combined_recipe,cleaned_text,cleaned_ingredients
0,Arriba Baked Winter Squash Mexican Style,"['make a choice and proceed with recipe', 'dep...","['winter squash', 'mexican seasoning', 'mixed ...",1) make a choice and proceed with recipe 2) de...,"winter squash, mexican seasoning, mixed spice,...",Arriba Baked Winter Squash Mexican Style Steps...,arriba baked winter squash mexican style winte...,winter squash mexican season mixed spice honey...
1,A Bit Different Breakfast Pizza,"['preheat oven to 425 degrees f', 'press dough...","['prepared pizza crust', 'sausage patty', 'egg...",1) preheat oven to 425 degrees f 2) press doug...,"prepared pizza crust, sausage patty, eggs, mil...",A Bit Different Breakfast Pizza Steps: 1) preh...,bit different breakfast pizza prepared pizza c...,prepared pizza crust sausage patty egg milk sa...
2,All In The Kitchen Chili,"['brown ground beef in large pot', 'add choppe...","['ground beef', 'yellow onions', 'diced tomato...",1) brown ground beef in large pot 2) add chopp...,"ground beef, yellow onions, diced tomatoes, to...",All In The Kitchen Chili Steps: 1) brown groun...,kitchen chili ground beef yellow onion diced t...,ground beef yellow onion diced tomato tomato p...
3,Alouette Potatoes,['place potatoes in a large pot of lightly sal...,"['spreadable cheese with garlic and herbs', 'n...",1) place potatoes in a large pot of lightly sa...,"spreadable cheese with garlic and herbs, new p...",Alouette Potatoes Steps: 1) place potatoes in ...,alouette potatoes spreadable cheese garlic her...,spreadable cheese garlic herb new potato shall...
4,Amish Tomato Ketchup For Canning,['mix all ingredients& boil for 2 1 / 2 hours ...,"['tomato juice', 'apple cider vinegar', 'sugar...",1) mix all ingredients& boil for 2 1 / 2 hours...,"tomato juice, apple cider vinegar, sugar, salt...",Amish Tomato Ketchup For Canning Steps: 1) mix...,amish tomato ketchup canning tomato juice appl...,tomato juice apple cider vinegar sugar salt pe...
...,...,...,...,...,...,...,...,...
231738,Singapore Sling,['Add the first 3 ingredients to a cocktail sh...,"['1 1/3 ounces gin', '2/3 ounce cherry brandy'...",1) Add the first 3 ingredients to a cocktail s...,"1 1/3 ounces gin, 2/3 ounce cherry brandy, 1/2...",Singapore Sling Steps: 1) Add the first 3 ingr...,singapore sling gin cherry brandy fresh squeez...,gin cherry brandy fresh squeeze lemon juice cl...
231739,Spaghetti Bolognese With Tomato Chicken Sauce,"['For tomato sauce:', 'peel tomatoes', 'Make “...","['1.2 kg tomatoes', '1 tsp fine salt', '2 tbsp...",1) For tomato sauce: 2) peel tomatoes 3) Make ...,"1.2 kg tomatoes, 1 tsp fine salt, 2 tbsps suga...",Spaghetti Bolognese With Tomato Chicken Sauce ...,spaghetti bolognese tomato chicken sauce tomat...,tomatoe fine salt tbsps sugar tbsp dry mixed h...
231740,Singapore Black Pepper Crab Sauce,"['Combine oyster sauce, sugar, water, and Keca...","['1/2 cup oyster sauce', '1/2 cup granulated s...","1) Combine oyster sauce, sugar, water, and Kec...","1/2 cup oyster sauce, 1/2 cup granulated sugar...",Singapore Black Pepper Crab Sauce Steps: 1) Co...,singapore black pepper crab sauce oyster sauce...,oyster sauce granulate sugar water ketjap mani...
231741,Crispy Deep-Fried Dried Anchovies (Ikan Bilis),['Rinse the cleaned anchovies 3 times to remov...,['100 g dried anchovies (heads & intestines re...,1) Rinse the cleaned anchovies 3 times to remo...,100 g dried anchovies (heads & intestines remo...,Crispy Deep-Fried Dried Anchovies (Ikan Bilis)...,crispy deep fried dried anchovies ikan bilis d...,dry anchovy head intestine remove ml cooking oil


In [18]:
X_test = vectorizer.transform(recipe_df['cleaned_ingredients'].values)
X_test.sort_indices()

In [19]:
%%time
y_test = model.predict(X_test)
y_pred = label_encoder.inverse_transform(y_test)

CPU times: user 58min 56s, sys: 25.3 s, total: 59min 21s
Wall time: 59min 21s


In [20]:
recipe_df['cuisine'] = y_pred

recipe_df

Unnamed: 0,name,steps,ingredients,combined_steps,combined_ingredients,combined_recipe,cleaned_text,cleaned_ingredients,cuisine
0,Arriba Baked Winter Squash Mexican Style,"['make a choice and proceed with recipe', 'dep...","['winter squash', 'mexican seasoning', 'mixed ...",1) make a choice and proceed with recipe 2) de...,"winter squash, mexican seasoning, mixed spice,...",Arriba Baked Winter Squash Mexican Style Steps...,arriba baked winter squash mexican style winte...,winter squash mexican season mixed spice honey...,mexican
1,A Bit Different Breakfast Pizza,"['preheat oven to 425 degrees f', 'press dough...","['prepared pizza crust', 'sausage patty', 'egg...",1) preheat oven to 425 degrees f 2) press doug...,"prepared pizza crust, sausage patty, eggs, mil...",A Bit Different Breakfast Pizza Steps: 1) preh...,bit different breakfast pizza prepared pizza c...,prepared pizza crust sausage patty egg milk sa...,italian
2,All In The Kitchen Chili,"['brown ground beef in large pot', 'add choppe...","['ground beef', 'yellow onions', 'diced tomato...",1) brown ground beef in large pot 2) add chopp...,"ground beef, yellow onions, diced tomatoes, to...",All In The Kitchen Chili Steps: 1) brown groun...,kitchen chili ground beef yellow onion diced t...,ground beef yellow onion diced tomato tomato p...,mexican
3,Alouette Potatoes,['place potatoes in a large pot of lightly sal...,"['spreadable cheese with garlic and herbs', 'n...",1) place potatoes in a large pot of lightly sa...,"spreadable cheese with garlic and herbs, new p...",Alouette Potatoes Steps: 1) place potatoes in ...,alouette potatoes spreadable cheese garlic her...,spreadable cheese garlic herb new potato shall...,french
4,Amish Tomato Ketchup For Canning,['mix all ingredients& boil for 2 1 / 2 hours ...,"['tomato juice', 'apple cider vinegar', 'sugar...",1) mix all ingredients& boil for 2 1 / 2 hours...,"tomato juice, apple cider vinegar, sugar, salt...",Amish Tomato Ketchup For Canning Steps: 1) mix...,amish tomato ketchup canning tomato juice appl...,tomato juice apple cider vinegar sugar salt pe...,southern_us
...,...,...,...,...,...,...,...,...,...
231738,Singapore Sling,['Add the first 3 ingredients to a cocktail sh...,"['1 1/3 ounces gin', '2/3 ounce cherry brandy'...",1) Add the first 3 ingredients to a cocktail s...,"1 1/3 ounces gin, 2/3 ounce cherry brandy, 1/2...",Singapore Sling Steps: 1) Add the first 3 ingr...,singapore sling gin cherry brandy fresh squeez...,gin cherry brandy fresh squeeze lemon juice cl...,spanish
231739,Spaghetti Bolognese With Tomato Chicken Sauce,"['For tomato sauce:', 'peel tomatoes', 'Make “...","['1.2 kg tomatoes', '1 tsp fine salt', '2 tbsp...",1) For tomato sauce: 2) peel tomatoes 3) Make ...,"1.2 kg tomatoes, 1 tsp fine salt, 2 tbsps suga...",Spaghetti Bolognese With Tomato Chicken Sauce ...,spaghetti bolognese tomato chicken sauce tomat...,tomatoe fine salt tbsps sugar tbsp dry mixed h...,italian
231740,Singapore Black Pepper Crab Sauce,"['Combine oyster sauce, sugar, water, and Keca...","['1/2 cup oyster sauce', '1/2 cup granulated s...","1) Combine oyster sauce, sugar, water, and Kec...","1/2 cup oyster sauce, 1/2 cup granulated sugar...",Singapore Black Pepper Crab Sauce Steps: 1) Co...,singapore black pepper crab sauce oyster sauce...,oyster sauce granulate sugar water ketjap mani...,moroccan
231741,Crispy Deep-Fried Dried Anchovies (Ikan Bilis),['Rinse the cleaned anchovies 3 times to remov...,['100 g dried anchovies (heads & intestines re...,1) Rinse the cleaned anchovies 3 times to remo...,100 g dried anchovies (heads & intestines remo...,Crispy Deep-Fried Dried Anchovies (Ikan Bilis)...,crispy deep fried dried anchovies ikan bilis d...,dry anchovy head intestine remove ml cooking oil,italian


In [21]:
# save processed data to csv file
recipe_df.to_csv('/content/drive/MyDrive/Analytics/GCPLP/Data/cleaned_recipes_cuisine.csv', index=False)