In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, Model
import time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import keras as K
from tensorflow import keras
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

In [2]:

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [3]:
#looking into data and its columns
import pandas as pd

# Replace 'your_file_path.csv' with the actual path to your CSV file
file_path = 'datasets/source/kaggle_food.csv'

# Read the CSV file into a Pandas DataFrame
df = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to check the data
print(df.head())


   Unnamed: 0                                              Title  \
0           0  Miso-Butter Roast Chicken With Acorn Squash Pa...   
1           1                    Crispy Salt and Pepper Potatoes   
2           2                        Thanksgiving Mac and Cheese   
3           3                 Italian Sausage and Bread Stuffing   
4           4                                       Newton's Law   

                                         Ingredients  \
0  ['1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher...   
1  ['2 large egg whites', '1 pound new potatoes (...   
2  ['1 cup evaporated milk', '1 cup whole milk', ...   
3  ['1 (¾- to 1-pound) round Italian loaf, cut in...   
4  ['1 teaspoon dark brown sugar', '1 teaspoon ho...   

                                        Instructions  \
0  Pat chicken dry with paper towels, season all ...   
1  Preheat oven to 400°F and line a rimmed baking...   
2  Place a rack in middle of oven; preheat to 400...   
3  Preheat oven to 350°F with 

In [4]:
print(df.columns)

Index(['Unnamed: 0', 'Title', 'Ingredients', 'Instructions', 'Image_Name',
       'Cleaned_Ingredients'],
      dtype='object')


In [34]:
#here we will only be using 'Title' and 'Ingredients' for our purpose
#so we will be dropping the remaining columns
df = df.drop(columns=['Unnamed: 0', 'Ingredients', 'Image_Name',
       'Cleaned_Ingredients'])

In [35]:
print(df.columns)

Index(['Title', 'Instructions'], dtype='object')


In [36]:
#here we are creating a fake labels for temporary test
#1/3 part of the labels to 'appetizers'. another 1/3 part to 'dinner' and the last 1/3 part to 'desserts'

total_size = len(df)
category_size = total_size // 3

df.loc[:category_size - 1, 'Title'] = 'Appetizers'
df.loc[category_size:2*category_size - 1, 'Title'] = 'Dinner'
df.loc[2*category_size:total_size - 1, 'Title'] = 'Desserts'

In [37]:
print(df.head())

        Title                                       Instructions
0  Appetizers  Pat chicken dry with paper towels, season all ...
1  Appetizers  Preheat oven to 400°F and line a rimmed baking...
2  Appetizers  Place a rack in middle of oven; preheat to 400...
3  Appetizers  Preheat oven to 350°F with rack in middle. Gen...
4  Appetizers  Stir together brown sugar and hot water in a c...


In [38]:
#shuffling the dataframe
df = df.sample(frac=1, random_state=42)

# Assuming df['Instructions'] is your text data
df['Instructions'].fillna('', inplace=True)  # Replace NaN values with an empty string


In [39]:
print(df.head())

            Title                                       Instructions
13335    Desserts  Put oven rack in middle position and preheat o...
4385   Appetizers  In a medium bowl, whisk together dry mix ingre...
1175   Appetizers  Prepare a grill for medium-high heat. Brush ea...
6557       Dinner  Combine short ribs, onion, prunes, and garlic ...
11439    Desserts  In blender, purée olive oil, garlic, shallot, ...


In [46]:
#defining the parameters
num_classes = 1
embedding_dim = 100

# Assuming df['Instructions'] is your text data, we  tokenize the input dataset into tokens for the CNN model
#Tokenizer Initialization and Fitting:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Instructions'])

#Vocabulary Size and Maximum Sequence Length Calculation:
vocab_size = len(tokenizer.word_index) + 1
max_sequence_length = max(df['Instructions'].apply(lambda x: len(x.split())))

#Texts to Sequences:
sequences = tokenizer.texts_to_sequences(df['Instructions'])

#Padding Sequences
data = pad_sequences(sequences, maxlen=max_sequence_length)


In [47]:
# Assuming 'text' is your input data and 'label' is your target variable
X = data   #data
y = df['Title'].values           #labels 

# Convert labels to numerical format using LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [48]:
# Define the model
model = Sequential()

# Embedding layer: Converts words into dense vectors of fixed size
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length))

# Convolutional layer with max pooling
model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))
model.add(GlobalMaxPooling1D())

# Fully connected layers for classification
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=num_classes, activation='softmax'))


In [49]:

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [43]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 2587, 100)         1327100   
                                                                 
 conv1d_3 (Conv1D)           (None, 2583, 128)         64128     
                                                                 
 global_max_pooling1d_3 (Gl  (None, 128)               0         
 obalMaxPooling1D)                                               
                                                                 
 dense_6 (Dense)             (None, 64)                8256      
                                                                 
 dense_7 (Dense)             (None, 3)                 195       
                                                                 
Total params: 1399679 (5.34 MB)
Trainable params: 1399679 (5.34 MB)
Non-trainable params: 0 (0.00 Byte)
________________

In [50]:
# Train the model and display the activations after each epoch
start_time = time.time()
history = model.fit(X_train, y_train, epochs=1, validation_data=(X_test, y_test))
end_time = time.time()

  return dispatch_target(*args, **kwargs)




In [51]:
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('Test accuracy:', test_acc)
print('Time elapsed: ', end_time - start_time)

85/85 - 3s - loss: 0.0000e+00 - accuracy: 0.3221 - 3s/epoch - 39ms/step
Test accuracy: 0.32210293412208557
Time elapsed:  48.25538396835327
