# Task for Today  

***

## McDonald's Menu Item Type Prediction  

Given *data about McDonald's menu items*, let's try to predict the **type** of a given item.  
  
We will use a TensorFlow ANN with two inputs (the tokenized item name and the scaled nutrition features) to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd

import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [None]:
# Load the menu CSV into a DataFrame. The relative path assumes an `input/`
# directory one level above the working directory (looks like a Kaggle-style
# layout; confirm when running elsewhere).
data = pd.read_csv('../input/nutrition-facts/menu.csv')

In [None]:
# Display the loaded DataFrame for a first look at its rows and columns.
data

In [None]:
# Print each column's dtype and non-null count.
data.info()

# Encoding Label Column

In [None]:
# List the distinct values of the 'Category' column, which is the prediction target.
data['Category'].unique()

In [None]:
# Replace every category string with an integer class index.
label_encoder = LabelEncoder()
data['Category'] = label_encoder.fit_transform(data['Category'])

# Keep an index -> original label lookup so the integer classes can be
# translated back to category names.
label_mappings = {index: label for index, label in enumerate(label_encoder.classes_)}
label_mappings

# Encoding Item Column

In [None]:
# Display the DataFrame again; 'Category' now holds the integer class indices.
data

In [None]:
# Fit a tokenizer on the item names, then turn every name into its sequence
# of integer word indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['Item'])

names = tokenizer.texts_to_sequences(data['Item'])

In [None]:
# +1 because the tokenizer's word indices start at 1, which leaves index 0
# free for the padding value.
vocab_length = len(tokenizer.word_index) + 1

# Length of the longest tokenized name; shorter names are padded up to it.
max_seq_length = np.max([len(sequence) for sequence in names])

print("Vocabulary length:", vocab_length)
print("Max sequence length:", max_seq_length)

In [None]:
# Pad every sequence at the end (padding='post') up to max_seq_length so the
# names form one rectangular array, then display it.
names = pad_sequences(names, maxlen=max_seq_length, padding='post')
names

In [None]:
# The item names now live in `names`, so the raw text column is removed.
data = data.drop(columns='Item')

In [None]:
# Display the DataFrame after the 'Item' column has been dropped.
data

# Cleaning Serving Size Feature

In [None]:
# Display the raw 'Serving Size' column before it is parsed into numbers.
data['Serving Size']

In [None]:
# List the distinct serving-size strings to see which formats need handling.
data['Serving Size'].unique()

In [None]:
# Unit code of every serving parsed so far, in call order:
# 0 = grams, 1 = millilitres, 2 = fluid ounces.
units = []


def _first_number(pattern, serving):
    """Return the first match of *pattern* in *serving* as a float.

    Raises:
        ValueError: If *pattern* does not match, or the match is not a
            valid number.
    """
    match = re.search(pattern, serving)
    if match is None:
        raise ValueError(f"Could not parse serving size: {serving!r}")
    # The builtin float is used because the np.float alias was removed from NumPy.
    return float(match.group(0))


def get_grams(serving):
    """Return the digits that follow '(' in *serving* and record unit code 0.

    Raises:
        ValueError: If no digits directly follow an opening parenthesis.
    """
    # Parse first so a failed parse does not leave a stray entry in `units`.
    amount = _first_number(r'(?<=\()[\d]+', serving)
    units.append(0)
    return amount


def get_ml(serving):
    """Return the digits that follow '(' in *serving* and record unit code 1.

    Raises:
        ValueError: If no digits directly follow an opening parenthesis.
    """
    amount = _first_number(r'(?<=\()[\d]+', serving)
    units.append(1)
    return amount


def get_fl_oz(serving):
    """Return the number at the start of *serving* and record unit code 2.

    Raises:
        ValueError: If *serving* does not start with a number.
    """
    amount = _first_number(r'^[\d.]+', serving)
    units.append(2)
    return amount

In [None]:
def get_units(serving):
    """Parse a serving-size string with the parser that matches its unit.

    Strings containing ' g)' go to get_grams and strings containing ' ml)'
    go to get_ml; everything else goes to get_fl_oz. The chosen parser
    returns the numeric amount and also records the unit code in `units`.
    """
    for marker, parser in ((' g)', get_grams), (' ml)', get_ml)):
        if marker in serving:
            return parser(serving)
    return get_fl_oz(serving)

In [None]:
# Start from an empty list: entries left over from an earlier or partial run
# would make `units` longer than the DataFrame and break the assignment below.
units.clear()

# Parse each serving string into a number; the parsers append one unit code
# per row to `units`, in row order.
data['Serving Size'] = data['Serving Size'].apply(get_units)
data['Serving Units'] = units

In [None]:
def onehot_encode(df, column, prefix):
    """Return a new DataFrame with *column* replaced by indicator columns.

    The indicator columns are named '<prefix>_<value>' and are appended after
    the remaining original columns. The input DataFrame is not modified.
    """
    indicator_columns = pd.get_dummies(df[column], prefix=prefix)
    widened = pd.concat([df, indicator_columns], axis=1)
    return widened.drop(columns=column)

In [None]:
# Replace the integer 'Serving Units' column with one indicator column per
# unit code (named 'units_<code>').
data = onehot_encode(data, 'Serving Units', 'units')

# Splitting/Scaling

In [None]:
# Display the fully preprocessed DataFrame before it is split into X and y.
data

In [None]:
# Separate the feature columns from the encoded target; the copies keep both
# independent of `data`.
X = data.drop(columns='Category').copy()
y = data['Category'].copy()

In [None]:
# Split before scaling so the held-out rows never influence the scaler's
# statistics. The fixed seed keeps the 70/30 split reproducible across runs.
names_train, names_test, X_train, X_test, y_train, y_test = train_test_split(
    names, X, y, train_size=0.7, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows. `X` itself stays unscaled; only its column count is used later.
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Modeling

In [None]:
# Size of the output layer: one unit per distinct class index in the target.
num_classes = y.nunique()
print("Number of classes:", num_classes)

# Width of the name input: the padded sequence length.
name_feature_length = names.shape[1]
print("Name feature length:", name_feature_length)

# Width of the second input: the number of remaining feature columns.
other_feature_length = X.shape[1]
print("Other feature length:", other_feature_length)

In [None]:
# Name features: the padded word-index sequences are embedded and flattened
# into a single vector per item.
name_input = tf.keras.Input(shape=(name_feature_length,), name="name_input")

# The sequence length is already fixed by `name_input`, so the deprecated
# `input_length` argument is not passed.
name_embedding = tf.keras.layers.Embedding(
    input_dim=vocab_length,
    output_dim=64,
    name="name_embedding"
)(name_input)

name_flatten = tf.keras.layers.Flatten(name="name_flatten")(name_embedding)


# Other features: the remaining feature columns go through two dense layers.
other_input = tf.keras.Input(shape=(other_feature_length,), name="other_input")

dense_1 = tf.keras.layers.Dense(64, activation='relu', name="dense_1")(other_input)
dense_2 = tf.keras.layers.Dense(64, activation='relu', name="dense_2")(dense_1)


# Combined: both branches are joined and mapped to one probability per class.
concat = tf.keras.layers.concatenate([name_flatten, dense_2], name="concatenate")

outputs = tf.keras.layers.Dense(num_classes, activation='softmax', name="output_layer")(concat)


# Create model (input order: names first, other features second)
model = tf.keras.Model(inputs=[name_input, other_input], outputs=outputs)

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()
tf.keras.utils.plot_model(model)

# Training

In [None]:
# The sparse loss is used because the targets are integer class indices
# rather than one-hot vectors.
model.compile(
    metrics=['accuracy'],
    loss='sparse_categorical_crossentropy',
    optimizer='adam'
)

batch_size = 32
epochs = 100

# 20% of the training rows are held out for validation. The callback lowers
# the learning rate when its monitored metric stops improving.
history = model.fit(
    x=[names_train, X_train],
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.2,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()]
)

# Results

In [None]:
# With metrics=['accuracy'] at compile time, evaluate() returns
# [loss, accuracy]; index 1 is therefore the test accuracy.
results = model.evaluate(x=[names_test, X_test], y=y_test, verbose=0)

print("Model Accuracy:", results[1])

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/YFvFoTZLKlA