MIS 285N Cognitive Computing<br>
Final Project<br>
Jerry Che - Jose Guerrero - Riley Moynihan - Noah Placke - Sarah Teng - Palmer Wenzel

# Ingredients Generation Model

Following techniques from:
- https://towardsdatascience.com/generative-adversarial-networks-in-python-73d3972823d3
- https://www.maskaravivek.com/post/gan-synthetic-data-generation/

#### Read data from CSV.

In [None]:
import pandas as pd
# pd.options.display.max_columns = 500


# Load the pre-processed recipes table into a DataFrame. To iterate faster on
# a subset, chain .sample(frac=0.1, random_state=42) onto the read_csv call.
df = pd.read_csv('../data/kaggle/processed/recipes_processed_sml.csv')

# Preview the first three rows (rendered as the notebook cell output).
df.head(3)

#### Drop unnecessary columns.

In [None]:
# Discard the 'name' and 'steps' columns; every remaining column of `df` is
# passed on to the train/test split and counted in the model's data width.
df = df.drop(columns=['name', 'steps'])

df.head()

#### Train and test split.

In [None]:
from sklearn.model_selection import train_test_split


# Hold out 12.5% of the rows as a test set; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test = train_test_split(df, random_state=0, test_size=0.125)

# Report the (rows, columns) shape of each split, one per line.
print(X_train.shape, X_test.shape, sep="\n")

## Model Definition

#### Build functions and model definitions.

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, LeakyReLU, Dropout, Input


# Model configs
# Length of the random noise vector fed to the generator.
noise_dim = 100
# Base width of the dense layers; wider layers use multiples of this value.
dim = 256
# Number of columns in `df`; used as the generator's output width and the
# discriminator's input width.
data_dim = df.shape[1]


def build_generator():
    """Build and compile the generator network.

    The network maps a noise vector of length ``noise_dim`` to a vector of
    length ``data_dim`` through a stack of progressively wider dense layers.
    The output layer uses a ``tanh`` activation.

    Returns:
        The compiled Keras ``Sequential`` generator model.
    """
    layers = [
        # Input layer: plain linear projection of the noise (no activation).
        Dense(dim, input_dim=noise_dim),
        Dense(dim, activation='relu'),
        Dense(2 * dim, activation='relu'),
        Dense(4 * dim, activation='relu'),
        # Output layer: one unit per data column.
        Dense(data_dim, activation='tanh'),
    ]

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='binary_crossentropy')
    return model


def build_discriminator():
    """Build and compile the discriminator network.

    The network takes a vector of length ``data_dim`` and narrows it through
    dense layers (with light dropout after the first two) down to a single
    sigmoid unit.

    Returns:
        The compiled Keras ``Sequential`` discriminator model.
    """
    layers = [
        # Input layer: linear (no activation), followed by 10% dropout.
        Dense(4 * dim, input_dim=data_dim),
        Dropout(0.1),
        Dense(2 * dim, activation='relu'),
        Dropout(0.1),
        Dense(dim, activation='relu'),
        # Single sigmoid output, trained with binary cross-entropy.
        Dense(1, activation='sigmoid'),
    ]

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='binary_crossentropy')
    return model


def build_gan(generator, discriminator):
    """Chain the generator and discriminator into one combined model.

    The combined model takes a noise vector of length ``noise_dim``, runs it
    through ``generator`` and feeds the result to ``discriminator``.

    Note:
        As a side effect, ``discriminator.trainable`` is set to ``False`` on
        the object passed in, so that only the generator is trained through
        the combined model.

    Args:
        generator: Model mapping noise to generated data.
        discriminator: Model scoring data; marked non-trainable here.

    Returns:
        The compiled combined Keras ``Model``.
    """
    # Only the generator should learn when the combined model is trained.
    discriminator.trainable = False

    noise_in = Input(shape=(noise_dim,))
    validity = discriminator(generator(noise_in))

    combined = Model(inputs=noise_in, outputs=validity)
    combined.compile(optimizer='adam', loss='binary_crossentropy')
    return combined


# Build one instance of each model so the combined architecture can be
# inspected. training() builds its own fresh models, and the names below are
# reassigned from its return value later in the notebook.
generator = build_generator()
discriminator = build_discriminator()
gan = build_gan(generator, discriminator)

# Print the layer-by-layer summary of the combined model.
gan.summary()

#### Create function to display generator output.

In [None]:
import numpy as np


def display_recipe(epoch, generator, examples=1):
    """Sample recipes from the generator and print the ingredients they use.

    Draws ``examples`` standard-normal noise vectors of length ``noise_dim``,
    runs them through ``generator.predict`` and treats every column whose
    generated value is at least 0.5 as an ingredient that is "used". Column
    names are taken from the module-level ``df``.

    Args:
        epoch: Epoch number the sample is taken at. Currently unused; kept so
            existing callers keep working.
        generator: Object exposing ``predict(noise)`` that returns one row of
            values per noise vector, one value per column of ``df``.
        examples: Number of recipes to generate. When greater than 1, the
            ingredients of all generated recipes are reported in one
            combined list.

    Returns:
        None. The summary is printed to stdout.
    """
    # Create noise
    noise = np.random.normal(0, 1, size=[examples, noise_dim])

    # Generate recipes
    generated_recipes = generator.predict(noise)

    # Get used ingredients: compare the generated VALUE against the threshold.
    # (Comparing the column index instead would select every column except
    # the first, regardless of what the generator produced.)
    ingredients = [
        column
        for recipe in generated_recipes
        for column, value in zip(df.columns, recipe)
        if value >= 0.5
    ]

    # Display
    print("*** Generated Recipe ***")
    print(f"# of ingredients: {len(ingredients)}")
    print(f"First 5 ingredients: {ingredients[:5]}")

#### Training logic.

In [None]:
from tqdm import tqdm


def training(X_train, X_test, epochs=1, batch_size=32, sample_interval=10):
    """Train a freshly built GAN and return its three models.

    Each "epoch" is a single training step: one discriminator update on a
    mixed batch of ``batch_size`` real and ``batch_size`` generated recipes,
    followed by one generator update through the combined model.

    Args:
        X_train: Real recipes. Must support ``.sample(n)`` (e.g. a pandas
            DataFrame); ``batch_size`` rows are drawn at random each step.
        X_test: Held-out recipes. Currently unused; kept so existing callers
            keep working.
        epochs: Number of training steps to run.
        batch_size: Number of real rows (and of generated rows) per step.
        sample_interval: A generated recipe is displayed every
            ``sample_interval`` epochs. A falsy value (0 or None) disables
            the display.

    Returns:
        Tuple ``(generator, discriminator, gan)`` of the trained models.
    """
    # Build GAN
    generator = build_generator()
    discriminator = build_discriminator()
    gan = build_gan(generator, discriminator)

    # Training step
    for e in range(1, epochs + 1):
        # Random noise as an input to initialize the generator
        noise = np.random.normal(0, 1, [batch_size, noise_dim])

        # Use the generator to produce "fake" recipes
        generated_recipes = generator.predict(noise)

        # Get a sample of real recipes from data
        real_recipes = X_train.sample(batch_size)

        # Mix the real and fake data (real rows first, generated rows after)
        X = np.concatenate([real_recipes, generated_recipes])

        # Create labels for real and fake data
        y_dis = np.zeros(2 * batch_size)  # fake
        y_dis[:batch_size] = 1.0          # real

        # Train the discriminator while generator is fixed
        discriminator.trainable = True
        d_loss = discriminator.train_on_batch(X, y_dis)

        # Label fresh generated samples as real so that the generator is
        # rewarded when the discriminator is fooled
        noise = np.random.normal(0, 1, [batch_size, noise_dim])
        y_gen = np.ones(batch_size)

        # Train the generator (to have the discriminator label samples as valid)
        discriminator.trainable = False
        g_loss = gan.train_on_batch(noise, y_gen)

        # Output loss
        print(f"E{e} [D Loss: {d_loss:.4f}] [G loss: {g_loss:.4f}]")

        # Display a created recipe every `sample_interval` epochs. The
        # remainder is compared with 0 so the check fires for any interval;
        # a falsy interval turns sampling off instead of dividing by zero.
        if sample_interval and e % sample_interval == 0:
            display_recipe(e, generator)

    return generator, discriminator, gan


# Run 256 training steps with batches of 8 rows and keep the trained models.
# This replaces the generator/discriminator/gan instances built earlier.
generator, discriminator, gan = training(X_train, X_test, epochs=256, batch_size=8)