# Modeling a Multi-Class Prediction using TensorFlow LSTM

In [1]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import time

# standard sklearn imports
from sklearn.datasets import make_classification, make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV

# tensorflow imports for Neural Networks
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout, Flatten, Conv2D, MaxPooling2D, GRU, LSTM, Embedding
from tensorflow.keras.initializers import Constant
from tensorflow.keras.optimizers import Adam

# Import regularizers
from tensorflow.keras.regularizers import l2
# Import Dropout
from tensorflow.keras.layers import Dropout
# Import Early Stopping
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.utils import to_categorical, plot_model

# CNN imports 
import os
from tensorflow.keras.preprocessing.image import img_to_array, load_img

# GridSearch imports 
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# RNN imports 
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator




# imports for reports on classification
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, accuracy_score

plt.style.use(style='seaborn')
%matplotlib inline

In [2]:
df = pd.read_csv('../../Data/reviews_stemmed.csv')
df.head()

Unnamed: 0,business_id,name,review_id,review_stars,text,amb_casual,amb_classy,amb_target,text_length,clean_text,clean_text_length,clean_text_stem,clean_text_stem_length
0,0lCiLKpjrinltPFbBby4sw,The Great Wall Restaurant,wve8w6gIuPpCfo5J--AHjg,3,"The menu sounded promising, with over fifty di...",0.0,0.0,0,121,menu sounded promising fifty different dishes ...,68,menu sound promis fifti differ dish differ sty...,66
1,0lCiLKpjrinltPFbBby4sw,The Great Wall Restaurant,5rFuHGGbimVxPHxgM0sNSA,3,This wasn't the worst Chinese food but it wasn...,0.0,0.0,0,78,wasn' worst chinese food wasn' best egg foo yo...,41,worst chines food best egg foo young dri overc...,39
2,0lCiLKpjrinltPFbBby4sw,The Great Wall Restaurant,2iD3Rdbw0DUzjZSqBq3hXQ,1,I have been coming to this restaurant for over...,0.0,0.0,0,52,coming restaurant 20 years purchased shrimp fr...,27,come restaur 20 year purchas shrimp fri rice g...,26
3,0lCiLKpjrinltPFbBby4sw,The Great Wall Restaurant,e61y5ZlNwg04mAGtcD3vbQ,5,My husband and I love this place.\nGreat price...,0.0,0.0,0,23,husband love place great price lot food make s...,13,husband love place great price lot food make s...,12
4,kZFTi8FKjs30EuzurZ3v3g,Donerick's Pub,38lN2ONaypsfBDLwhGxcSg,5,Great place for beverages with your friends wh...,0.0,0.0,0,61,great place beverages friends watch game lots ...,43,great place beverag friend watch game lot tv g...,43


## 1) Modeling with stemmed text

In [36]:
# Sets up X and y
X = df['clean_text_stem']
y = df['amb_target']

In [37]:
# One-hot encoding for the categorical y response variable 
y = to_categorical(y)

In [38]:
# Splits the data into training and test sets from sample
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    stratify=y, 
                                                    random_state=42)

In [39]:
X_train

25923    creativ menu food wonder trendi atmospher love...
40820    chose restaur watch osu rose bowl januari 1 ti...
30535    realli good fish rib fish similar like queen t...
72374    realli huge pizza fan place close peopl want e...
25922    fist taco least dozen time last night suppos f...
                               ...                        
45302    area use yelp review decid pizza want tri plac...
45214    food great portion size definit fill much way ...
73125    happen visit columbu first time also vegetaria...
77558    restaur great patio summer awesom select beer ...
16196    usual like restaur never ever issu one 5th gra...
Name: clean_text_stem, Length: 64641, dtype: object

### Basic NLP

In [40]:
# import the Counter function
from collections import Counter

# import the tokenizer from keras preprocessing 
from tensorflow.keras.preprocessing.text import Tokenizer

In [41]:
# Creates a function that counts unique words
def counter_word(text):
    count = Counter()
    for doc in text.values:
        for word in doc.split():
            count[word] += 1
    return count

In [42]:
X_train

25923    creativ menu food wonder trendi atmospher love...
40820    chose restaur watch osu rose bowl januari 1 ti...
30535    realli good fish rib fish similar like queen t...
72374    realli huge pizza fan place close peopl want e...
25922    fist taco least dozen time last night suppos f...
                               ...                        
45302    area use yelp review decid pizza want tri plac...
45214    food great portion size definit fill much way ...
73125    happen visit columbu first time also vegetaria...
77558    restaur great patio summer awesom select beer ...
16196    usual like restaur never ever issu one 5th gra...
Name: clean_text_stem, Length: 64641, dtype: object

In [43]:
X_train.values[0]

'creativ menu food wonder trendi atmospher love goat chees blackberri flatbread also like meatloaf sand crafti burger'

In [44]:
# Counts the number of times a unique word appears
counter = counter_word(X_train)

In [45]:
# Finds the length or the number of unique words
len(counter)

31542

In [46]:
counter

Counter({'creativ': 399,
         'menu': 12498,
         'food': 48834,
         'wonder': 3447,
         'trendi': 352,
         'atmospher': 5255,
         'love': 16777,
         'goat': 511,
         'chees': 9939,
         'blackberri': 88,
         'flatbread': 316,
         'also': 14929,
         'like': 26610,
         'meatloaf': 304,
         'sand': 95,
         'crafti': 74,
         'burger': 6150,
         'chose': 780,
         'restaur': 16122,
         'watch': 1998,
         'osu': 387,
         'rose': 99,
         'bowl': 3546,
         'januari': 68,
         '1': 3957,
         'time': 27708,
         'big': 4318,
         'game': 2847,
         'never': 9022,
         'busi': 6764,
         'unfortun': 1252,
         'situat': 464,
         'flavor': 9552,
         'price': 10496,
         'fair': 780,
         'ryan': 46,
         'pleasant': 1085,
         'appear': 747,
         'could': 8515,
         'use': 5126,
         'coupl': 2794,
         'staff': 9

#### Define max number of words in a sequence 
* Setting this max number is important because we need to define a maximum sequence length that we can set to a number we pick 
* Note: Depending on the text, it is better to set this number high
* (ex: Tweet - it is better to set this number to a high number between 50-70) 
* (ex: bigger text - you can set it to 200 or more) 
* In our trial, we're just going to start off small with 50 words

The reason we need to define the sequence length is because when we use it with Tensorflow, we're going to need the same number of words/sequence length for each sequence. 

We won't be able to have sequences of different lengths. We need to map them to the same sequence size. 

In [66]:
num_words = len(counter)

# Max number of words in a sequence
max_length = 50

#### Use the Tokenizer Class

The next thing we need to use is the Tokenizer class from keras to tokenize the train sentences

In [48]:
# import the tokenizer from keras preprocessing 
from tensorflow.keras.preprocessing.text import Tokenizer

In [49]:
# Fit the tokenizer onto the train sentences 
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(X_train)

In [50]:
# Pulls the word index from the tokenizer 
word_index = tokenizer.word_index

# This is an attribute of the tokenizer
# The attribute is a dictionary where
# key = actual word 
# value = the number that will now represent that word

In [51]:
word_index

{'food': 1,
 'good': 2,
 'place': 3,
 'order': 4,
 'great': 5,
 'time': 6,
 'like': 7,
 'get': 8,
 'go': 9,
 'servic': 10,
 'one': 11,
 'back': 12,
 'tri': 13,
 'would': 14,
 'realli': 15,
 'love': 16,
 'chicken': 17,
 'restaur': 18,
 'also': 19,
 'got': 20,
 'come': 21,
 'pizza': 22,
 'wait': 23,
 'even': 24,
 'menu': 25,
 'drink': 26,
 'nice': 27,
 'delici': 28,
 'us': 29,
 'well': 30,
 'best': 31,
 'alway': 32,
 'sauc': 33,
 'make': 34,
 'fri': 35,
 'eat': 36,
 'want': 37,
 'bar': 38,
 'tabl': 39,
 'price': 40,
 'littl': 41,
 'look': 42,
 'tast': 43,
 'chees': 44,
 'first': 45,
 'came': 46,
 'definit': 47,
 'staff': 48,
 'flavor': 49,
 'friendli': 50,
 'salad': 51,
 'meal': 52,
 'pretti': 53,
 'never': 54,
 'columbu': 55,
 'much': 56,
 'ask': 57,
 'went': 58,
 'could': 59,
 'amaz': 60,
 'experi': 61,
 'locat': 62,
 'peopl': 63,
 'sandwich': 64,
 'made': 65,
 'thing': 66,
 'take': 67,
 'fresh': 68,
 'recommend': 69,
 'server': 70,
 'side': 71,
 'say': 72,
 'night': 73,
 'minut': 74,


In [52]:
# Creates the sequences from our tokenizer, based on the indices from the word_index
train_sequences = tokenizer.texts_to_sequences(X_train)

In [53]:
X_train.values[0]

'creativ menu food wonder trendi atmospher love goat chees blackberri flatbread also like meatloaf sand crafti burger'

In [54]:
# Checks the first observation of train_sequences
train_sequences[0]  # This sequence has 24 words

[1229,
 25,
 1,
 213,
 1345,
 132,
 16,
 1044,
 44,
 2979,
 1435,
 19,
 7,
 1471,
 2877,
 3226,
 106]

In [55]:
# finds the length of the first train_sequence
len(train_sequences[0])

17

In [56]:
# Now adding padding
from keras.preprocessing.sequence import pad_sequences 

train_padded = pad_sequences(
    train_sequences, maxlen=max_length, padding='post', truncating='post'
)

In [57]:
train_padded[0]

array([1229,   25,    1,  213, 1345,  132,   16, 1044,   44, 2979, 1435,
         19,    7, 1471, 2877, 3226,  106,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0], d

In [58]:
# Creates the test dataset sequences and padding
test_sequences = tokenizer.texts_to_sequences(X_test)
test_padded = pad_sequences(
    test_sequences, maxlen=max_length, padding='post', truncating='post'
)

In [59]:
print(X_train.values[0])
print(train_sequences[0])

creativ menu food wonder trendi atmospher love goat chees blackberri flatbread also like meatloaf sand crafti burger
[1229, 25, 1, 213, 1345, 132, 16, 1044, 44, 2979, 1435, 19, 7, 1471, 2877, 3226, 106]


In [60]:
# Checks to make sure that you can decode in reverse 
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

def decode(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])

decode(train_sequences[0])

'creativ menu food wonder trendi atmospher love goat chees blackberri flatbread also like meatloaf sand crafti burger'

In [61]:
# Checks the shape of the train and the shape of the test
print(f'Shape of train {train_padded.shape}')
print(f'Shape of test {test_padded.shape}')

Shape of train (64641, 150)
Shape of test (21548, 150)


### Set up the Network topology

* We could've used One Hot Encoding (OHE) to convert these indices into vectors of 0s an 1s, but this would increase the dimensionality of our features 

**Instead...**
* The Embedding layer - maps each word to a vector of a fixed size with real value elements...
* In contrast to One Hot Encoding, we can use finite size vector to represent an infinite number of real numbers. 
* We're going to use dimensionality of this embedding layer (32) and the input length will be the max length

In [62]:
# Sets up network topology 
model = Sequential()

model.add(Embedding(num_words, 32, input_length=max_length))
model.add(LSTM(64, dropout=0.1))
model.add(Dense(4, activation='softmax'))

optimizer = Adam(lr=3e-4)

model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])

In [63]:
# model_l.add(Bidirectional(LSTM(24)))

In [64]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 150, 32)           1009344   
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                24832     
_________________________________________________________________
dense_1 (Dense)              (None, 4)                 260       
Total params: 1,034,436
Trainable params: 1,034,436
Non-trainable params: 0
_________________________________________________________________


In [67]:
# Fit the model
history = model.fit(
    train_padded, y_train, epochs=25, batch_size=256, 
    validation_data=(test_padded, y_test)
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


## 2) Modeling with original text with stopwords removed

In [68]:
# Sets up X and y
X2 = df['clean_text']
y2 = df['amb_target']

In [69]:
# One-hot encoding for the categorical y response variable 
y2 = to_categorical(y2)

In [70]:
# Splits the data into training and test sets from sample
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, 
                                                    stratify=y2, 
                                                    random_state=42)

In [71]:
X_train2

25923    creative menu food wonderful trendy atmosphere...
40820    chose restaurant watch osu rose bowl january 1...
30535    really good fish ribs fish similar like queens...
72374    really huge pizza fan place close people wante...
25922    fist tacos least dozen times last night suppos...
                               ...                        
45302    area used yelp reviews decide pizza wanted try...
45214    food great portion sizes definitely filling mu...
73125    happen visited columbus first time also vegeta...
77558    restaurant great patio summer awesome selectio...
16196    usually like restaurants never ever issues one...
Name: clean_text, Length: 64641, dtype: object

### Basic NLP

In [72]:
# import the Counter function
from collections import Counter

# import the tokenizer from keras preprocessing 
from tensorflow.keras.preprocessing.text import Tokenizer

In [73]:
# Creates a function that counts unique words
def counter_word(text):
    count = Counter()
    for doc in text.values:
        for word in doc.split():
            count[word] += 1
    return count

In [74]:
X_train2

25923    creative menu food wonderful trendy atmosphere...
40820    chose restaurant watch osu rose bowl january 1...
30535    really good fish ribs fish similar like queens...
72374    really huge pizza fan place close people wante...
25922    fist tacos least dozen times last night suppos...
                               ...                        
45302    area used yelp reviews decide pizza wanted try...
45214    food great portion sizes definitely filling mu...
73125    happen visited columbus first time also vegeta...
77558    restaurant great patio summer awesome selectio...
16196    usually like restaurants never ever issues one...
Name: clean_text, Length: 64641, dtype: object

In [75]:
X_train2.values[0]

'creative menu food wonderful trendy atmosphere loved goat cheese blackberry flatbread also liked meatloaf sand crafty burger'

In [76]:
# Counts the number of times a unique word appears
counter = counter_word(X_train2)

In [77]:
# Finds the length or the number of unique words
len(counter)

47986

In [78]:
counter

Counter({'creative': 333,
         'menu': 11894,
         'food': 48023,
         'wonderful': 2724,
         'trendy': 347,
         'atmosphere': 5242,
         'loved': 3580,
         'goat': 505,
         'cheese': 9645,
         'blackberry': 78,
         'flatbread': 264,
         'also': 14929,
         'liked': 1974,
         'meatloaf': 304,
         'sand': 90,
         'crafty': 73,
         'burger': 4532,
         'chose': 779,
         'restaurant': 12940,
         'watch': 1121,
         'osu': 372,
         'rose': 90,
         'bowl': 2564,
         'january': 68,
         '1': 3956,
         'times': 6046,
         'big': 4316,
         'games': 1227,
         'never': 9022,
         'busy': 4146,
         'unfortunately': 1118,
         'situation': 392,
         'flavorful': 2044,
         'prices': 3889,
         'fair': 743,
         'ryan': 35,
         'pleasant': 1085,
         'appears': 167,
         'could': 8297,
         'use': 2014,
         'couple': 26

#### Define max number of words in a sequence 
* Setting this max number is important because we need to define a maximum sequence length that we can set to a number we pick 
* Note: Depending on the text, it is better to set this number high
* (ex: Tweet - it is better to set this number to a high number between 50-70) 
* (ex: bigger text - you can set it to 200 or more) 
* In our trial, we're just going to start off small with 50 words

The reason we need to define the sequence length is because when we use it with Tensorflow, we're going to need the same number of words/sequence length for each sequence. 

We won't be able to have sequences of different lengths. We need to map them to the same sequence size. 

In [79]:
num_words = len(counter)

# Max number of words in a sequence
max_length = 50

#### Use the Tokenizer Class

The next thing we need to use is the Tokenizer class from keras to tokenize the train sentences

In [80]:
# import the tokenizer from keras preprocessing 
from tensorflow.keras.preprocessing.text import Tokenizer

In [81]:
# Fit the tokenizer onto the train sentences 
tokenizer = Tokenizer(num_words=num_words)
tokenizer.fit_on_texts(X_train2)

In [82]:
# Pulls the word index from the tokenizer 
word_index = tokenizer.word_index

# This is an attribute of the tokenizer
# The attribute is a dictionary where
# key = actual word 
# value = the number that will now represent that word

In [83]:
word_index

{'food': 1,
 'good': 2,
 'place': 3,
 'great': 4,
 "i'": 5,
 'like': 6,
 'service': 7,
 'time': 8,
 'get': 9,
 "it'": 10,
 'one': 11,
 'back': 12,
 'go': 13,
 'really': 14,
 'would': 15,
 'chicken': 16,
 'order': 17,
 'also': 18,
 'got': 19,
 'ordered': 20,
 'restaurant': 21,
 'us': 22,
 'well': 23,
 'menu': 24,
 'delicious': 25,
 'love': 26,
 'best': 27,
 'pizza': 28,
 'always': 29,
 'nice': 30,
 "don'": 31,
 'try': 32,
 'even': 33,
 'little': 34,
 'bar': 35,
 'first': 36,
 'sauce': 37,
 'came': 38,
 'cheese': 39,
 'staff': 40,
 'definitely': 41,
 'friendly': 42,
 "didn'": 43,
 'pretty': 44,
 'never': 45,
 'much': 46,
 'columbus': 47,
 'went': 48,
 'amazing': 49,
 'could': 50,
 'come': 51,
 'made': 52,
 'people': 53,
 'salad': 54,
 'wait': 55,
 'fresh': 56,
 'experience': 57,
 'eat': 58,
 'better': 59,
 'meal': 60,
 'make': 61,
 'two': 62,
 'table': 63,
 'night': 64,
 'lunch': 65,
 "you'": 66,
 'everything': 67,
 'drinks': 68,
 '2': 69,
 'minutes': 70,
 'way': 71,
 'location': 72,
 'g

In [84]:
# Creates the sequences from our tokenizer, based on the indices from the word_index
train_sequences2 = tokenizer.texts_to_sequences(X_train2)

In [85]:
X_train2.values[0]

'creative menu food wonderful trendy atmosphere loved goat cheese blackberry flatbread also liked meatloaf sand crafty burger'

In [86]:
train_sequences2[0]  # This sequence has 24 words

[1579,
 24,
 1,
 254,
 1530,
 114,
 180,
 1166,
 39,
 3910,
 1847,
 18,
 372,
 1679,
 3599,
 4067,
 137]

In [87]:
len(train_sequences2[0])

17

In [88]:
# Now adding padding
from keras.preprocessing.sequence import pad_sequences 

train_padded2 = pad_sequences(
    train_sequences2, maxlen=max_length, padding='post', truncating='post'
)

In [89]:
train_padded2[0]

array([1579,   24,    1,  254, 1530,  114,  180, 1166,   39, 3910, 1847,
         18,  372, 1679, 3599, 4067,  137,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0], dtype=int32)

In [90]:
# Creates the test dataset sequences and padding
test_sequences2 = tokenizer.texts_to_sequences(X_test2)
test_padded2 = pad_sequences(
    test_sequences2, maxlen=max_length, padding='post', truncating='post'
)

In [91]:
print(X_train2.values[0])
print(train_sequences2[0])

creative menu food wonderful trendy atmosphere loved goat cheese blackberry flatbread also liked meatloaf sand crafty burger
[1579, 24, 1, 254, 1530, 114, 180, 1166, 39, 3910, 1847, 18, 372, 1679, 3599, 4067, 137]


In [92]:
# Checks to make sure that you can decode in reverse 
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

def decode(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])

decode(train_sequences2[0])

'creative menu food wonderful trendy atmosphere loved goat cheese blackberry flatbread also liked meatloaf sand crafty burger'

In [93]:
# Checks the shape of the train and the shape of the test
print(f'Shape of train {train_padded2.shape}')
print(f'Shape of test {test_padded2.shape}')

Shape of train (64641, 50)
Shape of test (21548, 50)


### Set up the Network topology

* We could've used One Hot Encoding (OHE) to convert these indices into vectors of 0s an 1s, but this would increase the dimensionality of our features 

**Instead...**
* The Embedding layer - maps each word to a vector of a fixed size with real value elements...
* In contrast to One Hot Encoding, we can use finite size vector to represent an infinite number of real numbers. 
* We're going to use dimensionality of this embedding layer (32) and the input length will be the max length

In [94]:
# Sets up network topology 
model2 = Sequential()

model2.add(Embedding(num_words, 32, input_length=max_length))
model2.add(LSTM(64, dropout=0.1))
model2.add(Dense(4, activation='softmax'))

optimizer = Adam(lr=3e-4)

model2.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])

In [95]:
# model_l.add(Bidirectional(LSTM(24)))

In [96]:
model2.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 50, 32)            1535552   
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                24832     
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 260       
Total params: 1,560,644
Trainable params: 1,560,644
Non-trainable params: 0
_________________________________________________________________


In [97]:
# Fit the model
history2 = model2.fit(
    train_padded2, y_train2, epochs=25, batch_size=256, 
    validation_data=(test_padded2, y_test2)
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
