# Data Processing 

In [117]:
# Dependencies 
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical

In [118]:
# Extract data: the historical phrase frequencies the model learns from.
# UAT: the CSV was created manually.
# Production: fed from a SQLite table with a structure similar to the preview
# below, created as a dictionary of arrays in JS.
PHRASES_CSV_PATH = '../Data/Phrases_Frequency_test.csv'
phrases = pd.read_csv(PHRASES_CSV_PATH)
phrases.head()

Unnamed: 0,Phrase,Frequency
0,A Pile Of Coats,37
1,A Pile Of Coats On The Bed,2
2,A Shelf Full Of Knickknacks,11
3,A Sock With Holes In It,2
4,A Vase Filled With Sunflowers,3


In [119]:
# Quality check: print the column names so embedded spaces can be spotted by eye.
column_names = list(phrases.columns)
print(column_names)

['Phrase', 'Frequency']


# Model - Neural Networks

In [120]:
# Target (y) is the phrase text; features (X) are every remaining column.
y = phrases["Phrase"]
X = phrases.drop(columns="Phrase")
print(X.shape, y.shape)

(1556, 1) (1556,)


In [121]:
# Data split: train vs test, using scikit-learn's default split sizes.
from sklearn.model_selection import train_test_split

# random_state pins the shuffle so the same rows land in each split on every run.
splits = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = splits


In [98]:
# Scale features with min-max scaling. The scaler is fitted on the training
# split only and then applied unchanged to both splits.
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

  return self.partial_fit(X, y)


In [126]:
# One-hot encode y data
from tensorflow.keras.utils import to_categorical

# Step 1: Label-encode (map each phrase to an integer class id).
# The encoder is fitted on the full label set `y`, not just `y_train`, because
# LabelEncoder.transform raises on labels it did not see during fit, and the
# test split can contain phrases that are absent from the training split.
label_encoder = LabelEncoder()
label_encoder.fit(y)
# Backward-compatible alias for the name this cell previously defined.
label_encoder_train = label_encoder
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: One-hot encode (convert the class ids to binary vectors).
# num_classes is passed explicitly so both splits get the same number of
# columns; left to its default, to_categorical sizes each array from the
# largest id present in that array, so train and test widths could differ.
num_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=num_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=num_classes)

In [166]:
# Create Deep Learning Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Layer sizes. The input and output widths are read from the prepared arrays so
# the network always matches the data it is fitted on; only the hidden width is
# a free choice.
n_features = X_train_scaled.shape[1]
n_classes = y_train_categorical.shape[1]
# Kept from the original notebook, where it was set to the row count of the data.
hidden_units = 1556

# Sequential model - a linear stack of layers
model = Sequential()
model.add(Dense(units=hidden_units, activation='relu', input_dim=n_features))
model.add(Dense(units=hidden_units, activation='relu'))
# Softmax output: one unit per one-hot column of y.
model.add(Dense(units=n_classes, activation='softmax'))

# Compile model: categorical cross-entropy pairs with the one-hot encoded targets.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [167]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_53 (Dense)             (None, 1556)              3112      
_________________________________________________________________
dense_54 (Dense)             (None, 1556)              2422692   
_________________________________________________________________
dense_55 (Dense)             (None, 1556)              2422692   
Total params: 4,848,496
Trainable params: 4,848,496
Non-trainable params: 0
_________________________________________________________________


In [168]:
# Fit model on the scaled training features and the one-hot training labels.
fit_options = dict(epochs=60, shuffle=True, verbose=2)
model.fit(X_train_scaled, y_train_categorical, **fit_options)

Epoch 1/60
1167/1167 - 1s - loss: 7.3705 - acc: 0.0000e+00
Epoch 2/60
1167/1167 - 1s - loss: 7.3407 - acc: 0.0000e+00
Epoch 3/60
1167/1167 - 1s - loss: 7.2856 - acc: 0.0000e+00
Epoch 4/60
1167/1167 - 1s - loss: 7.0622 - acc: 8.5690e-04
Epoch 5/60
1167/1167 - 1s - loss: 6.7417 - acc: 0.0000e+00
Epoch 6/60
1167/1167 - 1s - loss: 6.5258 - acc: 8.5690e-04
Epoch 7/60
1167/1167 - 1s - loss: 6.3810 - acc: 0.0017
Epoch 8/60
1167/1167 - 1s - loss: 6.2641 - acc: 0.0026
Epoch 9/60
1167/1167 - 1s - loss: 6.1604 - acc: 0.0017
Epoch 10/60
1167/1167 - 1s - loss: 6.0805 - acc: 8.5690e-04
Epoch 11/60
1167/1167 - 1s - loss: 6.0075 - acc: 0.0017
Epoch 12/60
1167/1167 - 1s - loss: 5.9501 - acc: 0.0000e+00
Epoch 13/60
1167/1167 - 1s - loss: 5.9009 - acc: 8.5690e-04
Epoch 14/60
1167/1167 - 1s - loss: 5.8529 - acc: 0.0026
Epoch 15/60
1167/1167 - 1s - loss: 5.8105 - acc: 0.0000e+00
Epoch 16/60
1167/1167 - 1s - loss: 5.7755 - acc: 8.5690e-04
Epoch 17/60
1167/1167 - 1s - loss: 5.7391 - acc: 0.0017
Epoch 18/60
1

<tensorflow.python.keras.callbacks.History at 0x2f6aa53d438>

In [169]:
# Evaluate model.
# NOTE: this scores the *training* split, so the numbers are not a held-out estimate.
# The test-split call is left disabled, as it was before:
# model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
train_scores = model.evaluate(X_train_scaled, y_train_categorical, verbose=2)
model_loss, model_accuracy = train_scores
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

1167/1167 - 0s - loss: 4.7284 - acc: 0.0283
Normal Neural Network - Loss: 4.728353955956126, Accuracy: 0.02827763557434082


In [173]:
# Predictions for the first 10 rows of the test split.
# Sequential.predict_classes was removed from tf.keras in TF 2.6; taking the
# argmax over the softmax output yields the same class ids on every version.
class_probabilities = model.predict(X_test_scaled[:10])
encoded_predictions = np.argmax(class_probabilities, axis=-1)
# Map the integer class ids back to their phrase strings.
prediction_labels = label_encoder.inverse_transform(encoded_predictions)
prediction_labels

array(['Garden Hose', 'Handwoven Wicker Basket', 'Designer Jeans',
       'Handwoven Wicker Basket', 'Sofas & Lounge Chairs',
       'Wet Washcloth', 'End Tables', 'Freestanding Pantry',
       'Dining Table', 'End Tables'], dtype=object)

# Notes 

In [170]:
# ML Model Overview  
# ----------------------------------------------------------------------
# Goal: predict a phrase, then parse the predicted phrase into letters for selection
# Historical data: frequency of each phrase
# Supervised model, because the phrase is the y output and the frequency is the x input
# Accuracy scores reported during the fitting step are extremely low
# Evaluation was conducted on the train data due to an error with the test data
# Since the scores of both supervised models (SVM, DL) are low, the data set being used will not suffice



In [None]:
# Pending Questions / Problems -
# Unable to evaluate model on test data due to "shape mismatch" of input elements



