# Data Processing 

In [1]:
# Dependencies 
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from tensorflow.keras.utils import to_categorical

In [2]:
# Extract Data - Model to learn from historical data 
# UAT: csv manually created.
# Production: Feed from a SQLite table similiar to the below structure, created as a dictionary of arrays in JS
phrases = pd.read_csv('../Data/Phrases_Frequency_test.csv') 
phrases.head()

Unnamed: 0,Phrase,Frequency
0,A Pile Of Coats,37
1,A Pile Of Coats On The Bed,2
2,A Shelf Full Of Knickknacks,11
3,A Sock With Holes In It,2
4,A Vase Filled With Sunflowers,3


In [3]:
#Quality Check: no spaces embedded in column names 
print(phrases.columns.tolist())

['Phrase', 'Frequency']


# Model - SVM

In [4]:
# Assign X & y 
X = phrases.drop("Phrase", axis=1)
y = phrases["Phrase"]


In [6]:
#Data Split: Train vs Test  
from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X, one_hot_y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)


In [7]:
# Scale Data 
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

  return self.partial_fit(X, y)


In [19]:
# Explore data 
X_train_scaled

array([[0.51020408],
       [0.48979592],
       [0.48979592],
       ...,
       [0.44897959],
       [0.57142857],
       [0.42857143]])

In [20]:
#Create Model 
from sklearn.svm import SVC 
model = SVC(kernel='linear')

#Fit Model (train data)
model.fit(X_train_scaled, y_train)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [21]:
predictions = model.predict(X_test_scaled)
predictions

array(['Wall Decor & Mirrors', 'Yellow Sundress', 'Warm Snug Cap',
       'Wool Shag Rug', 'Thick Wool Snow Gloves', 'Wet Washcloth',
       'Wood-Paneled Ceiling', 'Wooden Shelf', 'Warm Fuzzy Blanket',
       'Throw Rugs', 'Wall-Mounted Wine Racks', 'Wooden Shelf',
       'Wooden End Table', 'Warm Tea', 'Woven Horse Blanket',
       'Vanity Bench', 'Worn Rug', 'Wet Mopping Pads',
       'Wall Decor & Mirrors', 'Tin Pail', 'Teal Accent Chair',
       'Wet Washcloth', 'Wet Mopping Pads', 'Wool Blazer',
       'Writing Tablet', 'Wool Blazer', 'Thick Wool Snow Gloves',
       'Wireless Gaming Controller', 'Wooden Stepladder',
       'Wet Mopping Pads', 'Wet Mopping Pads',
       'White Goose Down Comforter', 'Wooden Stepladder', 'Wooden Shelf',
       'Warm Fuzzy Blanket', 'Wall-Mounted Wine Racks', 'Warm Snug Cap',
       'Woven Horse Blanket', 'Wallpaper & Paneling',
       'Writing A Love Letter', 'Wood Window Blinds', 'Wood Bookmark',
       'Writing A Love Letter', 'Wool Shag Rug', '

In [23]:
# Evaluate / Validate Model - test data 

from sklearn.metrics import classification_report

print('Test Acc: %.3f' % model.score(X_test_scaled, y_test))
print(classification_report(y_test, predictions))

Test Acc: 0.000
                                          precision    recall  f1-score   support

                      After-Shave Lotion       0.00      0.00      0.00         1
                     Aloe-Vera Hand Soap       0.00      0.00      0.00         1
            Annual Sailing Weekend Event       0.00      0.00      0.00         1
                Antibacterial Hand Wipes       0.00      0.00      0.00         1
                         Antique Bathtub       0.00      0.00      0.00         1
                  Antique Sewing Machine       0.00      0.00      0.00         1
       Antique Sterling-Silver Coffeepot       0.00      0.00      0.00         1
                        Appointment Book       0.00      0.00      0.00         1
                  Arm & Hammer Spinbrush       0.00      0.00      0.00         1
                            Attic Stairs       0.00      0.00      0.00         1
            Automatic Garage-Door Opener       0.00      0.00      0.00         1

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


# Notes 

In [None]:
# ML Model Overview Functionality -
# --------------------------------------------------------------------------------------
# Goal: AI predict phrase and then parse to select letter
# History Data: frequency of phrases used in past games - data size increases with more games played
# Supervised Model however assigned phrase feature (column) as y variable b/c we want AI to predict that prior to selecting letters
# Current State: Model failed due to zero scores
# Model failing b/c the paired points aren't close enough to each other to create groups
# This model may be better for repeating lables on rows



In [None]:
# Pending Questions / Problems -
# ----------------------------------------------------------------------------------------------
# Scores are zero:
# This means the prediction model failed, is this due to the lack of more independent variables? 
# Should the data set be structured in a format where the phrase is listed on a row every time it is selected for a game? 



