## Modeling

**Imports**

In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
np.random.seed(42)

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the saved modeling matrix from disk, then display its dimensions.
model_sets = np.load(file = '../data/model_sets.npy')
model_sets.shape

(36003, 21)

In [3]:
# Each row is one training example: every column except the last holds the
# songs already played (features); the last column is the song that came
# next (target).
X_rr = model_sets[:, :-1]
y_rr = model_sets[:, -1]

In [4]:
# Sanity check on the split above: a 2-D feature matrix and a 1-D target vector.
X_rr.shape, y_rr.shape

((36003, 20), (36003,))

**Train-Test Split**

In [5]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_rr,
    y_rr,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Report the shapes of the train and test partitions to confirm the split.
print(f'X_train and y_train: {X_train.shape}, {y_train.shape}')
print(f'X_test and y_test: {X_test.shape}, {y_test.shape}')

X_train and y_train: (28802, 20), (28802,)
X_test and y_test: (7201, 20), (7201,)


In [7]:
# Standardize the features for the RNN. The scaler learns its statistics from
# the training rows only and is then applied unchanged to both partitions, so
# nothing from the test set leaks into the scaling.
sc = StandardScaler().fit(X_train)
X_train_sc = sc.transform(X_train)
X_test_sc = sc.transform(X_test)

In [8]:
#remember from '4_prep ...ipynb'

#number of all unique songs played:
#len(set(setlists_string))

#970

In [9]:
# shape_X_train = X_train_sc.reshape((X_train_sc.shape[0], 1, X_train_sc.shape[1]))
# shape_X_test = X_test_sc.reshape((X_test_sc.shape[0], 1, X_test_sc.shape[1]))

In [10]:
#https://stackoverflow.com/questions/55774632/gridsearchcv-randomizedsearchcv-with-lstm
def model_func(layer_one_neurons = 64, layer_two_neurons = 32, LSTM_layers = 100,
               n_timesteps = 20, n_classes = 970):
    """Build and compile the next-song network used as ``build_fn`` for the grid search.

    Architecture: Dense (applied per timestep) -> LSTM -> Dropout -> Dense ->
    softmax over every candidate song.

    Parameters
    ----------
    layer_one_neurons : int
        Units in the Dense layer that precedes the LSTM.
    layer_two_neurons : int
        Units in the Dense layer that follows the LSTM.
    LSTM_layers : int
        Number of units in the single LSTM layer. The name is kept for
        compatibility with the existing parameter grid; it is a unit count,
        not a number of stacked layers.
    n_timesteps : int
        Length of the input sequence (number of previously played songs).
        Defaults to 20, the value that was previously hard-coded.
    n_classes : int
        Size of the softmax output (number of unique songs). Defaults to 970,
        the value that was previously hard-coded.

    Returns
    -------
    tensorflow.keras.models.Sequential
        The compiled, untrained model.
    """
    #instantiate the model:
    model = Sequential()

    # Each previously played song is one timestep carrying a single feature.
    # NOTE(review): the notebook feeds 2-D (batch, n_timesteps) arrays; this
    # presumably relies on Keras adding the trailing axis -- TODO confirm on the
    # installed TF version, or reshape the inputs explicitly.
    model.add(Dense(layer_one_neurons, activation = 'relu',
                    input_shape = (n_timesteps, 1, )))

    # NOTE(review): stddev = 1 initializers combined with a relu LSTM may be
    # numerically unstable -- left unchanged here, worth confirming.
    model.add(LSTM(LSTM_layers, activation = 'relu',
                   kernel_initializer = RandomNormal(mean = 0.0, stddev = 1, seed = 42),
                   bias_initializer = RandomNormal(mean = 0.0, stddev = 1, seed = 42)))
    model.add(Dropout(rate = 0.25))

    # `input_shape` is only meaningful on the first layer of a Sequential model,
    # so it is not repeated here.
    model.add(Dense(layer_two_neurons, activation = 'relu'))

    # One probability per candidate song.
    model.add(Dense(n_classes, activation = 'softmax'))

    #compiling:
    # A softmax over classes trained against a 1-D label vector is a
    # classification problem, so use sparse categorical cross-entropy rather
    # than mean squared error.
    # Assumes the targets are integer song ids in [0, n_classes) -- TODO confirm
    # against the encoding produced in the prep notebook.
    model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy',
                  metrics = ['acc'])

    return model

In [11]:
# Wrap the Keras model builder in the scikit-learn estimator interface so that
# GridSearchCV can construct and fit a fresh network for each candidate.
nn = KerasRegressor(
    build_fn=model_func,
    batch_size=64,
    verbose=0,
)

  nn = KerasRegressor(build_fn = model_func, batch_size = 64, verbose = 0)


In [12]:
# Candidate hyper-parameters: training length plus the three layer widths
# accepted by model_func (2 * 2 * 2 * 3 = 24 combinations).
param_grid = dict(
    epochs=[100, 50],
    layer_one_neurons=[32, 42],
    layer_two_neurons=[32, 42],
    LSTM_layers=[10, 32, 100],
)

# Exhaustive search over the grid with 5-fold cross-validation.
gs = GridSearchCV(estimator=nn, param_grid=param_grid, cv=5)

In [None]:
# Run the search on the standardized training data; verbose=0 is forwarded
# to the underlying Keras fit call to silence per-epoch logging.
gs.fit(X_train_sc, y_train, verbose=0)

# Best cross-validated score first, then the parameters that achieved it.
print(gs.best_score_)
gs.best_params_

In [None]:
# `model` was only ever a local variable inside model_func, so take the fitted
# Keras network from the grid search's refit best estimator instead.
model = gs.best_estimator_.model

# The network was trained on standardized features, so predict on X_train_sc
# rather than the raw X_train.
output_array = model.predict(X_train_sc)