In [1]:
import pandas as pd
import os

In [2]:
# Relative location of the merged songs CSV, one directory above the notebook.
COMBINED_DATA = os.path.join(os.pardir, 'data', 'combined-data.csv')

In [None]:
# Load the combined songs CSV into a DataFrame and preview its first rows.
songs_df = pd.read_csv(COMBINED_DATA)
songs_df.head()

In [None]:
# List the column labels available in the loaded dataset.
songs_df.columns

In [None]:
# Build the feature frame: everything except the descriptive columns and
# 'Winner', which is used as the prediction target further down.
X = songs_df.drop(
    columns=[
        'Artist',
        'Title',
        'URL',
        'TrackId',
        'Album',
        'Image',
        'Winner',
    ]
)
X.head()

In [None]:
# Inspect the dtype of every column kept in the feature frame.
X.dtypes

In [None]:
# Store the 'Explicit' flag as integers so it can be scaled with the other features.
X['Explicit'] = X['Explicit'].astype(int)

In [None]:
# Frequency of each distinct value in the 'Explicit' column.
X['Explicit'].value_counts()

In [None]:
# Features deliberately left out of the model inputs.
# Alternative, broader exclusion list kept for reference:
# ['Year',  'Duration (ms)', 'Key',  'Mode', 'Speechiness', 'Tempo', 'Valence', 'Explicit']
excluded_features = ['Popularity']

# Rebind X rather than dropping in place, and ignore columns that are already
# gone, so this cell can be re-run without raising a KeyError on the second run.
X = X.drop(columns=excluded_features, errors='ignore')


In [None]:
# Target vector: the 'Winner' column taken from the original (undropped) frame.
y = songs_df.loc[:, 'Winner']
y.head()

In [None]:
# Number of rows per distinct target value (class balance check).
y.value_counts()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, Normalizer, StandardScaler
from keras.utils import to_categorical

# Seeded, shuffled split that keeps the class proportions of `y` in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    shuffle=True,
    random_state=1,
)

# Learn the scaling statistics from the training rows only, then apply the
# same fitted scaler to the held-out rows.
X_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# One-hot encode both label vectors for the softmax / categorical cross-entropy model.
y_train_categorical, y_test_categorical = (
    to_categorical(labels) for labels in (y_train, y_test)
)

In [None]:
# (rows, columns) of the feature frame; the column count is the network's input width.
X.shape

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# Width of the network input: one value per feature column.
dims = len(X.columns)


def make_model(dense_layer_sizes, activation):
    """Build and compile a fully connected binary classifier.

    Used as the build function for ``KerasClassifier`` so that the layer
    layout and activation can be searched over as hyper-parameters.

    Args:
        dense_layer_sizes: Iterable with the number of units of each hidden
            ``Dense`` layer, in order from input to output.
        activation: Activation passed to every hidden layer.

    Returns:
        A compiled ``Sequential`` model ending in a 2-unit softmax layer,
        using the Adam optimizer, categorical cross-entropy loss and the
        accuracy metric.

    Note:
        The input width is read from the module-level ``dims``.
    """
    model = Sequential()
    for position, layer_units in enumerate(dense_layer_sizes):
        # Only the first layer declares the input width; every later layer
        # takes its input size from the layer before it.
        input_spec = {'input_dim': dims} if position == 0 else {}
        model.add(Dense(units=layer_units, activation=activation, **input_spec))

    # Two output units, matching the one-hot encoded labels.
    model.add(Dense(units=2, activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Hyper-parameter grid: each entry of dense_size_candidates is one hidden-layer layout.
dense_size_candidates = [[80, 160]]
classifier = KerasClassifier(make_model)
validator = GridSearchCV(classifier,
                         param_grid={'dense_layer_sizes': dense_size_candidates,
                                     'epochs': [115],
                                     'activation': ['relu'],
                                    }, n_jobs=1)
# validation_data is passed through to the Keras fit call. The network is
# trained on scaled features, so the validation features must be the scaled
# test split as well; the raw X_test would give meaningless validation metrics.
validator.fit(X_train_scaled, y_train_categorical,
              validation_data=(X_test_scaled, y_test_categorical))



In [None]:
# Report the winning hyper-parameter combination followed by its grid-search score.
print(
    'The parameters of the best model are: ',
    validator.best_params_,
    validator.best_score_,
    sep='\n',
)

In [None]:
# Pull the underlying Keras model out of the best estimator found by the grid search.
best_model = validator.best_estimator_.model
metric_names = best_model.metrics_names
# Score the model on the scaled held-out split; the values line up with metric_names.
metric_values = best_model.evaluate(X_test_scaled, y_test_categorical)
# Print each metric name next to its value.
for metric, value in zip(metric_names, metric_values):
    print(metric, ': ', value)

In [None]:
# Persist the trained network in HDF5 format next to the data folder.
model_file = os.path.join('..', 'model', 'grammy_prediction_model.h5')
# Create the output folder first so the save does not fail when it is missing.
os.makedirs(os.path.dirname(model_file), exist_ok=True)
best_model.save(model_file)


In [None]:
# Feature columns, in order, of the split the scaler and model were evaluated on.
X_test.columns


In [None]:
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23; prefer the standalone package and fall back only on old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Persist the fitted scaler so new inputs can be scaled the same way as the training data.
scaler_file = os.path.join('..', 'model', 'grammy_prediction_scaler.sav')
# Create the output folder first so the dump does not fail when it is missing.
os.makedirs(os.path.dirname(scaler_file), exist_ok=True)
joblib.dump(X_scaler, scaler_file)