In [1]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer
from sklearn.model_selection import GroupShuffleSplit

from tensorflow import keras
from tensorflow.keras import layers, callbacks





In [2]:
# Plot settings
plt.style.use('seaborn-whitegrid')
plt.rc('figure', autolayout=True)
plt.rc('axes', labelweight='bold', labelsize='large',
       titleweight='bold', titlesize=18, titlepad=10)


OSError: 'seaborn-whitegrid' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)

In [None]:
from learntools.core import binder
binder.bind(globals())
from learntools.deep_learning_intro.ex4 import *

print("Setup Complete")


In [None]:
spotify = pd.read_csv('../input/dl-course-data/spotify.csv')
spotify.head()


In [None]:
X = spotify.copy().dropna()
y = X.pop('track_popularity')

artists = X['track_artist']


In [None]:
features_num = [
    'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness',
    'liveness', 'valence', 'tempo', 'duration_ms'
]

features_cat = ['playlist_genre']


In [None]:
preprocessor = make_column_transformer(
    (StandardScaler(), features_num),
    (OneHotEncoder(), features_cat),
)


In [None]:
def group_split(X, y, group, train_size=0.75):
    splitter = GroupShuffleSplit(train_size=train_size)
    train, valid = next(splitter.split(X, y, groups=group))
    return X.iloc[train], X.iloc[valid], y.iloc[train], y.iloc[valid]


In [None]:
X_train, X_valid, y_train, y_valid = group_split(X, y, artists)


In [None]:
X_train = preprocessor.fit_transform(X_train)
X_valid = preprocessor.transform(X_valid)

y_train = y_train / 100
y_valid = y_valid / 100


In [None]:
input_shape = [X_train.shape[1]]
print("Input shape:", input_shape)


In [None]:
early_stopping = callbacks.EarlyStopping(
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
)


In [None]:
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=input_shape),
    layers.Dense(64, activation='relu'),
    layers.Dense(1),
])


In [None]:
model.compile(
    optimizer='adam',
    loss='mae',
)


In [None]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    batch_size=512,
    epochs=200,
    callbacks=[early_stopping],
    verbose=0,
)


In [None]:
history_df = pd.DataFrame(history.history)
history_df.head()


In [None]:
history_df.loc[10:, ['loss', 'val_loss']].plot()
plt.show()


In [None]:
print(
    "Minimum Validation Loss: {:0.4f}"
    .format(history_df['val_loss'].min())
)
