In [3]:
# modules we'll use
from collections.abc import Callable  # the collections.Callable alias was removed in Python 3.10

import numpy as np
import pandas as pd
import gensim
import tensorflow as tf
import sys

from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers

#mount Google Drive
#from google.colab import drive
#drive.mount('/content/drive', force_remount=True)
#sys.path.insert(0,'/content/drive/MyDrive/Praca_Inżynierska/kickstarter/code')
from genetic_algorithm.crossover import OnePointCrossover
from genetic_algorithm.gene import FloatGene, Gene, IntegerGene
from genetic_algorithm.genetic_algorithm import GeneticAlgorithm
from genetic_algorithm.genome import Genome, LabeledSequence
from genetic_algorithm.initialization import RandomInitialization
from genetic_algorithm.mutation import RandomMutation
from genetic_algorithm.parent_selection import TournamentSelection

from preprocessing.label_binarizer import DataFrameLabelBinarizer
from plotting.loss_plotter import LossPlotter
from plotting.real_multi_pred_plotter import RealMultiPredPlotter

TypeError: <class 'genetic_algorithm.genome.LabeledSequence'> is not a generic class

In [None]:
# read in all our data
# Google Drive's path: /content/drive/MyDrive/Praca_Inżynierska/kickstarter/data/spotify.csv
spotify_orig = pd.read_csv("../data/spotify.csv")
# set seeds for reproducibility: NumPy's global RNG is used by DataFrame.sample(),
# TensorFlow's global seed is used by Keras weight initialisation, dropout and shuffling
# NOTE(review): the genetic_algorithm package may use its own RNG (e.g. `random`) -- confirm and seed it too
np.random.seed(0)
tf.random.set_seed(0)

In [None]:
# preview 5 randomly chosen rows of the raw Spotify DataFrame
spotify_orig.sample(n=5)

In [None]:
# print column names, non-null counts and dtypes of the raw DataFrame
spotify_orig.info()

In [None]:
# encode the categorical 'key' column with the project's DataFrameLabelBinarizer
# (presumably one-hot encoding -- confirm against preprocessing.label_binarizer)
lb_category = DataFrameLabelBinarizer(
    data_frame=spotify_orig,
    column_to_encode='key',
)
spotify_converted = lb_category.encode()

In [None]:
# convert release_date to a Unix timestamp in seconds:
# the int64 form of a datetime64[ns] value is nanoseconds since the epoch, hence the division by 10**9
release_dates = pd.to_datetime(spotify_converted['release_date'], format="%Y-%m-%d")
spotify_converted['release_date'] = release_dates.astype(np.int64) / 10**9

In [None]:
# remove the columns that are not fed to the model
redundant_columns = ['artists', 'name', 'year', 'id']
spotify_reduced = spotify_converted.drop(columns=redundant_columns)

In [None]:
# print column names, non-null counts and dtypes after the redundant columns were dropped
spotify_reduced.info()

In [None]:
# preview 5 randomly chosen rows of spotify_converted (the encoded frame, before the column drop)
spotify_converted.sample(n=5)

In [None]:
# position of the 'popularity' column (the prediction target) within spotify_reduced;
# used later to separate the target from the feature columns
popularity_index = spotify_reduced.columns.get_loc("popularity")

In [None]:
# convert the reduced DataFrame to a NumPy array; column order is preserved,
# so popularity_index still addresses the target column
spotify_numpy = spotify_reduced.to_numpy()

In [None]:
# split the array column-wise: the popularity column becomes the target y,
# every remaining column stays in the feature matrix X
y = spotify_numpy[:, popularity_index]
X = np.delete(spotify_numpy, popularity_index, axis=1)

In [None]:
# fraction (0..1) of all y values that are above 0.5
np.count_nonzero(y > 0.5) / len(y)

In [None]:
# hold out 30 % of the samples as the test set (shuffled, fixed random_state)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=42,
)

In [None]:
# number of samples in the training portion (training + validation)
len(X_train)

In [None]:
# number of samples held out for testing
len(X_test)

In [None]:
# fraction (0..1) of y_train values that are above 0.5
np.count_nonzero(y_train > 0.5) / len(y_train)

In [None]:
# fraction (0..1) of y_test values that are above 0.5
np.count_nonzero(y_test > 0.5) / len(y_test)

In [None]:
# create model

def _hidden_block(units, name):
    """Return the Dense -> BatchNormalization -> Dropout layers that form one hidden stage."""
    return [
        layers.Dense(units=units, kernel_initializer=keras.initializers.he_normal(),
                     bias_initializer=keras.initializers.he_normal(),
                     activation=keras.activations.elu, name=name),
        layers.BatchNormalization(momentum=0.99),
        layers.Dropout(rate=0.5),
    ]


model = keras.models.Sequential()

# one input per feature column of X_train
model.add(layers.Input(shape=(X_train.shape[1],), name='inputs'))

# two hidden stages: 30 units, then 15 units
for hidden_units, hidden_name in ((30, 'hidden1'), (15, 'hidden2')):
    for hidden_layer in _hidden_block(hidden_units, hidden_name):
        model.add(hidden_layer)

# single linear output unit
# NOTE(review): batch-normalising the output of a regression head is unusual -- confirm it is intended
model.add(layers.Dense(units=1, activation=keras.activations.linear, name="outputs"))
model.add(layers.BatchNormalization(momentum=0.99))

# print model details
model.summary()

In [None]:
# train model
# optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)
# model.compile(optimizer=optimizer, loss=keras.losses.mse, metrics=[keras.metrics.mse, keras.metrics.mae,
#                                                                    keras.metrics.RootMeanSquaredError(name='rmse')])
# history=model.fit(X_train, y_train, epochs=10, batch_size=170, verbose=1, validation_split=0.2)

In [None]:
# number of samples used for fitting when 80 % of X_train is kept for training
int(len(X_train) * 0.8)

In [None]:
# validation set size: whatever remains after the 80 % training share is taken,
# so the training and validation sizes always add up to X_train.shape[0]
# (int(n*0.2) on its own can come out one sample short because of truncation)
X_train.shape[0] - int(X_train.shape[0]*0.8)

In [None]:
class GeneticAlgorithmImpl(GeneticAlgorithm):
    """GeneticAlgorithm that delegates each step to a concrete strategy object.

    Initialisation uses RandomInitialization, parent selection uses
    TournamentSelection, crossover uses OnePointCrossover and mutation uses
    RandomMutation.
    """

    def init_population(self):
        """Create the starting population from this instance's population_size and genome."""
        initializer = RandomInitialization(self.population_size, self.genome)
        return initializer.init_population()

    def select_parents(self, fitness):
        """Choose parents from the current population given the `fitness` values."""
        selector = TournamentSelection()
        return selector.select_parents(self.population, fitness)

    def crossover(self, parents):
        """Recombine `parents` with a one-point crossover."""
        operator = OnePointCrossover()
        return operator.crossover(parents)

    def mutate(self, crossovers):
        """Apply random mutation to the crossover results."""
        mutator = RandomMutation()
        return mutator.mutate(crossovers)

In [None]:
# genes (hyper-parameters) the genetic algorithm searches over, with their bounds
labeled_sequence = LabeledSequence()
for hyperparameter_gene in (
    FloatGene(label='learning_rate', minimum=0.01, maximum=0.2),
    IntegerGene(label='batch_size', minimum=100, maximum=200),
):
    labeled_sequence.append(hyperparameter_gene)

In [None]:
def rank(params: "LabeledSequence[Gene]") -> float:
    """Train the network with one candidate's hyper-parameters and return its score.

    The annotation is a string so it is not evaluated when the function is
    defined; subscripting LabeledSequence at runtime raises TypeError if the
    class is not generic.

    Args:
        params: labelled genes; the values of 'learning_rate' and 'batch_size'
            are read from it.

    Returns:
        The mse metric value of the last training epoch, as a float.
    """
    learning_rate = params.get_by_label('learning_rate').value
    batch_size = params.get_by_label('batch_size').value

    # Train a fresh, re-initialised copy of the architecture for every candidate.
    # Fitting the shared global `model` would let each candidate continue from the
    # weights left by the previous one, making their scores incomparable.
    candidate = keras.models.clone_model(model)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    candidate.compile(optimizer=optimizer, loss=keras.losses.mse,
                      metrics=[keras.metrics.mse, keras.metrics.mae,
                               keras.metrics.RootMeanSquaredError(name='rmse')])
    history = candidate.fit(X_train, y_train, epochs=10, batch_size=batch_size, verbose=1,
                            validation_split=0.2)

    # second history entry is the mse metric; take the last epoch instead of a
    # hard-coded index so the lookup stays valid if `epochs` changes
    # NOTE(review): this is the training-split value, not the validation one -- confirm that is intended
    return float(list(history.history.values())[1][-1])

In [None]:
# wire the genes and the rank function into a genome, then run the search
genome = Genome(genes=labeled_sequence, rank_funk=rank)
ga_settings = dict(population_size=3, generation_count=3, best_last_generations_size=3)
algorithm_impl = GeneticAlgorithmImpl(genome=genome, **ga_settings)
algorithm_impl.calculate()

In [None]:
# save model
# model.save('models/spotify_v2_he_elu_adam_30_15')

In [None]:
# print history keys
# print(history.history.keys())

# print mse value from last epoch
# print(list(history.history.items())[1][1][9])

In [None]:
# loss_plotter = LossPlotter(history)
# loss_plotter.plot()

In [None]:
# Evaluate the saved models on the test data using `evaluate`

EVAL_BATCH_SIZE = 170


def load_and_evaluate(model_name):
    """Load the model saved under models/<model_name>, evaluate it on the test split and print the result.

    Args:
        model_name: directory name of the saved model inside 'models/'.

    Returns:
        A (loaded_model, results) tuple, where results is whatever `evaluate` returned.
    """
    loaded_model = tf.keras.models.load_model('models/' + model_name)
    results = loaded_model.evaluate(X_test, y_test, batch_size=EVAL_BATCH_SIZE)
    print(model_name + " results:")
    print(results)
    return loaded_model, results


# the individual names are kept because later cells refer to them
spotify_v2_he_elu_adam_20_10, spotify_v2_he_elu_adam_20_10_results = \
    load_and_evaluate('spotify_v2_he_elu_adam_20_10')

spotify_v2_he_elu_adam_30_15, spotify_v2_he_elu_adam_30_15_results = \
    load_and_evaluate('spotify_v2_he_elu_adam_30_15')

spotify_v2_he_elu_adam_40_20_10, spotify_v2_he_elu_adam_40_20_10_results = \
    load_and_evaluate('spotify_v2_he_elu_adam_40_20_10')

spotify_v2_he_elu_adam_40_20_10_5, spotify_v2_he_elu_adam_40_20_10_5_results = \
    load_and_evaluate('spotify_v2_he_elu_adam_40_20_10_5')

In [None]:
# Generate predictions (the raw output of each model's last layer)
# for the first 10 test samples using `predict`
print("Generate predictions for 10 samples")

first_test_rows = X_test[:10]
spotify_v2_he_elu_adam_20_10_predictions = spotify_v2_he_elu_adam_20_10.predict(first_test_rows)
spotify_v2_he_elu_adam_30_15_predictions = spotify_v2_he_elu_adam_30_15.predict(first_test_rows)
spotify_v2_he_elu_adam_40_20_10_predictions = spotify_v2_he_elu_adam_40_20_10.predict(first_test_rows)
spotify_v2_he_elu_adam_40_20_10_5_predictions = spotify_v2_he_elu_adam_40_20_10_5.predict(first_test_rows)

# column 0 holds the real targets, the following columns hold one model's predictions each
real = y_test[:10].reshape(-1, 1)
real_pred = np.concatenate(
    (
        real,
        spotify_v2_he_elu_adam_20_10_predictions,
        spotify_v2_he_elu_adam_30_15_predictions,
        spotify_v2_he_elu_adam_40_20_10_predictions,
        spotify_v2_he_elu_adam_40_20_10_5_predictions,
    ),
    axis=1,
)
print("real/predictions")
print(real_pred)

In [None]:
# plot the real targets against every model's predictions, keyed by model name
pred_dict = dict(
    spotify_v2_he_elu_adam_20_10=spotify_v2_he_elu_adam_20_10_predictions,
    spotify_v2_he_elu_adam_30_15=spotify_v2_he_elu_adam_30_15_predictions,
    spotify_v2_he_elu_adam_40_20_10=spotify_v2_he_elu_adam_40_20_10_predictions,
    spotify_v2_he_elu_adam_40_20_10_5=spotify_v2_he_elu_adam_40_20_10_5_predictions,
)
real_pred_plot = RealMultiPredPlotter(real=real, pred_dict=pred_dict)
real_pred_plot.plot()
