## Track feature encoder using an Autoencoder

We use a simple feed-forward neural network to build an autoencoder that reduces the dimension of each song's feature vector from 20 to 8.

#### Importing necessary libraries for this notebook

In [127]:
import glob
import os
import pickle

import numpy as np
import pandas as pd
from tqdm import tqdm

Loading the sampled track data that contains 29 features

In [2]:
# Read the sampled track feature table (path is relative to the notebook's
# working directory) and preview its first five rows.
track_pd = pd.read_csv("sampled_track_features_data.csv")
track_pd.head(n=5)

Unnamed: 0,track_id,duration,release_year,us_popularity_estimate,acousticness,beat_strength,bounciness,danceability,dyn_range_mean,energy,...,time_signature,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7
0,t_f9b9a072-8dae-4816-bfd6-d91fc75a5744,180.133331,1989,97.720734,0.002047,0.425469,0.486543,0.65454,7.990334,0.625231,...,4,0.691534,0.188888,0.306256,-0.325102,-0.53262,0.660405,0.149624,-0.354731,0.192537
1,t_7f435320-5d3b-497d-9346-0c97429af2b9,539.76001,1980,98.383158,0.075846,0.499848,0.494943,0.461215,7.512061,0.563252,...,4,0.46844,0.180359,-0.149974,-0.185145,-0.152791,0.560996,0.047161,-0.295635,-0.074902
2,t_949441c4-4920-4eed-b929-2d67d37b7bd9,114.133331,1983,98.967866,0.002274,0.243778,0.229295,0.290262,5.105952,0.958647,...,4,0.2931,0.101301,0.186855,-0.525412,-0.293849,0.596498,0.010453,-0.280584,0.418848
3,t_63eae5ac-744a-46ee-b719-0db8e70d6f7f,164.906677,1983,99.773741,0.005024,0.33858,0.331703,0.348846,6.060782,0.886674,...,4,0.387131,0.132311,0.248023,-0.510729,-0.387413,0.602931,0.033381,-0.312206,0.365894
4,t_3c2c29b5-c738-42c0-83e1-ee0e88ac834d,182.293335,1969,99.98781,0.198284,0.336354,0.353215,0.395759,6.443376,0.338417,...,4,0.377411,0.083737,-0.306129,0.045954,0.0784,0.66664,-0.032503,-0.299952,-0.584207


In [3]:
# List every column available in the raw table before choosing the model inputs.
track_pd.columns

Index(['track_id', 'duration', 'release_year', 'us_popularity_estimate',
       'acousticness', 'beat_strength', 'bounciness', 'danceability',
       'dyn_range_mean', 'energy', 'flatness', 'instrumentalness', 'key',
       'liveness', 'loudness', 'mechanism', 'mode', 'organism', 'speechiness',
       'tempo', 'time_signature', 'valence', 'acoustic_vector_0',
       'acoustic_vector_1', 'acoustic_vector_2', 'acoustic_vector_3',
       'acoustic_vector_4', 'acoustic_vector_5', 'acoustic_vector_6',
       'acoustic_vector_7'],
      dtype='object')

Out of all the features, 20 are selected that describe the song's type/genre. `track_id` is kept alongside them as the identifier, so the list below has 21 entries.

In [75]:
# Columns kept for the autoencoder: the track identifier, twelve scalar audio
# descriptors, and the eight acoustic_vector_* components.
audio_descriptor_columns = [
    'acousticness', 'beat_strength', 'bounciness', 'danceability',
    'energy', 'flatness', 'instrumentalness',
    'liveness', 'mechanism', 'organism', 'speechiness',
    'valence',
]
acoustic_vector_columns = ['acoustic_vector_{}'.format(i) for i in range(8)]
relevant_features_list = ['track_id'] + audio_descriptor_columns + acoustic_vector_columns
print(len(relevant_features_list))

21


In [76]:
# Keep every row but only the identifier and the selected feature columns.
relevant_features_track_pd = track_pd.loc[:, relevant_features_list]
relevant_features_track_pd.head(n=5)

Unnamed: 0,track_id,acousticness,beat_strength,bounciness,danceability,energy,flatness,instrumentalness,liveness,mechanism,...,speechiness,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7
0,t_f9b9a072-8dae-4816-bfd6-d91fc75a5744,0.002047,0.425469,0.486543,0.65454,0.625231,0.957158,0.003155269,0.261363,0.473837,...,0.052713,0.691534,0.188888,0.306256,-0.325102,-0.53262,0.660405,0.149624,-0.354731,0.192537
1,t_7f435320-5d3b-497d-9346-0c97429af2b9,0.075846,0.499848,0.494943,0.461215,0.563252,0.916317,0.3736224,0.693975,0.187614,...,0.065943,0.46844,0.180359,-0.149974,-0.185145,-0.152791,0.560996,0.047161,-0.295635,-0.074902
2,t_949441c4-4920-4eed-b929-2d67d37b7bd9,0.002274,0.243778,0.229295,0.290262,0.958647,0.846197,0.0565884,0.259133,0.332016,...,0.115356,0.2931,0.101301,0.186855,-0.525412,-0.293849,0.596498,0.010453,-0.280584,0.418848
3,t_63eae5ac-744a-46ee-b719-0db8e70d6f7f,0.005024,0.33858,0.331703,0.348846,0.886674,0.904784,1.998796e-10,0.381728,0.185455,...,0.13759,0.387131,0.132311,0.248023,-0.510729,-0.387413,0.602931,0.033381,-0.312206,0.365894
4,t_3c2c29b5-c738-42c0-83e1-ee0e88ac834d,0.198284,0.336354,0.353215,0.395759,0.338417,1.021726,2.620291e-06,0.138143,0.245665,...,0.030352,0.377411,0.083737,-0.306129,0.045954,0.0784,0.66664,-0.032503,-0.299952,-0.584207


In [77]:
# (rows, columns) of the selected frame; columns = track_id + 20 features.
relevant_features_track_pd.shape

(275328, 21)

## Feed forward Autoencoder

In [78]:
from keras.layers import Input, Dense
from keras.models import Model
import matplotlib.pyplot as plt
%matplotlib inline

### Creation of the autoencoder

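Most feature values lie between -1 and +1, which is why tanh is used as the activation function. Note that a few values (e.g. `flatness` in the preview above) slightly exceed 1, which is outside the range a tanh unit can output.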

In [87]:
# Symmetric feed-forward autoencoder: 20 -> 14 -> 8 -> 14 -> 20.
# The input width matches the 20 feature columns (track_id is excluded).
input_features = Input(shape=(20,))
encoded = Dense(units=14, activation='tanh')(input_features)
# 8-unit bottleneck: this tensor is the latent representation of a track.
encoded = Dense(units=8, activation='tanh')(encoded)
decoded = Dense(units=14, activation='tanh')(encoded)
# Linear (unbounded) reconstruction layer. A tanh output is confined to (-1, 1),
# but the data contains values outside that range (e.g. flatness = 1.021726 in
# the preview), which a tanh output could never reconstruct.
decoded = Dense(units=20, activation='linear')(decoded)

In [88]:
# Full model: maps the 20 input features to their 20-dimensional reconstruction.
autoencoder = Model(inputs=input_features, outputs=decoded)

In [89]:
# Encoder half only: same input tensor, but stops at the 8-unit bottleneck.
# It is built on the same layer outputs as `autoencoder`.
encoder = Model(inputs=input_features, outputs=encoded)

In [90]:
# Print the layer-by-layer output shapes and parameter counts of the full autoencoder.
autoencoder.summary()

Model: "model_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 20)                0         
_________________________________________________________________
dense_28 (Dense)             (None, 14)                294       
_________________________________________________________________
dense_29 (Dense)             (None, 8)                 120       
_________________________________________________________________
dense_30 (Dense)             (None, 14)                126       
_________________________________________________________________
dense_31 (Dense)             (None, 20)                300       
Total params: 840
Trainable params: 840
Non-trainable params: 0
_________________________________________________________________


In [91]:
# Print the layers of the encoder half (input through the 8-unit bottleneck).
encoder.summary()

Model: "model_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         (None, 20)                0         
_________________________________________________________________
dense_28 (Dense)             (None, 14)                294       
_________________________________________________________________
dense_29 (Dense)             (None, 8)                 120       
Total params: 414
Trainable params: 414
Non-trainable params: 0
_________________________________________________________________


In [92]:
# Train to minimise the mean squared reconstruction error, using the Adam optimizer.
autoencoder.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [93]:
# Model input: every selected column except the identifier.
feature_columns = [
    column for column in relevant_features_track_pd.columns if column != 'track_id'
]
x_train = relevant_features_track_pd[feature_columns]
x_train.head()

Unnamed: 0,acousticness,beat_strength,bounciness,danceability,energy,flatness,instrumentalness,liveness,mechanism,organism,speechiness,valence,acoustic_vector_0,acoustic_vector_1,acoustic_vector_2,acoustic_vector_3,acoustic_vector_4,acoustic_vector_5,acoustic_vector_6,acoustic_vector_7
0,0.002047,0.425469,0.486543,0.65454,0.625231,0.957158,0.003155269,0.261363,0.473837,0.372056,0.052713,0.691534,0.188888,0.306256,-0.325102,-0.53262,0.660405,0.149624,-0.354731,0.192537
1,0.075846,0.499848,0.494943,0.461215,0.563252,0.916317,0.3736224,0.693975,0.187614,0.576942,0.065943,0.46844,0.180359,-0.149974,-0.185145,-0.152791,0.560996,0.047161,-0.295635,-0.074902
2,0.002274,0.243778,0.229295,0.290262,0.958647,0.846197,0.0565884,0.259133,0.332016,0.472339,0.115356,0.2931,0.101301,0.186855,-0.525412,-0.293849,0.596498,0.010453,-0.280584,0.418848
3,0.005024,0.33858,0.331703,0.348846,0.886674,0.904784,1.998796e-10,0.381728,0.185455,0.575982,0.13759,0.387131,0.132311,0.248023,-0.510729,-0.387413,0.602931,0.033381,-0.312206,0.365894
4,0.198284,0.336354,0.353215,0.395759,0.338417,1.021726,2.620291e-06,0.138143,0.245665,0.551515,0.030352,0.377411,0.083737,-0.306129,0.045954,0.0784,0.66664,-0.032503,-0.299952,-0.584207


In [94]:
# Fit the autoencoder to reproduce its own input (the target equals the input).
# NOTE(review): shuffle=False feeds batches in file order; the preview rows look
# sorted by us_popularity_estimate, so confirm that unshuffled training is intended.
autoencoder.fit(
    x=x_train,
    y=x_train,
    batch_size=128,
    epochs=50,
    shuffle=False,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x11d545d30>

The prediction from the encoder is the latent representation of each song.

In [99]:
# Run the encoder half over the same feature matrix used for training to get
# one latent vector per row of x_train.
latent_track_features = encoder.predict(x_train)

In [100]:
# Expect (number of tracks, 8): the bottleneck layer has 8 units.
latent_track_features.shape

(275328, 8)

In [112]:
# Map each track_id to its latent vector.
# Rows of `latent_track_features` follow the row order of `track_pd` (x_train is
# a column subset of it), so ids and vectors are paired positionally. Indexing
# the numpy array with DataFrame index labels would misalign or fail if the
# index were ever not the default 0..n-1 range.
latent_track_features_dict = dict()
track_ids = track_pd['track_id'].values  # fetched once, not on every iteration
for track_id, latent_features in tqdm(zip(track_ids, latent_track_features),
                                      total=len(track_ids)):
    latent_track_features_dict[track_id] = latent_features

100%|██████████| 275328/275328 [00:02<00:00, 101650.91it/s]


In [124]:
# Turn the {track_id: latent vector} mapping into a table: one row per track,
# with the dictionary key moved out of the index into a 'track_id' column.
latent_track_features_df = (
    pd.DataFrame.from_dict(latent_track_features_dict, orient='index')
    .reset_index()
    .rename(columns={'index': 'track_id'})
)
latent_track_features_df.head()

Unnamed: 0,track_id,0,1,2,3,4,5,6,7
0,t_f9b9a072-8dae-4816-bfd6-d91fc75a5744,0.085536,0.023943,0.125375,-0.145933,0.44901,-0.106019,-0.190885,0.282416
1,t_7f435320-5d3b-497d-9346-0c97429af2b9,0.152102,-0.118725,-0.168167,0.291125,0.540807,0.042686,-0.288468,0.13009
2,t_949441c4-4920-4eed-b929-2d67d37b7bd9,0.056647,-0.077121,0.052887,-0.076303,0.644329,-0.148826,-0.292301,0.208705
3,t_63eae5ac-744a-46ee-b719-0db8e70d6f7f,0.120041,-0.071038,-0.012514,-0.061835,0.621581,-0.082328,-0.210929,0.185848
4,t_3c2c29b5-c738-42c0-83e1-ee0e88ac834d,0.075071,0.094992,0.372702,0.001345,0.534761,-0.017185,-0.420036,0.035725


In [125]:
# Expect 9 columns: track_id plus the 8 latent dimensions.
latent_track_features_df.shape

(275328, 9)

In [126]:
# Write the latent features to CSV; index=False leaves the row index out of the file.
latent_track_features_df.to_csv('latent_track_features.csv', index=False)

In [129]:
# Persist the trained encoder. Create the output directory if it is missing and
# use a context manager so the file handle is flushed and closed after writing.
# NOTE(review): whether a Keras model pickles cleanly depends on the Keras
# version; `encoder.save(...)` is the native alternative. Confirm what the
# downstream loader expects before switching formats.
os.makedirs('pickle', exist_ok=True)
with open('pickle/track_encoder.sav', 'wb') as encoder_file:
    pickle.dump(encoder, encoder_file)

In [130]:
# Persist the full trained autoencoder. Create the output directory if it is
# missing and use a context manager so the file handle is flushed and closed.
# NOTE(review): whether a Keras model pickles cleanly depends on the Keras
# version; `autoencoder.save(...)` is the native alternative. Confirm what the
# downstream loader expects before switching formats.
os.makedirs('pickle', exist_ok=True)
with open('pickle/track_autoencoder.sav', 'wb') as autoencoder_file:
    pickle.dump(autoencoder, autoencoder_file)