In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.sparse as sps
from sklearn.preprocessing import MinMaxScaler
seed=123

In [3]:
track_features = pd.read_csv('../../../data/v2/tracks_v3.0.csv', sep='\t')
track_features.shape

(2262292, 18)

In [4]:
feat = ['acousticness', 'danceability', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'tempo',
       'speechiness', 'time_signature', 'valence', 'duration_ms']

In [5]:
track_features[feat].max()
track_features[feat].min()

acousticness        9.960000e-01
danceability        9.960000e-01
energy              1.000000e+00
instrumentalness    1.000000e+00
key                 1.100000e+01
liveness            1.000000e+00
loudness            4.923000e+00
mode                1.000000e+00
tempo               2.499870e+02
speechiness         9.710000e-01
time_signature      5.000000e+00
valence             1.000000e+00
duration_ms         2.074458e+07
dtype: float64

acousticness         0.0
danceability         0.0
energy               0.0
instrumentalness     0.0
key                  0.0
liveness             0.0
loudness           -60.0
mode                 0.0
tempo                0.0
speechiness          0.0
time_signature       0.0
valence              0.0
duration_ms         -1.0
dtype: float64

In [6]:
track_features[feat] = track_features[feat].astype(np.float32)
scaler = MinMaxScaler()
track_features['tempo'] = scaler.fit_transform(track_features.tempo.values.reshape(-1,1))
track_features['loudness'] = scaler.fit_transform(track_features.loudness.values.reshape(-1,1))
track_features['duration_ms'] = scaler.fit_transform(track_features.loudness.values.reshape(-1,1))

In [7]:
track_features[feat].max()
track_features[feat].min()

acousticness         0.996
danceability         0.996
energy               1.000
instrumentalness     1.000
key                 11.000
liveness             1.000
loudness             1.000
mode                 1.000
tempo                1.000
speechiness          0.971
time_signature       5.000
valence              1.000
duration_ms          1.000
dtype: float32

acousticness        0.0
danceability        0.0
energy              0.0
instrumentalness    0.0
key                 0.0
liveness            0.0
loudness            0.0
mode                0.0
tempo               0.0
speechiness         0.0
time_signature      0.0
valence             0.0
duration_ms         0.0
dtype: float32

In [8]:
track_features = track_features.drop(['arid', 'alid', 'track_name', 'track_uri'], axis=1)

In [9]:
track_features.tail()

Unnamed: 0,tid,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,duration_ms
2262287,401903,0.142985,0.579333,0.660833,0.102223,6.0,0.149533,0.815574,1.0,0.085117,0.473099,4.0,0.29965,0.815574
2262288,401904,0.020492,0.447,0.758,0.230404,5.0,0.11668,0.827251,1.0,0.045,0.517228,4.0,0.279,0.827251
2262289,1537013,0.071669,0.749667,0.654,0.751333,5.0,0.222727,0.758841,0.0,0.0566,0.481439,4.0,0.349067,0.758841
2262290,1537015,0.071669,0.749667,0.654,0.751333,5.0,0.222727,0.758841,0.0,0.0566,0.481439,4.0,0.349067,0.758841
2262291,753659,0.136902,0.699538,0.655308,0.0635,6.0,0.151669,0.83102,1.0,0.076692,0.488635,4.0,0.488231,0.83102


In [10]:
track_features.sort_values(by='tid', inplace=True)

In [11]:
track_features.tail()

Unnamed: 0,tid,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,duration_ms
2262128,2262287,0.0241,0.587,0.83,0.0,2.0,0.0781,0.867073,1.0,0.0556,0.332045,4.0,0.722,0.867073
2262129,2262288,0.0112,0.541,0.85,0.000315,2.0,0.106,0.830569,1.0,0.0973,0.356363,4.0,0.175,0.830569
2262130,2262289,0.00634,0.724,0.568,1.1e-05,7.0,0.159,0.801319,1.0,0.0533,0.31206,4.0,0.383,0.801319
2262131,2262290,0.00612,0.53,0.806,0.0,1.0,0.163,0.821974,0.0,0.0319,0.615456,4.0,0.777,0.821974
2262132,2262291,0.0679,0.31,0.548,0.000112,9.0,0.11,0.790506,0.0,0.0361,0.475233,4.0,0.192,0.790506


In [12]:
track_features = track_features.drop('tid',axis=1)

In [13]:
track_features.columns

Index(['acousticness', 'danceability', 'energy', 'instrumentalness', 'key',
       'liveness', 'loudness', 'mode', 'speechiness', 'tempo',
       'time_signature', 'valence', 'duration_ms'],
      dtype='object')

In [14]:
tf = track_features.values

In [15]:
audio_matrix = sps.csr_matrix(tf)

In [19]:
sps.save_npz(file='audio_features_csr', matrix=audio_matrix)