In [None]:
# Widen the notebook container to 95% of the browser width so wide outputs are easier to read.
# `display` and `HTML` come from the public IPython.display module: importing `display`
# from IPython.core.display has been deprecated since IPython 7.14 and may fail on newer releases.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [None]:
#module imports
import pandas as pd
import numpy as np
import librosa
import os
import eyed3
from eyed3 import id3
import random
import scipy
from scipy.fft import fft
from statistics import mean
from scipy.stats import entropy
import pickle
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.utils.extmath import randomized_svd
import seaborn as sns
import matplotlib.pyplot as plt
import EntropyHub as eh

In [None]:
# Restore the working set list from its pickle file; the feature engineering below builds on it.
# pickle.load can execute arbitrary code contained in the file, so only load files you trust.
with open("setList", mode="rb") as setlist_file:
    library = pickle.load(setlist_file)

In [None]:
# Assemble the per-track matrix table:
#   Track      - third column (position 2) of the library
#   STFT       - short-time Fourier transform of each entry in library["Amplitudes"]
#   specMatrix - magnitude of the STFT converted to decibels (the spectrogram matrix)
matrixPCA = pd.DataFrame({"Track": library.iloc[:, 2]})
matrixPCA["STFT"] = [librosa.stft(signal) for signal in library["Amplitudes"]]
matrixPCA["specMatrix"] = [
    librosa.amplitude_to_db(np.abs(stft_matrix)) for stft_matrix in matrixPCA["STFT"]
]

In [None]:
# Sanity check: print the spectrogram matrix shapes of the first three tracks
# ("specMatrix" sits at column position 2 of matrixPCA).
print(*(matrixPCA.iloc[row, 2].shape for row in range(3)))

In [None]:
# Accumulator for the PCA outcome of every track: one row per musicPCA call, holding the
# principal axes, the explained variance of each component and the explained variance ratios.
results = pd.DataFrame(columns = ["25 Principal Components","Explained Variance", "Explained Variance Ratios"])

def musicPCA(matrix, n_components = 25, random_state = None):
    """Run PCA on one track matrix and append the outcome to the global ``results`` frame.

    The matrix is standardised column-wise with ``StandardScaler`` and then decomposed
    with scikit-learn's ``PCA`` using the randomized SVD solver.

    Parameters
    ----------
    matrix : 2-D array-like
        Track matrix to decompose (the notebook passes spectrogram matrices).
    n_components : int, default 25
        Number of principal components to keep. Note that the first column of
        ``results`` is named "25 Principal Components" regardless of this value.
    random_state : int, RandomState instance or None, default None
        Seed forwarded to ``PCA``; pass an int to make the randomized solver
        reproducible. ``None`` keeps the previous (unseeded) behaviour.

    Returns
    -------
    None
        The function works by side effect: a new row
        ``[components_, explained_variance_, explained_variance_ratio_]`` is appended
        at position ``len(results.index)``.
    """
    standard = StandardScaler().fit_transform(matrix)
    pca = PCA(n_components = n_components, svd_solver = 'randomized', random_state = random_state)
    # Only the fitted attributes are stored, so fit() is enough; the projected data
    # that fit_transform() would additionally compute was never used.
    pca.fit(standard)
    results.loc[len(results.index)] = [pca.components_, pca.explained_variance_, pca.explained_variance_ratio_]

In [None]:
# Run musicPCA on every spectrogram matrix; each call appends one row to `results`.
for spectrogram in matrixPCA["specMatrix"]:
    musicPCA(spectrogram)

In [None]:
# Copy each PCA result column from `results` into the music library under the same name.
for pca_feature in ("25 Principal Components", "Explained Variance", "Explained Variance Ratios"):
    library[pca_feature] = results[pca_feature]

In [None]:
# Total explained variance ratio captured by the retained principal components, per track.
library["PCs EVR Sum"] = list(map(np.sum, library["Explained Variance Ratios"]))

In [None]:
# Gibbs entropy of each track's explained variance ratios: S = -sum(p * ln(p)).
def _gibbs_entropy(ratios):
    """Return the sum of -p * ln(p) over ``ratios``, treating 0 * ln(0) as 0.

    Without the zero handling a single ratio of exactly 0 evaluates to
    0 * (-inf) = NaN and turns the whole sum into NaN.
    """
    p = np.asarray(ratios, dtype = float)
    # Replace exact zeros by 1 inside the logarithm only: ln(1) = 0, so those terms
    # contribute 0 * 0 = 0 while every other term is still p * ln(p).
    safe_p = np.where(p == 0, 1.0, p)
    return np.sum(-(p * np.log(safe_p)))

library["Gibb's Entropy of 25 PCs"] = [_gibbs_entropy(x) for x in library["Explained Variance Ratios"]]

In [None]:
# Run EntropyHub metrics, one track at a time.
# The EntropyHub estimators take a single sequence (or a single matrix) per call, so every
# entry of library["Amplitudes"] is processed on its own instead of passing the whole column.
# tau is the time delay and has to be a positive integer, hence tau = 1 rather than 0.
# NOTE(review): these estimators may be very slow / memory hungry on full-length audio
# signals and full-size spectrograms - confirm the runtime is acceptable.
X = library["Amplitudes"]

# Sample entropy: the first return value (Samp) is kept for each track.
# Samp holds one estimate per embedding dimension; the last entry belongs to dimension m.
library["EH Time Series Entropy"] = [eh.SampEn(sig, m = 4, tau = 1)[0] for sig in X]

# Kolmogorov (K2) entropy: the first return value (K2) is kept for each track.
# The radius threshold r is 0.2 times the standard deviation of the signal being analysed.
library["EH Kolmogorov Entropy"] = [
    eh.K2En(sig, m = 4, tau = 1, r = 0.2*np.std(sig), Logx = np.exp(1))[0] for sig in X
]

# Bidimensional fuzzy entropy of each spectrogram matrix.
library["EH Fuzzy2D Entropy"] = [
    eh.FuzzEn2D(x, m = (7, 8), tau = 1, Fx = "default", r = (0.2,2), Logx = np.exp(1), Lock = False)
    for x in matrixPCA["specMatrix"]
]

# The spectrogram matrices are not needed after this point, so the column is dropped.
matrixPCA.drop(columns = ["specMatrix"], inplace = True)

In [None]:
# Persist the finished library DataFrame to the "playlist" pickle file.
with open('playlist', mode='wb') as playlist_file:
    pickle.dump(library, playlist_file)