In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

#------------------------------------import feature selection----------------------------------
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2            #score_func, need to normalize to remove negative to use
from sklearn.feature_selection import f_classif
#-----------------------------------------Seaborn----------------------------------------------
import matplotlib.pyplot as plt
import ssl
import seaborn as sns
ssl._create_default_https_context = ssl._create_unverified_context
sns.set(style='darkgrid')

In [2]:
df = pd.read_csv('./Data/features_30_sec.csv')

In [3]:
df = df.drop(['length','filename'],axis=1)             #remove the length and filename column since irrelevant
df = df.sample(frac=1)  

In [4]:
#-----------------------------------------Encode The Genre Into Numbers-----------------------------------------
labelEncoder = LabelEncoder()                              #store encoded labels into variable
le = labelEncoder.fit(df['label'])                         #fit label into the variable
df['label'] = le.transform(df['label'])                    #transform label values into numbers
Y_genre = df['label']                                      #Assign label to Y_genre
X_features = df.drop('label',axis=1)                       #Assign all features to X_feature

In [5]:
#---------------------------------------Normalize the features between 0-1---------------------------------------
scalar = MinMaxScaler()                                    
scalar.fit(X_features)                                     #Fit features into scalar
X_cols = X_features.columns
X_features[X_cols] = scalar.transform(X_features)                 #Transform features into 0-1

In [6]:
#------------------------------------------Feature Reduction-----------------------------------------------------
best_feat = SelectKBest(score_func= f_classif, k=4)          #Auto select 4 features that best differentiate
fit = best_feat.fit(X_features,Y_genre)                      #the genres

In [7]:
feat_scores = pd.DataFrame(fit.scores_)                      #Extract optimum score of each feature
feat_columns = pd.DataFrame(X_features.columns)              #Extract the column names

In [8]:
sel_scores = pd.concat([feat_columns,feat_scores],axis=1)    #Concatenate the two
sel_scores.columns = ['Features','Scores']                   #Name the two new columns
sel_scores.sort_values(by=['Scores'],ascending=False)        #Sort the scores so that highest is displayed first
sel_largest = sel_scores.nlargest(4,'Scores')                #Display the 4 best ones
sel_largest

Unnamed: 0,Features,Scores
0,chroma_stft_mean,176.453282
17,mfcc1_mean,130.371835
6,spectral_bandwidth_mean,116.601879
8,rolloff_mean,110.871317


In [9]:
X_features = X_features[sel_largest['Features'].T]           #Reduce the features to the 4 with best scores

In [10]:
X_train,X_test,y_train,y_test = train_test_split(X_features,Y_genre,test_size=0.20,random_state=42)

In [11]:
X_train = torch.FloatTensor(X_train.values)
X_test = torch.FloatTensor(X_test.values)
y_train = torch.IntTensor(y_train.values)
y_test = torch.IntTensor(y_test.values)

In [12]:
#Create NN
class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=4, out_features=16)
        self.fc2 = nn.Linear(in_features=16, out_features=12)
        self.output = nn.Linear(in_features=12, out_features=3)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.output(x)
        return x

In [13]:
model = NN()
model

NN(
  (fc1): Linear(in_features=4, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=12, bias=True)
  (output): Linear(in_features=12, out_features=3, bias=True)
)

In [14]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [15]:
%%time

epochs = 100
loss_arr = []

for i in range(epochs):
    y_hat = model.forward(X_train)
    loss = criterion(y_hat,y_train)
    loss_arr.append(loss)
    
    if i % 10 == 0:
        print(f'Epoch: {i} Loss: {loss}')
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

RuntimeError: expected scalar type Long but found Int