In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.model_selection as ms
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score, cross_validate
from imblearn.over_sampling import SMOTE 
from sklearn.preprocessing import StandardScaler 
from sklearn.pipeline import make_pipeline

url = 'https://raw.githubusercontent.com/tannerskluz/SpotifyClassifier/main/data.csv'
df = pd.read_csv(url)
labels = df['explicit']
df.drop(columns=['id', 'name', 'artists', 'release_date', 'explicit','duration_ms',
    'key', 'liveness', 'mode', 'tempo', 'valence', 'loudness', 'instrumentalness'],inplace=True)
df.head()

Unnamed: 0,acousticness,danceability,energy,popularity,speechiness,year
0,0.991,0.598,0.224,12,0.0936,1920
1,0.643,0.852,0.517,7,0.0534,1920
2,0.993,0.647,0.186,4,0.174,1920
3,0.000173,0.73,0.798,17,0.0425,1920
4,0.295,0.704,0.707,2,0.0768,1920


In [14]:
clf = Perceptron(tol=1e-6, random_state=10)
scaler = StandardScaler()
clf_mlp = make_pipeline(scaler, MLPClassifier(hidden_layer_sizes=(10, 5), random_state=1, max_iter = 1000))

# generate synthetic samples from minority class using kNN
sm = SMOTE(random_state=0, sampling_strategy='minority')
X_res, y_res = sm.fit_resample(df, labels)

X_train, X_test, y_train, y_test = ms.train_test_split(X_res, y_res, random_state=0)


scaler.fit(X_train)
X_train = scaler.transform(X_train)  
X_test = scaler.transform(X_test)  

clf.fit(X_train, y_train)
display("Perceptron Train-test split score: " + str(clf.score(X_test, y_test)))

clf_mlp.fit(X_train, y_train)
display("MLP Train-test split score: " + str(clf_mlp.score(X_test, y_test)))

strat_k_fold = StratifiedKFold(n_splits=5)

score = cross_val_score(clf, X_res, y_res, cv=strat_k_fold, scoring='accuracy').mean()
display("Perceptron accuracy with 5-fold cross validation score: " + str(score))
score = cross_val_score(clf_mlp, X_res, y_res, cv=strat_k_fold, scoring='accuracy').mean()
display("MLP accuracy with 5-fold cross validation score: " + str(score))

score = cross_val_score(clf, X_res, y_res, cv=strat_k_fold, scoring='precision').mean()
display("Perceptron precision with 5-fold cross validation score: " + str(score))
score = cross_val_score(clf_mlp, X_res, y_res, cv=strat_k_fold, scoring='precision').mean()
display("MLP precision with 5-fold cross validation score: " + str(score))

score = cross_val_score(clf, X_res, y_res, cv=strat_k_fold, scoring='recall').mean()
display("Perceptron recall with 5-fold cross validation score: " + str(score))
score = cross_val_score(clf_mlp, X_res, y_res, cv=strat_k_fold, scoring='recall').mean()
display("MLP recall with 5-fold cross validation score: " + str(score))

score = cross_val_score(clf, X_res, y_res, cv=strat_k_fold, scoring='f1').mean()
display("Perceptron f1 with 5-fold cross validation score: " + str(score))
score = cross_val_score(clf_mlp, X_res, y_res, cv=strat_k_fold, scoring='f1').mean()
display("MLP f1 with 5-fold cross validation score: " + str(score))


'Perceptron Train-test split score: 0.8436507741157359'

'MLP Train-test split score: 0.8965958598961282'

'Perceptron accuracy with 5-fold cross validation score: 0.6416153309272796'

'MLP accuracy with 5-fold cross validation score: 0.8893955000061835'

'Perceptron precision with 5-fold cross validation score: 0.6935646868338796'

'MLP precision with 5-fold cross validation score: 0.8720081136144853'

'Perceptron recall with 5-fold cross validation score: 0.7790960730647896'

'MLP recall with 5-fold cross validation score: 0.9133328996124233'

'Perceptron f1 with 5-fold cross validation score: 0.6025963629863867'

'MLP f1 with 5-fold cross validation score: 0.8919834447260827'