# Task for Today  

***

## Song Popularity Prediction  

Given *data about the top 50 Spotify songs from 2019*, let's try to predict whether a given song will be **more or less popular**, i.e. whether its popularity score falls in the upper or lower half of the dataset.  
  
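We will be using five different models to make our predictions: logistic regression, k-nearest neighbors, a decision tree, a neural network, and a support vector machine.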

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

In [None]:
# Load the song table from the CSV file into a DataFrame.
# NOTE(review): latin-1 is presumably required because the file is not
# valid UTF-8 — confirm against the dataset.
data = pd.read_csv('../input/top50spotify2019/top50.csv', encoding='latin-1')

In [None]:
# Display the freshly loaded table.
data

In [None]:
# Print a summary of the columns: names, non-null counts and dtypes.
data.info()

# Preprocessing

In [None]:
# Remove the two columns that are not kept as model features.
data = data.drop(columns=['Unnamed: 0', 'Track.Name'])

In [None]:
# Inspect the table after the column drop.
data

In [None]:
# Binarize the target: pd.qcut with q=2 cuts Popularity into two
# quantile-based bins (split at the median), labelling the lower bin 0
# and the upper bin 1.
data['Popularity'] = pd.qcut(data['Popularity'], q=2, labels=[0, 1])

## One-Hot Encoding

In [None]:
def onehot_encode(df, column, prefix):
    """Return a new DataFrame with `column` replaced by indicator columns.

    The indicator (dummy) columns are named ``<prefix>_<value>`` and are
    appended after the remaining original columns. The input `df` is
    left unmodified.
    """
    indicator_columns = pd.get_dummies(df[column], prefix=prefix)
    remaining_columns = df.drop(columns=column)
    return pd.concat([remaining_columns, indicator_columns], axis=1)

In [None]:
# One-hot encode each categorical column in turn, giving its indicator
# columns a short prefix.
for source_column, dummy_prefix in (('Genre', 'genre'), ('Artist.Name', 'artist')):
    data = onehot_encode(data, source_column, dummy_prefix)

In [None]:
# Inspect the table after one-hot encoding.
data

## Splitting and Scaling

In [None]:
# Separate the feature columns from the target column.
X = data.drop(columns='Popularity')
y = data['Popularity']

In [None]:
# Split first, then fit the scaler on the training rows only, so that the
# test set's mean/variance never leak into the features the models train on.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=20)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # learn mean/std from the training split
X_test = scaler.transform(X_test)        # reuse the training statistics on the test split

# Modeling and Training

In [None]:
# Instantiate the five classifiers with their default hyperparameters.
# The decision tree and the neural network draw random numbers while
# fitting, so they get a fixed seed to make repeated runs give the same scores.
log_model = LogisticRegression()
knn_model = KNeighborsClassifier()
dec_model = DecisionTreeClassifier(random_state=20)
mlp_model = MLPClassifier(random_state=20)
svm_model = SVC()

In [None]:
# Train every model on the same training split.
log_model.fit(X_train, y_train)
knn_model.fit(X_train, y_train)
dec_model.fit(X_train, y_train)
mlp_model.fit(X_train, y_train)
svm_model.fit(X_train, y_train)

In [None]:
# Mean accuracy of each fitted model on the held-out test split.
log_acc, knn_acc, dec_acc, mlp_acc, svm_acc = [
    model.score(X_test, y_test)
    for model in (log_model, knn_model, dec_model, mlp_model, svm_model)
]

In [None]:
# Report each model's test accuracy, one line per model.
for label, accuracy in (
    ("Logistic Regression Accuracy:", log_acc),
    ("K-Nearest-Neighbors Accuracy:", knn_acc),
    ("Decision Tree Accuracy:", dec_acc),
    ("Neural Network Accuracy:", mlp_acc),
    ("Support Vector Machine Accuracy:", svm_acc),
):
    print(label, accuracy)

In [None]:
# Bar chart comparing test accuracy across models; the model name drives
# both the x position and the bar color.
model_names = [
    "Logistic Regression",
    "K-Nearest-Neighbors",
    "Decision Tree",
    "Neural Network",
    "Support Vector Machine",
]
model_accuracies = [log_acc, knn_acc, dec_acc, mlp_acc, svm_acc]

fig = px.bar(
    x=model_names,
    y=model_accuracies,
    color=model_names,
    labels={'x': "Model", 'y': "Accuracy"},
    title="Model Accuracy Comparison"
)

fig.show()

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/k7Joc0aKRVg