In [291]:
import pandas as pd
# Load the track table; the path is relative to the current working directory.
df = pd.read_csv('spotify_cleaned.csv')

In [292]:
# Remove the track title column before modelling.
# `df` is rebound to its own result, so without errors='ignore' a second run of
# this cell would raise KeyError because 'track_name' is already gone.
df = df.drop(columns=['track_name'], errors='ignore')

In [293]:
# One sub-frame per genre of interest, selected by exact match on `track_genre`.
# The genre order below must line up with the names on the left-hand side.
df_popfilm, df_sad, df_electronic, df_metal, df_acoustic = (
    df[df['track_genre'] == genre]
    for genre in ('pop-film', 'sad', 'electronic', 'metal', 'acoustic')
)

## Random Forest

In [294]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge, Lasso, LinearRegression


In [295]:
def random_forest(df_genre, n_estimators=100, test_size=0.2, random_state=42):
    """Fit a random-forest regressor for ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. Must contain a ``popularity`` column (the target);
        every other column is used as a feature.
    n_estimators : int, default 100
        Number of trees passed to ``RandomForestRegressor``.
    test_size : float or int, default 0.2
        Hold-out size passed to ``train_test_split``.
    random_state : int, default 42
        Seed used for both the train/test split and the forest.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` computed on the hold-out rows, and
        ``"Feature Rankings"``: a DataFrame with ``Feature`` and ``Importance``
        columns sorted by descending importance.
    """
    # separate features / target
    X = df_genre.drop(columns=['popularity'])
    y = df_genre['popularity']

    # One-hot encode the non-numeric columns, dropping the first level of each.
    # NOTE(review): any free-text / identifier columns still present in the
    # frame would be expanded here as well - confirm the input only carries
    # modelling features.
    X = pd.get_dummies(X, drop_first=True)

    # split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = RandomForestRegressor(random_state=random_state, n_estimators=n_estimators)

    # train the model
    model.fit(X_train, y_train)

    # predictions, scored on the hold-out rows only
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # X.columns is in the same order as the columns the model was fitted on.
    feature_importances = pd.DataFrame({
        'Feature': X.columns,
        'Importance': model.feature_importances_
    }).sort_values(by='Importance', ascending=False)

    return {"MSE": mse, "R2": r2, "Feature Rankings": feature_importances}

In [296]:
# Random-forest result dict (MSE, R2, feature rankings) for the 'pop-film' rows.
metrics_popfilm = random_forest(df_popfilm)

In [297]:
# Random-forest result dict (MSE, R2, feature rankings) for the 'sad' rows.
metrics_sad = random_forest(df_sad)

In [298]:
# Random-forest result dict (MSE, R2, feature rankings) for the 'electronic' rows.
metrics_electronic = random_forest(df_electronic)

In [299]:
# Random-forest result dict (MSE, R2, feature rankings) for the 'metal' rows.
metrics_metal = random_forest(df_metal)

In [300]:
# Random-forest result dict (MSE, R2, feature rankings) for the 'acoustic' rows.
metrics_acoustic = random_forest(df_acoustic)

In [301]:
# Dump the random-forest results for every genre into one plain-text report.
with open('random_forest_report.txt', 'w') as file:
    # Sections are separated by one blank line; print() adds the final newline.
    print(
        '\n\n'.join((
            f'POPFILM METRICS ==> \n{metrics_popfilm}',
            f'SAD METRICS ==> \n{metrics_sad}',
            f'ELECTRONIC METRICS ==> \n{metrics_electronic}',
            f'METAL METRICS ==> \n{metrics_metal}',
            f'ACOUSTIC METRICS ==> \n{metrics_acoustic}',
        )),
        file=file,
    )


## Ridge Regression

In [302]:
def ridge_regression(df_genre, alpha=10, test_size=0.2, random_state=42):
    """Fit a ridge regression for ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. Must contain a ``popularity`` column (the target);
        every other column is used as a feature.
    alpha : float, default 10
        Regularization strength passed to ``Ridge``.
    test_size : float or int, default 0.2
        Hold-out size passed to ``train_test_split``.
    random_state : int, default 42
        Seed for the train/test split.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` computed on the hold-out rows, and
        ``"Feature Rankings"``: a DataFrame with ``Feature`` and ``Importance``
        columns, where ``Importance`` is the signed coefficient and rows are
        sorted by descending absolute value.

    Notes
    -----
    No feature scaling is applied in this function, so the coefficients (and
    therefore the ranking) are expressed in each feature's raw units.
    """
    # separate features / target
    X = df_genre.drop(columns=['popularity'])
    y = df_genre['popularity']

    # One-hot encode the non-numeric columns, dropping the first level of each.
    X = pd.get_dummies(X, drop_first=True)

    # split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = Ridge(alpha=alpha)

    # train the model
    model.fit(X_train, y_train)

    # predictions, scored on the hold-out rows only
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Keep the sign of each coefficient but order by magnitude.
    feature_importances = pd.DataFrame({
        'Feature': X.columns,
        'Importance': model.coef_
    }).sort_values(by='Importance', key=lambda x: x.abs(), ascending=False)

    return {"MSE": mse, "R2": r2, "Feature Rankings": feature_importances}

In [303]:
# Ridge result dict (MSE, R2, coefficient rankings) for the 'pop-film' rows.
ridge_metrics_popfilm = ridge_regression(df_popfilm)

In [304]:
# Ridge result dict (MSE, R2, coefficient rankings) for the 'sad' rows.
ridge_metrics_sad = ridge_regression(df_sad)

In [305]:
# Ridge result dict (MSE, R2, coefficient rankings) for the 'electronic' rows.
ridge_metrics_electronic = ridge_regression(df_electronic)

In [306]:
# Ridge result dict (MSE, R2, coefficient rankings) for the 'metal' rows.
ridge_metrics_metal = ridge_regression(df_metal)

In [307]:
# Ridge result dict (MSE, R2, coefficient rankings) for the 'acoustic' rows.
ridge_metrics_acoustic = ridge_regression(df_acoustic)

In [308]:
# Dump the ridge results for every genre into one plain-text report.
with open('ridge_report.txt', 'w') as file:
    # Sections are separated by one blank line; print() adds the final newline.
    print(
        '\n\n'.join((
            f'POPFILM METRICS ==> \n{ridge_metrics_popfilm}',
            f'SAD METRICS ==> \n{ridge_metrics_sad}',
            f'ELECTRONIC METRICS ==> \n{ridge_metrics_electronic}',
            f'METAL METRICS ==> \n{ridge_metrics_metal}',
            f'ACOUSTIC METRICS ==> \n{ridge_metrics_acoustic}',
        )),
        file=file,
    )

## Lasso Regression

In [309]:
def lasso_regression(df_genre, alpha=0.01, test_size=0.2, random_state=42):
    """Fit a lasso regression for ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. Must contain a ``popularity`` column (the target);
        every other column is used as a feature.
    alpha : float, default 0.01
        Regularization strength passed to ``Lasso``.
    test_size : float or int, default 0.2
        Hold-out size passed to ``train_test_split``.
    random_state : int, default 42
        Seed for the train/test split.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` computed on the hold-out rows, and
        ``"Feature Rankings"``: a DataFrame with ``Feature`` and ``Importance``
        columns, where ``Importance`` is the signed coefficient and rows are
        sorted by descending absolute value.

    Notes
    -----
    No feature scaling is applied in this function, so the coefficients (and
    therefore the ranking) are expressed in each feature's raw units.
    """
    # separate features / target
    X = df_genre.drop(columns=['popularity'])
    y = df_genre['popularity']

    # One-hot encode the non-numeric columns, dropping the first level of each.
    X = pd.get_dummies(X, drop_first=True)

    # split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = Lasso(alpha=alpha)

    # train the model
    model.fit(X_train, y_train)

    # predictions, scored on the hold-out rows only
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Keep the sign of each coefficient but order by magnitude.
    feature_importances = pd.DataFrame({
        'Feature': X.columns,
        'Importance': model.coef_
    }).sort_values(by='Importance', key=lambda x: x.abs(), ascending=False)

    return {"MSE": mse, "R2": r2, "Feature Rankings": feature_importances}

In [310]:
# Lasso result dict (MSE, R2, coefficient rankings) for the 'pop-film' rows.
lasso_metrics_popfilm = lasso_regression(df_popfilm)

In [311]:
# Lasso result dict (MSE, R2, coefficient rankings) for the 'sad' rows.
lasso_metrics_sad = lasso_regression(df_sad)

In [312]:
# Lasso result dict (MSE, R2, coefficient rankings) for the 'electronic' rows.
lasso_metrics_electronic = lasso_regression(df_electronic)

In [313]:
# Lasso result dict (MSE, R2, coefficient rankings) for the 'metal' rows.
lasso_metrics_metal = lasso_regression(df_metal)

In [314]:
# Lasso result dict (MSE, R2, coefficient rankings) for the 'acoustic' rows.
lasso_metrics_acoustic = lasso_regression(df_acoustic)

In [315]:
# Dump the lasso results for every genre into one plain-text report.
with open('lasso_report.txt', 'w') as file:
    # Sections are separated by one blank line; print() adds the final newline.
    print(
        '\n\n'.join((
            f'POPFILM METRICS ==> \n{lasso_metrics_popfilm}',
            f'SAD METRICS ==> \n{lasso_metrics_sad}',
            f'ELECTRONIC METRICS ==> \n{lasso_metrics_electronic}',
            f'METAL METRICS ==> \n{lasso_metrics_metal}',
            f'ACOUSTIC METRICS ==> \n{lasso_metrics_acoustic}',
        )),
        file=file,
    )

## Multiple Linear Regression

In [316]:
def mlr(df_genre, test_size=0.2, random_state=42):
    """Fit an ordinary least-squares regression for ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. Must contain a ``popularity`` column (the target);
        every other column is used as a feature.
    test_size : float or int, default 0.2
        Hold-out size passed to ``train_test_split``.
    random_state : int, default 42
        Seed for the train/test split.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` computed on the hold-out rows, and
        ``"Feature Rankings"``: a DataFrame with ``Feature`` and ``Importance``
        columns, where ``Importance`` is the signed coefficient and rows are
        sorted by descending absolute value.

    Notes
    -----
    No feature scaling is applied in this function, so the coefficients (and
    therefore the ranking) are expressed in each feature's raw units.
    """
    # separate features / target
    X = df_genre.drop(columns=['popularity'])
    y = df_genre['popularity']

    # One-hot encode the non-numeric columns, dropping the first level of each.
    X = pd.get_dummies(X, drop_first=True)

    # split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    model = LinearRegression()

    # train the model
    model.fit(X_train, y_train)

    # predictions, scored on the hold-out rows only
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Keep the sign of each coefficient but order by magnitude.
    feature_importances = pd.DataFrame({
        'Feature': X.columns,
        'Importance': model.coef_
    }).sort_values(by='Importance', key=lambda x: x.abs(), ascending=False)

    return {"MSE": mse, "R2": r2, "Feature Rankings": feature_importances}

In [317]:
# Linear-regression result dict (MSE, R2, coefficient rankings) for the 'pop-film' rows.
mlr_metrics_popfilm = mlr(df_popfilm)

In [318]:
# Linear-regression result dict (MSE, R2, coefficient rankings) for the 'sad' rows.
mlr_metrics_sad = mlr(df_sad)

In [319]:
# Linear-regression result dict (MSE, R2, coefficient rankings) for the 'electronic' rows.
mlr_metrics_electronic = mlr(df_electronic)

In [320]:
# Linear-regression result dict (MSE, R2, coefficient rankings) for the 'metal' rows.
mlr_metrics_metal = mlr(df_metal)

In [321]:
# Linear-regression result dict (MSE, R2, coefficient rankings) for the 'acoustic' rows.
mlr_metrics_acoustic = mlr(df_acoustic)

In [322]:
# Dump the linear-regression results for every genre into one plain-text report.
with open('mlr_report.txt', 'w') as file:
    # Sections are separated by one blank line; print() adds the final newline.
    print(
        '\n\n'.join((
            f'POPFILM METRICS ==> \n{mlr_metrics_popfilm}',
            f'SAD METRICS ==> \n{mlr_metrics_sad}',
            f'ELECTRONIC METRICS ==> \n{mlr_metrics_electronic}',
            f'METAL METRICS ==> \n{mlr_metrics_metal}',
            f'ACOUSTIC METRICS ==> \n{mlr_metrics_acoustic}',
        )),
        file=file,
    )