In [66]:
import pandas as pd
# Load the dataset from 'spotify_cleaned.csv'; the path is relative, so it is
# resolved against the notebook's current working directory.
df = pd.read_csv('spotify_cleaned.csv')

In [67]:
# Remove the track_name column from df.
# errors='ignore' keeps this cell idempotent: because the result is bound back
# to `df`, re-running the cell used to raise KeyError once the column was gone.
df = df.drop(columns=['track_name'], errors='ignore')

In [68]:
# One sub-frame per genre modelled below. Each keeps every column of df and
# only the rows whose track_genre equals the given label.
df_popfilm = df.loc[df['track_genre'].eq('pop-film')]
df_sad = df.loc[df['track_genre'].eq('sad')]
df_electronic = df.loc[df['track_genre'].eq('electronic')]
df_metal = df.loc[df['track_genre'].eq('metal')]
df_acoustic = df.loc[df['track_genre'].eq('acoustic')]

## Random Forest

In [69]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge, Lasso, LinearRegression


In [70]:
def random_forest(df_genre, test_size=0.2, split_seed=42, n_estimators=100, model_seed=100):
    """Fit a random-forest regressor for ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. Must contain a ``popularity`` column, which is the
        target; all remaining columns are passed through ``pd.get_dummies``
        (``drop_first=True``) and used as features.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    split_seed : int, default 42
        ``random_state`` of the train/test split.
    n_estimators : int, default 100
        Number of trees in the forest.
    model_seed : int, default 100
        ``random_state`` of the ``RandomForestRegressor``.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` computed on the test split, and
        ``"Feature Rankings"``: a DataFrame with columns ``Feature`` and
        ``Importance`` (the model's ``feature_importances_``), sorted from
        most to least important.

    The defaults reproduce the values that were previously hard-coded.
    """
    # separate features / target
    features = pd.get_dummies(df_genre.drop(columns=['popularity']), drop_first=True)
    target = df_genre['popularity']

    # split the data
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=split_seed
    )

    # train the model
    model = RandomForestRegressor(random_state=model_seed, n_estimators=n_estimators)
    model.fit(X_train, y_train)

    # predictions on the held-out rows
    y_pred = model.predict(X_test)

    feature_importances = pd.DataFrame({
        'Feature': features.columns,
        'Importance': model.feature_importances_,
    }).sort_values(by='Importance', ascending=False)

    return {
        "MSE": mean_squared_error(y_test, y_pred),
        "R2": r2_score(y_test, y_pred),
        "Feature Rankings": feature_importances,
    }

In [71]:
# Random-forest results for the pop-film subset (dict: MSE, R2, Feature Rankings).
metrics_popfilm = random_forest(df_popfilm)

In [72]:
# Random-forest results for the sad subset (dict: MSE, R2, Feature Rankings).
metrics_sad = random_forest(df_sad)

In [73]:
# Random-forest results for the electronic subset (dict: MSE, R2, Feature Rankings).
metrics_electronic = random_forest(df_electronic)

In [74]:
# Random-forest results for the metal subset (dict: MSE, R2, Feature Rankings).
metrics_metal = random_forest(df_metal)

In [75]:
# Random-forest results for the acoustic subset (dict: MSE, R2, Feature Rankings).
metrics_acoustic = random_forest(df_acoustic)

In [76]:
# Write the five random-forest result dicts to a plain-text report
# (the file is overwritten on every run).
with open('random_forest_report.txt', 'w') as file:
    report_sections = [
        f'POPFILM METRICS ==> \n{metrics_popfilm}',
        f'SAD METRICS ==> \n{metrics_sad}',
        f'ELECTRONIC METRICS ==> \n{metrics_electronic}',
        f'METAL METRICS ==> \n{metrics_metal}',
        f'ACOUSTIC METRICS ==> \n{metrics_acoustic}',
    ]
    # Sections are separated by one blank line; the file ends with one newline.
    file.write('\n\n'.join(report_sections) + '\n')


## Ridge Regression

In [77]:
def ridge_regression(df_genre, alpha=10, test_size=0.2, random_state=42, standardize=False):
    """Fit a Ridge regression for ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. Must contain a ``popularity`` column, which is the
        target; all remaining columns are passed through ``pd.get_dummies``
        (``drop_first=True``) and used as features.
    alpha : float, default 10
        Regularization strength passed to ``Ridge``.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Seed of the train/test split.
    standardize : bool, default False
        When True, features are standardized with a ``StandardScaler`` fitted
        on the training rows before the model is fitted. The default (False)
        keeps the previous behaviour of fitting on the raw feature values.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` computed on the test split, and
        ``"Feature Rankings"``: a DataFrame with columns ``Feature`` and
        ``Importance`` (the fitted coefficients), sorted by absolute value,
        largest first.

    Notes
    -----
    With ``standardize=False`` the coefficients are expressed per raw unit of
    each feature, so the |coefficient| ordering depends on the features'
    scales; pass ``standardize=True`` to rank them on a common scale.
    """
    # separate features / target
    X = pd.get_dummies(df_genre.drop(columns=['popularity']), drop_first=True)
    y = df_genre['popularity']

    # split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    if standardize:
        # Local import: StandardScaler is not among the notebook's top-level imports.
        from sklearn.preprocessing import StandardScaler

        # Fit on the training rows only so the test rows do not influence the scaling.
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

    # train the model
    model = Ridge(alpha=alpha)
    model.fit(X_train, y_train)

    # predictions on the held-out rows
    y_pred = model.predict(X_test)

    # Column order is unchanged by scaling, so coef_ still lines up with X.columns.
    feature_importances = pd.DataFrame({
        'Feature': X.columns,
        'Importance': model.coef_,
    }).sort_values(by='Importance', key=lambda x: x.abs(), ascending=False)

    return {
        "MSE": mean_squared_error(y_test, y_pred),
        "R2": r2_score(y_test, y_pred),
        "Feature Rankings": feature_importances,
    }

In [78]:
# Ridge results for the pop-film subset (dict: MSE, R2, Feature Rankings).
ridge_metrics_popfilm = ridge_regression(df_popfilm)

In [79]:
# Ridge results for the sad subset (dict: MSE, R2, Feature Rankings).
ridge_metrics_sad = ridge_regression(df_sad)

In [80]:
# Ridge results for the electronic subset (dict: MSE, R2, Feature Rankings).
ridge_metrics_electronic = ridge_regression(df_electronic)

In [81]:
# Ridge results for the metal subset (dict: MSE, R2, Feature Rankings).
ridge_metrics_metal = ridge_regression(df_metal)

In [82]:
# Ridge results for the acoustic subset (dict: MSE, R2, Feature Rankings).
ridge_metrics_acoustic = ridge_regression(df_acoustic)

In [83]:
# Write the five Ridge result dicts to a plain-text report
# (the file is overwritten on every run).
with open('ridge_report.txt', 'w') as file:
    report_sections = [
        f'POPFILM METRICS ==> \n{ridge_metrics_popfilm}',
        f'SAD METRICS ==> \n{ridge_metrics_sad}',
        f'ELECTRONIC METRICS ==> \n{ridge_metrics_electronic}',
        f'METAL METRICS ==> \n{ridge_metrics_metal}',
        f'ACOUSTIC METRICS ==> \n{ridge_metrics_acoustic}',
    ]
    # Sections are separated by one blank line; the file ends with one newline.
    file.write('\n\n'.join(report_sections) + '\n')

## Lasso Regression

In [84]:
def lasso_regression(df_genre, alpha=0.01, test_size=0.2, random_state=42, standardize=False):
    """Fit a Lasso regression for ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. Must contain a ``popularity`` column, which is the
        target; all remaining columns are passed through ``pd.get_dummies``
        (``drop_first=True``) and used as features.
    alpha : float, default 0.01
        Regularization strength passed to ``Lasso``.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Seed of the train/test split.
    standardize : bool, default False
        When True, features are standardized with a ``StandardScaler`` fitted
        on the training rows before the model is fitted. The default (False)
        keeps the previous behaviour of fitting on the raw feature values.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` computed on the test split, and
        ``"Feature Rankings"``: a DataFrame with columns ``Feature`` and
        ``Importance`` (the fitted coefficients), sorted by absolute value,
        largest first.

    Notes
    -----
    With ``standardize=False`` the coefficients are expressed per raw unit of
    each feature, so the |coefficient| ordering depends on the features'
    scales; pass ``standardize=True`` to rank them on a common scale.
    """
    # separate features / target
    X = pd.get_dummies(df_genre.drop(columns=['popularity']), drop_first=True)
    y = df_genre['popularity']

    # split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    if standardize:
        # Local import: StandardScaler is not among the notebook's top-level imports.
        from sklearn.preprocessing import StandardScaler

        # Fit on the training rows only so the test rows do not influence the scaling.
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

    # train the model
    model = Lasso(alpha=alpha)
    model.fit(X_train, y_train)

    # predictions on the held-out rows
    y_pred = model.predict(X_test)

    # Column order is unchanged by scaling, so coef_ still lines up with X.columns.
    feature_importances = pd.DataFrame({
        'Feature': X.columns,
        'Importance': model.coef_,
    }).sort_values(by='Importance', key=lambda x: x.abs(), ascending=False)

    return {
        "MSE": mean_squared_error(y_test, y_pred),
        "R2": r2_score(y_test, y_pred),
        "Feature Rankings": feature_importances,
    }

In [85]:
# Lasso results for the pop-film subset (dict: MSE, R2, Feature Rankings).
lasso_metrics_popfilm = lasso_regression(df_popfilm)

In [86]:
# Lasso results for the sad subset (dict: MSE, R2, Feature Rankings).
lasso_metrics_sad = lasso_regression(df_sad)

In [87]:
# Lasso results for the electronic subset (dict: MSE, R2, Feature Rankings).
lasso_metrics_electronic = lasso_regression(df_electronic)

In [88]:
# Lasso results for the metal subset (dict: MSE, R2, Feature Rankings).
lasso_metrics_metal = lasso_regression(df_metal)

In [89]:
# Lasso results for the acoustic subset (dict: MSE, R2, Feature Rankings).
lasso_metrics_acoustic = lasso_regression(df_acoustic)

In [90]:
# Write the five Lasso result dicts to a plain-text report
# (the file is overwritten on every run).
with open('lasso_report.txt', 'w') as file:
    report_sections = [
        f'POPFILM METRICS ==> \n{lasso_metrics_popfilm}',
        f'SAD METRICS ==> \n{lasso_metrics_sad}',
        f'ELECTRONIC METRICS ==> \n{lasso_metrics_electronic}',
        f'METAL METRICS ==> \n{lasso_metrics_metal}',
        f'ACOUSTIC METRICS ==> \n{lasso_metrics_acoustic}',
    ]
    # Sections are separated by one blank line; the file ends with one newline.
    file.write('\n\n'.join(report_sections) + '\n')

## Multiple Linear Regression

In [91]:
def mlr(df_genre):
    """Fit an ordinary ``LinearRegression`` for ``popularity`` and score it.

    ``df_genre`` must contain a ``popularity`` column (the target). The other
    columns are passed through ``pd.get_dummies`` (``drop_first=True``) and
    used as predictors. The model is trained on an 80/20 split
    (``random_state=42``) and evaluated on the held-out 20 %.

    Returns a dict with ``"MSE"`` and ``"R2"`` on the test rows and
    ``"Feature Rankings"``, a DataFrame of ``Feature`` / ``Importance``
    (the fitted coefficients) ordered by absolute value, largest first.
    """
    target_name = 'popularity'
    response = df_genre[target_name]
    predictors = pd.get_dummies(df_genre.drop(columns=[target_name]), drop_first=True)

    train_X, test_X, train_y, test_y = train_test_split(
        predictors, response, test_size=0.2, random_state=42
    )

    regressor = LinearRegression()
    regressor.fit(train_X, train_y)
    predicted = regressor.predict(test_X)

    # Coefficients paired with their column names, ranked by magnitude.
    ranking = pd.DataFrame({'Feature': predictors.columns, 'Importance': regressor.coef_})
    ranking = ranking.sort_values(by='Importance', key=lambda col: col.abs(), ascending=False)

    return {
        "MSE": mean_squared_error(test_y, predicted),
        "R2": r2_score(test_y, predicted),
        "Feature Rankings": ranking,
    }

In [92]:
# Linear-regression results for the pop-film subset (dict: MSE, R2, Feature Rankings).
mlr_metrics_popfilm = mlr(df_popfilm)

In [93]:
# Linear-regression results for the sad subset (dict: MSE, R2, Feature Rankings).
mlr_metrics_sad = mlr(df_sad)

In [94]:
# Linear-regression results for the electronic subset (dict: MSE, R2, Feature Rankings).
mlr_metrics_electronic = mlr(df_electronic)

In [95]:
# Linear-regression results for the metal subset (dict: MSE, R2, Feature Rankings).
mlr_metrics_metal = mlr(df_metal)

In [96]:
# Linear-regression results for the acoustic subset (dict: MSE, R2, Feature Rankings).
mlr_metrics_acoustic = mlr(df_acoustic)

In [97]:
# Write the five linear-regression result dicts to a plain-text report
# (the file is overwritten on every run).
with open('mlr_report.txt', 'w') as file:
    report_sections = [
        f'POPFILM METRICS ==> \n{mlr_metrics_popfilm}',
        f'SAD METRICS ==> \n{mlr_metrics_sad}',
        f'ELECTRONIC METRICS ==> \n{mlr_metrics_electronic}',
        f'METAL METRICS ==> \n{mlr_metrics_metal}',
        f'ACOUSTIC METRICS ==> \n{mlr_metrics_acoustic}',
    ]
    # Sections are separated by one blank line; the file ends with one newline.
    file.write('\n\n'.join(report_sections) + '\n')