In [84]:
import pandas as pd
# Load the cleaned dataset; the relative path is resolved against the kernel's working directory.
df = pd.read_csv('spotify_cleaned.csv')

In [85]:
# Remove the track title before modelling. Because this cell rebinds `df`,
# running it a second time used to raise KeyError (the column was already gone);
# errors='ignore' makes the cell safe to re-run.
df = df.drop(columns=['track_name'], errors='ignore')

In [86]:
# One subset per genre of interest: rows are selected with a boolean mask on
# `track_genre`, and every column is kept.
df_popfilm = df.loc[df['track_genre'] == 'pop-film', :]
df_sad = df.loc[df['track_genre'] == 'sad', :]
df_electronic = df.loc[df['track_genre'] == 'electronic', :]
df_metal = df.loc[df['track_genre'] == 'metal', :]
df_acoustic = df.loc[df['track_genre'] == 'acoustic', :]

## Random Forest

In [87]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [88]:
def random_forest(df_genre, n_estimators=100, test_size=0.2):
    """Fit a random forest that predicts ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. The ``popularity`` column is the target; every other
        column is used as a feature (object/string/category columns are
        one-hot encoded).
    n_estimators : int, default 100
        Number of trees in the forest.
    test_size : float or int, default 0.2
        Size of the hold-out split, forwarded to ``train_test_split``.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` measured on the hold-out split, plus
        ``"Feature Rankings"``: a DataFrame with ``Feature`` and
        ``Importance`` columns sorted from most to least important.

    Raises
    ------
    KeyError
        If ``df_genre`` has no ``popularity`` column.
    ValueError
        If ``df_genre`` has no rows.
    """
    # separate features / target
    X = df_genre.drop(columns=['popularity'])
    y = df_genre['popularity']

    # Fail early with a readable message instead of the less direct error
    # train_test_split raises for an empty input (e.g. a mistyped genre filter).
    if len(y) == 0:
        raise ValueError("random_forest(): df_genre has no rows; nothing to fit")

    # One-hot encode the non-numeric columns, dropping the first level of each.
    # NOTE(review): any identifier-like text column still present in the frame
    # becomes one dummy column per distinct value here -- confirm the cleaned
    # CSV contains none.
    X = pd.get_dummies(X, drop_first=True)

    # Fixed seeds (42 for the split, 100 for the forest) keep runs reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=42
    )
    model = RandomForestRegressor(random_state=100, n_estimators=n_estimators)

    # train the model
    model.fit(X_train, y_train)

    # score on the hold-out rows only
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Impurity-based importances, in the same order as the encoded columns.
    feature_importances = pd.DataFrame({
        'Feature': X.columns,
        'Importance': model.feature_importances_
    }).sort_values(by='Importance', ascending=False)

    return {"MSE": mse, "R2": r2, "Feature Rankings": feature_importances}

In [89]:
# Fit and score the random forest on the pop-film rows only.
metrics_popfilm = random_forest(df_popfilm)

In [90]:
# Fit and score the random forest on the sad rows only.
metrics_sad = random_forest(df_sad)

In [91]:
# Fit and score the random forest on the electronic rows only.
metrics_electronic = random_forest(df_electronic)

In [92]:
# Fit and score the random forest on the metal rows only.
metrics_metal = random_forest(df_metal)

In [93]:
# Fit and score the random forest on the acoustic rows only.
metrics_acoustic = random_forest(df_acoustic)

In [94]:
# Write the five random-forest result dicts to a plain-text report.
# A single print call emits every section; sep='\n\n' leaves one blank line
# between consecutive sections and print's default end adds the final newline.
with open('random_forest_report.txt', 'w') as file:
    print(
        f'POPFILM METRICS ==> \n{metrics_popfilm}',
        f'SAD METRICS ==> \n{metrics_sad}',
        f'ELECTRONIC METRICS ==> \n{metrics_electronic}',
        f'METAL METRICS ==> \n{metrics_metal}',
        f'ACOUSTIC METRICS ==> \n{metrics_acoustic}',
        sep='\n\n',
        file=file,
    )


In [None]:
def ridge_regression(df_genre, alpha=10, standardize=True):
    """Fit a ridge regression that predicts ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. The ``popularity`` column is the target; every other
        column is used as a feature (object/string/category columns are
        one-hot encoded).
    alpha : float, default 10
        L2 regularization strength passed to ``Ridge``.
    standardize : bool, default True
        When True, features are centered and scaled to unit variance (using
        statistics from the training split only) before fitting. The L2
        penalty and the |coefficient| ranking both depend on feature scale,
        so without this step the ranking reflects column units rather than
        influence. Pass False to reproduce the previous unscaled fit.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` measured on the hold-out split, plus
        ``"Feature Rankings"``: a DataFrame with ``Feature`` and
        ``Importance`` (the signed coefficient) sorted by descending
        absolute coefficient.

    Raises
    ------
    KeyError
        If ``df_genre`` has no ``popularity`` column.
    ValueError
        If ``df_genre`` has no rows.
    """
    # separate features / target
    X = df_genre.drop(columns=['popularity'])
    y = df_genre['popularity']

    # Fail early with a readable message instead of the less direct error
    # train_test_split raises for an empty input.
    if len(y) == 0:
        raise ValueError("ridge_regression(): df_genre has no rows; nothing to fit")

    X = pd.get_dummies(X, drop_first=True)

    # split the data (fixed seed keeps the split reproducible)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Putting the scaler in a pipeline guarantees it is fitted on the training
    # rows only and re-applied unchanged to the hold-out rows.
    if standardize:
        model = make_pipeline(StandardScaler(), Ridge(alpha=alpha))
    else:
        model = Ridge(alpha=alpha)

    # train the model
    model.fit(X_train, y_train)

    # score on the hold-out rows only
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # The fitted linear model is the last pipeline step when standardizing.
    linear_model = model.steps[-1][1] if standardize else model

    # Rank by magnitude but keep the sign so the direction of the effect is visible.
    feature_importances = pd.DataFrame({
        'Feature': X.columns,
        'Importance': linear_model.coef_
    }).sort_values(by='Importance', key=lambda x: x.abs(), ascending=False)

    return {"MSE": mse, "R2": r2, "Feature Rankings": feature_importances}

In [96]:
# Fit and score the ridge model on the pop-film rows only.
ridge_metrics_popfilm = ridge_regression(df_popfilm)

In [97]:
# Fit and score the ridge model on the sad rows only.
ridge_metrics_sad = ridge_regression(df_sad)

In [98]:
# Fit and score the ridge model on the electronic rows only.
ridge_metrics_electronic = ridge_regression(df_electronic)

In [99]:
# Fit and score the ridge model on the metal rows only.
ridge_metrics_metal = ridge_regression(df_metal)

In [100]:
# Fit and score the ridge model on the acoustic rows only.
ridge_metrics_acoustic = ridge_regression(df_acoustic)

In [101]:
# Write the five ridge result dicts to a plain-text report.
# A single print call emits every section; sep='\n\n' leaves one blank line
# between consecutive sections and print's default end adds the final newline.
with open('ridge_report.txt', 'w') as file:
    print(
        f'POPFILM METRICS ==> \n{ridge_metrics_popfilm}',
        f'SAD METRICS ==> \n{ridge_metrics_sad}',
        f'ELECTRONIC METRICS ==> \n{ridge_metrics_electronic}',
        f'METAL METRICS ==> \n{ridge_metrics_metal}',
        f'ACOUSTIC METRICS ==> \n{ridge_metrics_acoustic}',
        sep='\n\n',
        file=file,
    )

In [None]:
def lasso_regression(df_genre, alpha=0.01, standardize=True):
    """Fit a lasso regression that predicts ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. The ``popularity`` column is the target; every other
        column is used as a feature (object/string/category columns are
        one-hot encoded).
    alpha : float, default 0.01
        L1 regularization strength passed to ``Lasso``.
    standardize : bool, default True
        When True, features are centered and scaled to unit variance (using
        statistics from the training split only) before fitting. The L1
        penalty and the |coefficient| ranking both depend on feature scale,
        so without this step the ranking reflects column units rather than
        influence. Pass False to reproduce the previous unscaled fit.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` measured on the hold-out split, plus
        ``"Feature Rankings"``: a DataFrame with ``Feature`` and
        ``Importance`` (the signed coefficient) sorted by descending
        absolute coefficient.

    Raises
    ------
    KeyError
        If ``df_genre`` has no ``popularity`` column.
    ValueError
        If ``df_genre`` has no rows.
    """
    # separate features / target
    X = df_genre.drop(columns=['popularity'])
    y = df_genre['popularity']

    # Fail early with a readable message instead of the less direct error
    # train_test_split raises for an empty input.
    if len(y) == 0:
        raise ValueError("lasso_regression(): df_genre has no rows; nothing to fit")

    X = pd.get_dummies(X, drop_first=True)

    # split the data (fixed seed keeps the split reproducible)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Putting the scaler in a pipeline guarantees it is fitted on the training
    # rows only and re-applied unchanged to the hold-out rows.
    if standardize:
        model = make_pipeline(StandardScaler(), Lasso(alpha=alpha))
    else:
        model = Lasso(alpha=alpha)

    # train the model
    model.fit(X_train, y_train)

    # score on the hold-out rows only
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # The fitted linear model is the last pipeline step when standardizing.
    linear_model = model.steps[-1][1] if standardize else model

    # Rank by magnitude but keep the sign so the direction of the effect is visible.
    feature_importances = pd.DataFrame({
        'Feature': X.columns,
        'Importance': linear_model.coef_
    }).sort_values(by='Importance', key=lambda x: x.abs(), ascending=False)

    return {"MSE": mse, "R2": r2, "Feature Rankings": feature_importances}

In [103]:
# Fit and score the lasso model on the pop-film rows only.
lasso_metrics_popfilm = lasso_regression(df_popfilm)

In [104]:
# Fit and score the lasso model on the sad rows only.
lasso_metrics_sad = lasso_regression(df_sad)

In [105]:
# Fit and score the lasso model on the electronic rows only.
lasso_metrics_electronic = lasso_regression(df_electronic)

In [106]:
# Fit and score the lasso model on the metal rows only.
lasso_metrics_metal = lasso_regression(df_metal)

In [107]:
# Fit and score the lasso model on the acoustic rows only.
lasso_metrics_acoustic = lasso_regression(df_acoustic)

In [108]:
# Write the five lasso result dicts to a plain-text report.
# A single print call emits every section; sep='\n\n' leaves one blank line
# between consecutive sections and print's default end adds the final newline.
with open('lasso_report.txt', 'w') as file:
    print(
        f'POPFILM METRICS ==> \n{lasso_metrics_popfilm}',
        f'SAD METRICS ==> \n{lasso_metrics_sad}',
        f'ELECTRONIC METRICS ==> \n{lasso_metrics_electronic}',
        f'METAL METRICS ==> \n{lasso_metrics_metal}',
        f'ACOUSTIC METRICS ==> \n{lasso_metrics_acoustic}',
        sep='\n\n',
        file=file,
    )