In [3]:
import pandas as pd
# Load the track table from spotify_cleaned.csv. Later cells rely on the
# 'track_name', 'track_genre' and 'popularity' columns being present.
df = pd.read_csv('spotify_cleaned.csv')

In [8]:
# Remove 'track_name' before modelling.
# errors='ignore' keeps this cell idempotent: it reassigns `df` from itself,
# so without it a second run on a live kernel fails with KeyError because the
# column has already been dropped.
df = df.drop(columns=['track_name'], errors='ignore')

In [9]:
# One sub-frame per genre of interest, selected by a boolean mask on
# 'track_genre'. The order of the labels matches the order of the target names.
df_popfilm, df_sad, df_electronic, df_metal, df_acoustic = (
    df.loc[df['track_genre'] == genre_label]
    for genre_label in ('pop-film', 'sad', 'electronic', 'metal', 'acoustic')
)

## Random Forest

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [12]:
def random_forest(df_genre, test_size=0.2, split_seed=42, n_estimators=100, model_seed=100):
    """Fit a random-forest regressor on ``popularity`` and score it on a hold-out split.

    Parameters
    ----------
    df_genre : pandas.DataFrame
        Rows to model. Must contain a ``popularity`` column (the target);
        every other column is used as a feature.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split`` as ``test_size``.
    split_seed : int, default 42
        ``random_state`` for the train/test split.
    n_estimators : int, default 100
        Number of trees in the forest.
    model_seed : int, default 100
        ``random_state`` for ``RandomForestRegressor``.

    Returns
    -------
    dict
        ``"MSE"`` and ``"R2"`` computed on the test split, and
        ``"Feature Rankings"``: a DataFrame with ``Feature`` / ``Importance``
        columns sorted by descending importance.

    Raises
    ------
    KeyError
        If ``df_genre`` has no ``popularity`` column.
    ValueError
        If ``df_genre`` has no rows.
    """
    target_column = 'popularity'

    # Separate features / target.
    features = df_genre.drop(columns=[target_column])
    target = df_genre[target_column]

    # Fail early with a readable message instead of an error from deep inside
    # the split (e.g. when a genre filter matched no rows).
    if len(features) == 0:
        raise ValueError("random_forest: df_genre has no rows to train on")

    # One-hot encode non-numeric columns; drop_first drops the first level of
    # each encoded column.
    features = pd.get_dummies(features, drop_first=True)

    # Hold out part of the data for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=split_seed
    )

    # Train the model.
    model = RandomForestRegressor(random_state=model_seed, n_estimators=n_estimators)
    model.fit(X_train, y_train)

    # Score predictions on the held-out rows.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Pair each encoded feature name with its importance, most important first.
    feature_importances = pd.DataFrame({
        'Feature': features.columns,
        'Importance': model.feature_importances_
    }).sort_values(by='Importance', ascending=False)

    return {"MSE": mse, "R2": r2, "Feature Rankings": feature_importances}

In [13]:
# Fit and evaluate one forest per genre subset, one cell each. Every result is
# the dict returned by random_forest (keys "MSE", "R2", "Feature Rankings").
metrics_popfilm = random_forest(df_popfilm)

In [14]:
# Same evaluation on the 'sad' subset.
metrics_sad = random_forest(df_sad)

In [15]:
# Same evaluation on the 'electronic' subset.
metrics_electronic = random_forest(df_electronic)

In [16]:
# Same evaluation on the 'metal' subset.
metrics_metal = random_forest(df_metal)

In [17]:
# Same evaluation on the 'acoustic' subset.
metrics_acoustic = random_forest(df_acoustic)

In [19]:
# Write one "<LABEL> METRICS ==> {...}" entry per genre, separated by a blank
# line, to random_forest_report.txt.
labelled_metrics = [
    ('POPFILM', metrics_popfilm),
    ('SAD', metrics_sad),
    ('ELECTRONIC', metrics_electronic),
    ('METAL', metrics_metal),
    ('ACOUSTIC', metrics_acoustic),
]

# Each entry embeds the repr of the "Feature Rankings" DataFrame. With pandas'
# default display options that repr is elided with "..." once the table exceeds
# display.max_rows, and long feature names are cut at display.max_colwidth, so
# the limits are lifted while the report text is produced.
with pd.option_context(
    'display.max_rows', None,
    'display.max_colwidth', None,
    'display.expand_frame_repr', False,
):
    # Explicit UTF-8 so non-ASCII feature names do not depend on the platform's
    # default encoding.
    with open('random_forest_report.txt', 'w', encoding='utf-8') as file:
        for position, (label, metrics) in enumerate(labelled_metrics):
            if position:
                # Blank separator line between entries (none after the last).
                print('', file=file)
            print(f'{label} METRICS ==> {metrics}', file=file)
