# Applying RF

In [1]:
from sklearn import tree
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [2]:
spotify_df = pd.read_csv("dataset-of-10s.csv")
features = ["danceability", "energy", "instrumentalness", "duration_ms", "sections", "loudness"]
X = spotify_df.loc[:, features].values
Y = spotify_df.loc[:,["target"]].values

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.10, random_state=42)

clf = tree.DecisionTreeClassifier()

param_grid = {
    'max_depth': [10, 50, 100, 500, 1000],
    'min_samples_leaf': [3, 4, 5, 10],
    'criterion': ["gini", "entropy"],
    'splitter': ["best", "random"],
    'min_samples_split': [2, 5, 10, 15],
    'max_features': [None, "auto", "sqrt", "log2"]
}

grid_clf = GridSearchCV(clf, param_grid, cv=10)
grid_clf.fit(X_train, y_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_depth': [10, 50, 100, 500, 1000], 'min_samples_leaf': [3, 4, 5, 10], 'criterion': ['gini', 'entropy'], 'splitter': ['best', 'random'], 'min_samples_split': [2, 5, 10, 15], 'max_features': [None, 'auto', 'sqrt', 'log2']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [8]:
grid_clf.best_params_

{'criterion': 'entropy',
 'max_depth': 10,
 'max_features': 'log2',
 'min_samples_leaf': 10,
 'min_samples_split': 5,
 'splitter': 'best'}

In [9]:
best_grid = grid_clf.best_estimator_
predictions = best_grid.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)

In [10]:
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Accuracy: 0.8203125
Precision: 0.771117166212534
Recall: 0.9012738853503185
F1 Score: 0.8311306901615272
