# Decision Tree

This notebook runs hyperparameter tuning to find the best settings for the chosen model; those settings are then transferred to the main `models.ipynb` notebook.

In [16]:
# IMPORTS
from utils import *
from sklearn.tree import DecisionTreeClassifier, plot_tree

### Data

In [17]:
# LOAD THE DATASET, SEPARATE FEATURES FROM TARGET, THEN SPLIT AND ENCODE
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle("../../datasets/pickle/processed_action_movie_data.pkl")

# "rating" is the prediction target; every other column is kept as a feature.
y = df['rating']
X = df.drop(columns="rating")

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)

# Encoding is applied after the split. catboost_encoding comes from utils
# (presumably a CatBoost-style target encoder; confirm its behavior there).
X_train, X_test, y_train, y_test = catboost_encoding(*split_parts)

### Tuning

In [18]:
# Search space for the grid search: a single split criterion (entropy)
# combined with every tree depth from 1 to 99 inclusive.
param_grid = dict(
    criterion=['entropy'],
    max_depth=range(1, 100),
)

In [19]:
# Create a decision tree classifier. The seed is fixed because scikit-learn
# permutes the features at each split, so ties between equally good splits
# could otherwise resolve differently from run to run and change the tuning
# result. 42 matches the seed used for the train/test split.
dt = DecisionTreeClassifier(random_state=42)

# Create a grid search over param_grid with 5-fold cross-validation, ranking
# candidates by macro-averaged F1 and using all available cores (n_jobs=-1)
grid_search = GridSearchCV(dt, param_grid, cv=5, n_jobs=-1, scoring='f1_macro')

# Fit the grid search object to the training data
grid_search.fit(X_train, y_train)

In [20]:
# Report the winning hyperparameter combination and its mean cross-validated score
for label, value in (
    ("Best parameters: ", grid_search.best_params_),
    ("Best score: ", grid_search.best_score_),
):
    print(label, value)

Best parameters:  {'criterion': 'entropy', 'max_depth': 7}
Best score:  0.5874714187270781
