In [30]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, auc, roc_auc_score

In [3]:
# Read the prepared dataset from a CSV file located in the working directory.
DATA_PATH = "US_Accidents_Dec20_updated_cleaned_imputed_data_prepared.csv"
df = pd.read_csv(DATA_PATH)

In [4]:
# Split the frame into the target column ("Severity") and the feature columns.
y = df["Severity"]
X = df.drop(columns=["Severity"])

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on y — confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [9]:
# Column names earmarked for scaling.
# NOTE(review): no scaler is applied to these columns in the cells visible here.
cols_to_scale = [
    "Temperature(F)",
    "Wind_Chill(F)",
    "Humidity(%)",
    "Pressure(in)",
    "Visibility(mi)",
    "Wind_Speed(mph)",
    "Precipitation(in)",
]

In [23]:
# Feature columns used to train and evaluate the decision tree below.
best_features = [
    "Month_Of_Year_sin",
    "Day_Of_Week_cos",
    "Precipitation(in)",
    "Month_Of_Year_cos",
    "Day_Of_Week_sin",
    "Hour_Of_Day_cos",
    "Temperature(F)",
    "Wind_Chill(F)",
    "Wind_Speed(mph)",
    "Pressure(in)",
    "Visibility(mi)",
    "Clear",
    "Side",
    "CA",
]
# Apply the same column selection to both splits so their columns stay aligned.
X_train_best_features, X_test_best_features = (
    split[best_features] for split in (X_train, X_test)
)

In [25]:
# Build a {label: weight} mapping that weights each class inversely to its
# frequency in the training labels ("balanced" mode).
unique_classes = np.unique(y_train)
# `classes` and `y` must be passed by keyword: scikit-learn deprecated the
# positional form in 0.24 and rejects it from 1.0 onwards.
balanced_weights = compute_class_weight(
    class_weight="balanced", classes=unique_classes, y=y_train
)
# Key the weights by the actual labels (same order as `unique_classes`) instead
# of assuming the labels are exactly 1..K. `.tolist()` yields native Python
# scalars so the mapping prints cleanly in the estimator repr.
dtr_class_weight = dict(zip(unique_classes.tolist(), balanced_weights.tolist()))

In [26]:
# Hyper-parameters for the tree; the class weights and a fixed seed are passed
# alongside them when the estimator is constructed.
best_params = dict(criterion="gini", max_depth=6)
dtr_model_best = DecisionTreeClassifier(
    class_weight=dtr_class_weight,
    random_state=0,
    **best_params,
)

In [27]:
# Train on the selected feature columns; as the cell's last expression, the
# fitted estimator is also displayed.
dtr_model_best.fit(X=X_train_best_features, y=y_train)

DecisionTreeClassifier(class_weight={1: 13.428971610788786,
                                     2: 0.31260271020926594,
                                     3: 2.3509602710623847,
                                     4: 3.319750700709468},
                       max_depth=6, random_state=0)

In [31]:
# Score the fitted tree on the held-out split: hard predictions feed accuracy
# and F1, class probabilities feed the one-vs-rest ROC AUC.
y_pred = dtr_model_best.predict(X_test_best_features)
y_proba = dtr_model_best.predict_proba(X_test_best_features)

acc = accuracy_score(y_test, y_pred)
f1_macro, f1_micro = (
    f1_score(y_test, y_pred, average=averaging) for averaging in ("macro", "micro")
)
roc_auc = roc_auc_score(y_test, y_proba, multi_class="ovr")

print(f"Accuracy: {acc}, F1 (macro): {f1_macro}, F1 (micro): {f1_micro}, AUC: {roc_auc}")

Accuracy: 0.5381959216788198, F1 (macro): 0.33775969806778555, F1 (micro): 0.5381959216788198, AUC: 0.777894626775324
