In [None]:
import pandas as pd

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from scipy.stats import randint

from sklearn.tree import export_graphviz
import graphviz

# Load the aggregated dataset and keep only its first 15 columns.
# The 'NLOS' column referenced later in the notebook has to be among them.
dataset = pd.read_csv('../data/processed/aggregated_dataset.csv').iloc[:, :15]

# Bare last expression -> rich DataFrame preview as the cell output.
dataset

In [None]:
# Separate the feature matrix from the 'NLOS' target column.
X = dataset.drop('NLOS', axis=1)
y = dataset['NLOS']

# Hold out 20% of the rows for evaluation. The seed is fixed so that the
# split (and every metric computed from it) is identical on each
# Restart & Run All; without it the reported scores change run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Baseline model: a random forest with default hyperparameters.
# random_state is pinned (same seed as the tuned model further down) so the
# bootstrap samples and feature sub-sampling are reproducible between runs.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out rows with the baseline forest.
y_pred = rf.predict(X=X_test)

In [None]:
# Share of held-out samples the baseline forest labelled correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy: ', accuracy)

In [None]:
# Rendering options shared by every exported tree: only the top two levels
# are drawn, nodes are colour-filled, impurity values are hidden and sample
# counts are shown as proportions.
export_options = dict(
    feature_names=X_train.columns,
    filled=True,
    max_depth=2,
    impurity=False,
    proportion=True,
)

# Show the first three trees of the fitted baseline forest.
for tree_index in range(3):
    estimator = rf.estimators_[tree_index]
    dot_source = export_graphviz(estimator, **export_options)
    display(graphviz.Source(dot_source))

In [None]:
# Search space for the randomized hyperparameter search.
# RandomizedSearchCV needs every value to be either a list of candidates or
# a distribution exposing `rvs`; bare integers are rejected with a TypeError
# when candidates are sampled. scipy's randint excludes the upper bound.
rand_params = {
    'n_estimators': randint(50, 301),  # 50..300 trees
    'max_depth': randint(5, 31),       # depth 5..30
}

rf = RandomForestClassifier(random_state=42)

# n_iter candidates x cv folds fits are run (10 x 5 = 50); lower n_iter if
# this is too slow. Candidates are ranked by classification accuracy, since
# the estimator is a classifier rather than a regressor.
rand_search = RandomizedSearchCV(rf,
                                 param_distributions=rand_params,
                                 n_iter=10,
                                 cv=5,
                                 random_state=42,
                                 n_jobs=-1,
                                 scoring='accuracy',
                                 verbose=2)

rand_search.fit(X_train, y_train)

In [None]:
# Report the hyperparameter combination the search ranked best ...
print('Best hyperparameters: ', rand_search.best_params_)

# ... and keep the corresponding estimator for the evaluation cells below.
best_rf = rand_search.best_estimator_

In [None]:
# Confusion matrix of the tuned forest on the held-out split.
y_pred = best_rf.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

cm_display = ConfusionMatrixDisplay(confusion_matrix=cm)
cm_display.plot()

In [None]:
# Evaluate the tuned forest on the held-out split.
# Predictions are recomputed here so the cell does not depend on `y_pred`
# left over from another cell.
y_pred = best_rf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
# NOTE(review): precision_score / recall_score are called with their default
# averaging, which presumably requires 'NLOS' to be a binary label - confirm.
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# Each metric is printed under its own name (precision and recall were
# previously both labelled "Accuracy").
print('Accuracy: ', accuracy)
print('Precision: ', precision)
print('Recall: ', recall)

In [None]:
# Pair each importance score of the tuned forest with its feature name,
# then order the features from most to least important.
feature_importance = pd.Series(
    data=best_rf.feature_importances_,
    index=X_train.columns,
)
feature_importance = feature_importance.sort_values(ascending=False)

# Bar chart of the ranked importances.
feature_importance.plot.bar()