## Packages

In [12]:
# Standard library, data handling and scikit-learn pieces used by the cells below.
import os
import pandas as pd
# NOTE(review): `np` is not referenced in any cell visible here — confirm before removing.
import numpy as np
from warnings import filterwarnings
# NOTE(review): this silences every warning category for the whole kernel session,
# not only the ones that prompted it; consider narrowing the filter.
filterwarnings("ignore")
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import pickle

## Global Parameters

In [2]:
# Project root that all data and model paths are joined onto.
# Defaults to the original location, but can be redirected by setting the
# THESIS_ROOT_DIRECTORY environment variable before starting the kernel, so the
# notebook is not tied to one user's home directory.
ROOT_DIRECTORY = os.environ.get("THESIS_ROOT_DIRECTORY", "/home/kaan.aytekin/Thesis")
# Non-feature columns
# (identifier / bookkeeping column names; not referenced by the cells visible below)
non_feature_columns = [
    "simulation_run",
    "is_accident_simulation",
    "accident_location",
    "accident_start_time",
    "accident_duration",
    "accident_lane",
    "prev_detector_detector_number",
    "next_detector_detector_number",
    "detector_number",
    "timestamp",
]

## Data Loading

In [3]:
# The list of model input columns is stored one name per line in a text file.
processed_feature_columns_path = os.path.join(
    ROOT_DIRECTORY, "data/thesis_data/processed_feature_columns.txt"
)
with open(processed_feature_columns_path, "r") as reader:
    # Drop empty entries: a trailing newline or blank line in the file would
    # otherwise produce a "" column name and a KeyError on the selections below.
    FEATURE_COLUMNS = [name for name in reader.read().split("\n") if name]

if not FEATURE_COLUMNS:
    # Fail here with a clear message instead of later inside pandas / scikit-learn.
    raise ValueError(
        f"No feature columns found in {processed_feature_columns_path}"
    )


# Load both processed splits and keep only the feature columns plus the target.
df_train = pd.read_csv(
    os.path.join(ROOT_DIRECTORY, "data/thesis_data/x_train_processed.csv")
)[FEATURE_COLUMNS + ["target"]]
df_test = pd.read_csv(
    os.path.join(ROOT_DIRECTORY, "data/thesis_data/x_test_processed.csv")
)[FEATURE_COLUMNS + ["target"]]

In [10]:
# Separate the model inputs from the "target" column for each split.
x_train, y_train = df_train[FEATURE_COLUMNS], df_train["target"]
x_test, y_test = df_test[FEATURE_COLUMNS], df_test["target"]



In [5]:
# Fit the scaler on the training features only; the same fitted instance is
# applied to the test features in a later cell.
min_max_scaler = MinMaxScaler().fit(x_train)



In [6]:
# Apply the fitted min-max scaling to the training features.
x_train_scaled = min_max_scaler.transform(X=x_train)

In [7]:
# Fixed seed so the forest's random choices are repeatable and a
# Restart & Run All reproduces the same model and the same error metrics.
RANDOM_SEED = 42

rf_regressor = RandomForestRegressor(
    n_jobs=20,  # number of parallel jobs used for fitting and predicting
    n_estimators=100,
    min_samples_leaf=10,
    max_features=1 / 3,  # float => fraction of the features considered per split
    random_state=RANDOM_SEED,
)
# Train on the scaled training features; fit() returns the estimator itself.
rf_regressor = rf_regressor.fit(X=x_train_scaled, y=y_train)

In [8]:
# In-sample predictions and their mean squared error (displayed as the cell output).
y_train_pred = rf_regressor.predict(X=x_train_scaled)
mean_squared_error(y_true=y_train, y_pred=y_train_pred)

194.5333993097076

In [11]:
# Scale the held-out features with the scaler fitted on the training data,
# predict, and display the test-set mean squared error.
x_test_scaled = min_max_scaler.transform(X=x_test)
y_test_pred = rf_regressor.predict(X=x_test_scaled)
mean_squared_error(y_true=y_test, y_pred=y_test_pred)

132.01484451181597

In [15]:
# Persist the fitted regressor under <ROOT_DIRECTORY>/model.
regressor_path = os.path.join(ROOT_DIRECTORY, "model/random_forest_regressor.pkl")
# open(..., "wb") does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(regressor_path), exist_ok=True)
with open(regressor_path, "wb") as writer:
    pickle.dump(obj=rf_regressor, file=writer)

In [16]:
# Persist the fitted scaler next to the model so inference can apply the same scaling.
scaler_path = os.path.join(ROOT_DIRECTORY, "model/min_max_scaler.pkl")
# open(..., "wb") does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(scaler_path), exist_ok=True)
with open(scaler_path, "wb") as writer:
    pickle.dump(obj=min_max_scaler, file=writer)