In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from joblib import dump, load
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pickle

In [None]:
# Train the final decision-tree regressor on the processed 100 ms dataset and
# persist it in both joblib and pickle formats.
scenario7 = "Scenario7_DisparoGeneradorNoFreqOversampled2x01stdLagged"
best_config = {'criterion':'poisson', 'max_depth': 61, 'splitter': 'random'}

# splitter='random' makes tree construction stochastic; a fixed seed makes the
# saved model reproducible under Restart Kernel -> Run All.
RANDOM_SEED = 42

data = pd.read_csv(f'data/processed_100ms/{scenario7}.csv') # Read the data

# Remove one scenario and case (including subcases and results).
# NOTE(review): this is a plain prefix match, so codes such as "1_10_..." would
# also be dropped if they exist in the data -- confirm "1_1" is specific enough.
data = data[~data.code.str.startswith("1_1")]

# Every column except the target and the bookkeeping columns is a model input.
input_columns = [col for col in data.columns if col not in ['target', 'code', 'interval', 'pred_time']] # Select inputs
X = data[input_columns].values
y = data['target'].values

model = DecisionTreeRegressor(random_state=RANDOM_SEED)
model.set_params(**best_config) # Train with best configuration
model.fit(X, y)

dump(model, 'models/best_model_100ms.joblib')
# Use a context manager so the pickle file is flushed and closed even on error.
with open(b"models/best_model_100ms.pkl", "wb") as filehandler:
    pickle.dump(model, filehandler)

In [None]:
# Load the fitted model from the joblib file (the same path the training cell dumps to).
model = load('models/best_model_100ms.joblib')

In [None]:
# Predict the target for the first `n_preds` intervals of one raw
# scenario/case/result recording and plot the predictions over the measured
# 'Fmin (Hz)' trace.
scenario = 1
case = 1
result = 1

n_preds = 5  # number of consecutive intervals (starting at interval 0) to predict

raw_data = pd.read_excel(f'data/raw/1.InerciaGeneralizada-DisparoGenerador/Scenario{scenario}Case{case}Result{result}_processed.xlsx')

interval_ms = 100

raw_data = raw_data.set_index('Time (s)')

# The target is the minimum of the 'Fmin (Hz)' column, broadcast to every row.
raw_data['target'] = raw_data['Fmin (Hz)'].min()
raw_data['scenario'] = f'{scenario}'
raw_data['case'] = f'{case}'
raw_data['scenario_case_result'] = f'{scenario}_{case}_{result}'


# Keep only rows from t >= 1 s. Take an explicit copy so that adding the
# 'interval' column below does not raise SettingWithCopyWarning.
raw_data_1s = raw_data[raw_data.index>=1].copy()

# NOTE(review): float floor division can misplace timestamps that sit exactly on
# an interval boundary; left unchanged because it presumably has to match the
# preprocessing used to build the training data -- confirm.
raw_data_1s.loc[:, 'interval'] = (raw_data_1s.index-1)//(interval_ms/1000) # the intervals cover exactly 0.1 seconds

input_cols = ["AngB1_unwrapped (degrees)", "AngB2_unwrapped (degrees)", "AngB3_unwrapped (degrees)"]

true = raw_data_1s.target.values[:n_preds]
predictions = []

input_data_values =[]

dates_excel = []
for i in range(n_preds):

    # Rows belonging to interval i, selected once and reused below.
    interval_rows = raw_data_1s.loc[raw_data_1s.interval==i]

    # Last 115 rows of the interval, flattened column-major so each input
    # column's samples are contiguous (presumably the training feature layout
    # -- TODO confirm).
    input_data = interval_rows[input_cols].values[-115:].flatten("F")

    input_data_values.append(input_data)

    predictions.append(model.predict(input_data.reshape(1, -1))[0])
    # Plot each prediction at the last timestamp of its interval.
    dates_excel.append(interval_rows.index[-1])

predictions = np.array(predictions)
input_data_values = np.array(input_data_values)

matplotlib.rcParams.update({'font.size': 22})
plt.figure(figsize=(20,5))
# The last 12000 rows are left out of the plotted trace.
plt.plot(raw_data.iloc[:-12000].index, raw_data['Fmin (Hz)'].iloc[:-12000], linewidth=2)
plt.xlabel("Time (s)")
plt.ylabel("F (Hz)")

plt.scatter(dates_excel, predictions, color="r")
plt.title(f"MAE: {round(np.abs(true-predictions).mean(), 3)}")