In [3]:
import pandas as pd
from filterpy.kalman import KalmanFilter
from filterpy.common import Q_discrete_white_noise
import numpy as np
import os 
import matplotlib.pyplot as plt 
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
import plotly.graph_objects as go
from plotly_resampler import FigureResampler, FigureWidgetResampler

In [4]:
data_path = '/Users/alessandroquattrociocchi/Documents/Data/pump_station_data/raw_models/'

In [5]:
power_df1 = pd.read_parquet(os.path.join(data_path, "pump1_power_siso.par"))
power_df2 = pd.read_parquet(os.path.join(data_path, "pump3_power_siso.par"))
power_df3 = pd.read_parquet(os.path.join(data_path, "pump4_power_siso.par"))

In [6]:
power_df1.set_index('time', inplace=True)
power_df2.set_index('time', inplace=True)
power_df3.set_index('time', inplace=True)

In [7]:
power_df = power_df1.join(power_df2, on = 'time').join(power_df3, on = 'time')

In [34]:
df = power_df.rolling(window=10, center=True).median().dropna()
power_df = df.resample("1min").mean().dropna()

In [35]:
train_df = power_df["2024-01-01 06:00:00":"2024-02-14 11:00:00"]
test_df = power_df["2024-01-15":]

In [36]:
train_df = train_df[(train_df["pump1_power"] >= 0) & (train_df["pump4_power"] == 0) & (train_df["pump3_power"] == 0) & (train_df["outflow"] > 0)]
test_df = test_df[(test_df["pump1_power"] > 0) & (test_df["pump4_power"] == 0) & (test_df["pump3_power"] == 0) & (test_df["outflow"] > 0)]

KeyError: 'outflow'

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

model = LinearRegression()
model.fit(train_df[["pump1_power"]], train_df["outflow"])
test_df["outflow_pred"] = model.predict(test_df[["pump1_power"]])

In [None]:
rmse = np.sqrt(mean_squared_error(test_df["outflow"], test_df["outflow_pred"]))
print("Root Mean Squared Error (RMSE):", rmse)


Root Mean Squared Error (RMSE): 28.018984529207657


In [None]:
r_squared = r2_score(test_df["outflow"], test_df["outflow_pred"])
print("R-squared (R^2):", r_squared)

R-squared (R^2): 0.8750040881729315


In [None]:
test_df = test_df.reset_index()

In [None]:
import plotly.graph_objects as go
from plotly_resampler import FigureResampler, FigureWidgetResampler
 
 
fig = FigureWidgetResampler(go.Figure())
fig.update_layout(margin=dict(l=10, r=10, t=10, b=10))
fig.add_trace(go.Scattergl(name=r'Inflow', showlegend=True), hf_x=test_df.index, hf_y=test_df['outflow'] - np.random.normal(0,0.2, len(test_df)))
fig.add_trace(go.Scattergl(name=r'Prediction', showlegend=True), hf_x=test_df.index, hf_y=test_df['outflow_pred'])
fig.update_layout(height=400, template="plotly_dark")
display(fig)

FigureWidgetResampler({
    'data': [{'name': '<b style="color:sandybrown">[R]</b> Inflow <i style="color:#fc9944">~11</i>',
              'showlegend': True,
              'type': 'scattergl',
              'uid': '52f3a9a1-46cf-4f27-919a-d0a649399d65',
              'x': array([    0,     6,    16, ..., 10964, 10977, 10987], dtype=uint64),
              'y': array([308.47856183, 678.88263416, 576.06092099, ..., 654.59231094,
                          610.91361997, 692.75264229])},
             {'name': '<b style="color:sandybrown">[R]</b> Prediction <i style="color:#fc9944">~11</i>',
              'showlegend': True,
              'type': 'scattergl',
              'uid': '3913b961-ee53-4651-b642-fccdf482d93d',
              'x': array([    0,     6,    16, ..., 10964, 10975, 10987], dtype=uint64),
              'y': array([402.59445259, 699.87581036, 595.69394793, ..., 616.65431752,
                          579.45563597, 658.79202647])}],
    'layout': {'height': 400, 'margin': {'b