**PLAN:**
1. Model szeregu czasowego przewidujący wartości parametrów za 6 minut;
2. Sprawdzenie kombinacji;
3. Wybór kombinacji, która da nam wartości najbliższe celu za 6 minut.

In [1]:
import os
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns
from plotly.subplots import make_subplots

from itertools import product, combinations
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [22]:
# Three value grids (100 evenly spaced points each) that are combined into a
# cartesian product further down in the notebook.
# np.linspace includes its end point, so the first grid stops at 800 (not the
# arange-style 801) to stay symmetric around zero like the other two grids.
# NOTE(review): which variable each grid belongs to is not visible here - confirm.
arrays = [np.linspace(-800, 800, 100), np.linspace(-0.0013, 0.0013, 100), np.linspace(-20, 20, 100)]

In [23]:
# Every combination of one value from each grid in `arrays`, materialised as a
# list of tuples (100 * 100 * 100 = 1,000,000 entries for three 100-point grids).
DELTAS_PROD = [*product(*arrays)]

In [24]:
# Lag applied to the model features, expressed in rows of the resampled frame.
# Rows are nominally 10 s apart after preprocess_df, so 30 rows correspond to
# 300 s (5 minutes).
# NOTE(review): the plan at the top of the notebook mentions 6 minutes - confirm
# which horizon is intended.
MEAN_REACTION_TIME = 30

In [25]:
# Column names of the manipulated variables ("zmienne manipulowane").
ZMIENNE_MANIPULOWANE = [
    "001FCx00285_SPPV.PV",
    "001XXXCALC01.NUM.PV[3]",
    "001SCx00274_SPPV.PV",
    "001FCx00241_sppv.pv",
]

# Column names of the "ring" variables ("zmienne pierscienie"). In the part of
# the notebook visible here they are only referenced by a disabled drop step.
ZMIENNE_PIERSCIENIE = [
    "001NIR0SZRG.daca.pv",
    "001NIR0S600.daca.pv",
    "001NIR0S500.daca.pv",
    "001NIR0S300.daca.pv",
    "001NIR0S100.daca.pv",
]

# Name of the target column and of the timestamp column.
ZMIENNA_CELU = "001NIR0SZR0.daca.pv"
ZMIENNA_CZASU = "Czas"

In [26]:
# Load the data set from a CSV file located in the working directory.
df = pd.read_csv('processed_data_zad2.csv')
# Disabled step: would drop the ZMIENNE_PIERSCIENIE columns from the frame.
# df = df.drop(labels=ZMIENNE_PIERSCIENIE, axis=1)

In [27]:
# Print a summary of the loaded frame (index range, column names and dtypes).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3954000 entries, 0 to 3953999
Data columns (total 27 columns):
 #   Column                  Dtype  
---  ------                  -----  
 0   Czas                    object 
 1   001FCx00285_SPPV.PV     float64
 2   001XXXCALC01.NUM.PV[3]  float64
 3   001SCx00274_SPPV.PV     float64
 4   001FCx00241_sppv.pv     float64
 5   001NIR0SZR0.daca.pv     float64
 6   001NIR0SZRG.daca.pv     float64
 7   001NIR0S600.daca.pv     float64
 8   001NIR0S500.daca.pv     float64
 9   001NIR0S300.daca.pv     float64
 10  001NIR0S100.daca.pv     float64
 11  001FYx00206_SPSUM.pv    float64
 12  001FCx00231_SPPV.PV     float64
 13  001FCx00251_SPPV.PV     float64
 14  001FCx00281.PV          float64
 15  001FCx00262.PV          float64
 16  001FCx00261.PV          float64
 17  001XXXCALC01.NUM.PV[2]  float64
 18  prob_corg               float64
 19  prob_s                  float64
 20  sita_nadziarno          float64
 21  sita_podziarno          float64

In [28]:
def plot_date_range(df, start_date, stop_date):
    """Plot the target and one manipulated variable over a closed time window.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing ZMIENNA_CZASU, ZMIENNA_CELU and '001FCx00285_SPPV.PV',
        either as columns or as index levels (the index is reset first).
    start_date, stop_date
        Window bounds; rows with start_date <= ZMIENNA_CZASU <= stop_date
        are kept (both ends inclusive).

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with ZMIENNA_CELU on the primary y axis and
        '001FCx00285_SPPV.PV' on the secondary y axis.
    """
    # Move any index levels back into columns so the time column can be filtered.
    flat = df.reset_index(drop=False)
    in_window = (flat[ZMIENNA_CZASU] >= start_date) & (flat[ZMIENNA_CZASU] <= stop_date)
    window = flat[in_window]

    target_trace = go.Scatter(
        mode='lines',
        x=window[ZMIENNA_CZASU],
        y=window[ZMIENNA_CELU],
        text='Straty KSR',
        name='Straty KSR',
    )
    flow_trace = go.Scatter(
        mode='lines',
        x=window[ZMIENNA_CZASU],
        y=window['001FCx00285_SPPV.PV'],
        text='Przepływ powietrza dystrybucyjnego',
        name='Przepływ powietrza dystrybucyjnego',
    )

    # A single subplot with two y axes: the second trace goes on the right-hand one.
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(target_trace)
    fig.add_trace(flow_trace, secondary_y=True)
    return fig

In [29]:
def preprocess_df(df, start_date, stop_date):
    """Cut a time window out of ``df`` and resample it to 10-second means.

    The caller's frame is left untouched: timestamps are parsed into a
    separate Series instead of overwriting ``df[ZMIENNA_CZASU]``, so the
    function can be re-run on the same raw frame with the same result.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame with a ZMIENNA_CZASU column (parseable by
        ``pd.to_datetime``), a ZMIENNA_CELU column and further columns that
        can be averaged.
    start_date, stop_date
        Window bounds; rows with start_date <= ZMIENNA_CZASU < stop_date are
        kept (start inclusive, stop exclusive).

    Returns
    -------
    pandas.DataFrame
        One row per 10-second bin that contains no missing value, indexed by
        a MultiIndex (ZMIENNA_CZASU, ZMIENNA_CELU); the remaining columns
        hold the bin means.
    """
    # Parse once into a local Series so the caller's column keeps its dtype.
    timestamps = pd.to_datetime(df[ZMIENNA_CZASU])
    in_window = (timestamps >= start_date) & (timestamps < stop_date)

    # Copy only the selected rows and attach the parsed timestamps to the copy.
    window = df.loc[in_window].copy()
    window[ZMIENNA_CZASU] = timestamps[in_window]

    # Lower-case 's' is the non-deprecated spelling of the seconds alias.
    resampled = window.set_index(ZMIENNA_CZASU).resample('10s').mean()

    # Bins with any missing value are removed, so consecutive rows of the
    # result are not guaranteed to be exactly 10 s apart.
    complete = resampled.dropna().reset_index()

    # The target is moved into the index next to the timestamp; the index is
    # not affected by a later DataFrame.shift of the columns.
    return complete.set_index([ZMIENNA_CZASU, ZMIENNA_CELU])

In [30]:
# Training window: 2021-04-01 (inclusive) up to 2021-04-26 (exclusive).
df_april_train = preprocess_df(df, '2021-04-01', '2021-04-26')
# Test window: 2021-04-26 (inclusive) up to 2021-04-29 (exclusive).
df_april_test = preprocess_df(df, '2021-04-26', '2021-04-29')

In [31]:
# Display the preprocessed training frame (indexed by timestamp and target).
df_april_train

Unnamed: 0_level_0,Unnamed: 1_level_0,001FCx00285_SPPV.PV,001XXXCALC01.NUM.PV[3],001SCx00274_SPPV.PV,001FCx00241_sppv.pv,001NIR0SZRG.daca.pv,001NIR0S600.daca.pv,001NIR0S500.daca.pv,001NIR0S300.daca.pv,001NIR0S100.daca.pv,001FYx00206_SPSUM.pv,...,001XXXCALC01.NUM.PV[2],prob_corg,prob_s,sita_nadziarno,sita_podziarno,poziom_zuzel,001UCx00274.pv,001NIR0ODS0.daca.pv,temp_zuz,007SxR00555.daca1.pv
Czas,001NIR0SZR0.daca.pv,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2021-04-19 00:00:00,23.320475,2700.0,81.0,50.0,31.0,0.870842,1.513727,7.211709,7.297815,6.446786,114.0,...,303.0,8.61,11.27,2.316,62.719999,1349.0,13.706723,9.335643,1305.1,-0.212540
2021-04-19 00:00:10,23.368764,2700.0,81.0,50.0,31.0,0.870579,1.513468,7.223036,7.312535,6.448606,114.0,...,303.0,8.61,11.27,2.340,62.799999,1360.0,13.714506,9.349653,1305.0,-0.141969
2021-04-19 00:00:20,23.397422,2700.0,81.0,50.0,31.0,0.870315,1.513208,7.233054,7.327014,6.450426,114.0,...,303.0,8.61,11.27,2.340,62.799999,1360.0,13.647700,9.363130,1305.0,-0.300758
2021-04-19 00:00:30,23.414602,2700.0,81.0,50.0,31.0,0.870052,1.512949,7.239280,7.335812,6.452246,114.0,...,303.0,8.61,11.27,2.340,62.799999,1360.0,13.608099,9.375699,1305.0,-0.088279
2021-04-19 00:00:40,23.456724,2700.0,81.0,50.0,31.0,0.869789,1.512690,7.245265,7.342650,6.454066,114.0,...,303.0,8.61,11.27,2.340,62.799999,1360.0,13.654952,9.374357,1305.0,-0.226348
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-04-25 23:59:10,24.206277,2700.0,80.0,50.0,23.0,0.867885,1.590155,7.460678,7.604284,6.670415,110.0,...,301.0,8.88,11.07,2.240,65.199997,1290.0,14.407694,9.545496,1311.0,-0.357130
2021-04-25 23:59:20,24.192292,2700.0,80.0,50.0,23.0,0.867448,1.588648,7.444157,7.591574,6.668189,110.0,...,301.0,8.88,11.07,2.240,65.199997,1290.0,14.353438,9.547455,1311.0,-0.225190
2021-04-25 23:59:30,24.141316,2900.0,80.0,50.0,23.0,0.867010,1.587142,7.424377,7.578864,6.665964,110.0,...,301.0,8.88,11.07,2.240,65.199997,1290.0,14.247012,9.558825,1311.0,-0.662037
2021-04-25 23:59:40,24.111355,3500.0,80.0,50.0,23.0,0.866573,1.585635,7.404055,7.567286,6.663738,110.0,...,301.0,8.88,11.07,2.240,65.199997,1290.0,15.652795,9.573440,1311.0,-0.375158


In [32]:
# Build the lagged train/test frames with one shared pipeline.
# preprocess_df keeps the target in the index and DataFrame.shift moves only
# the column values, so every remaining row pairs the target at its own
# timestamp with the features recorded MEAN_REACTION_TIME rows earlier.
# The leading rows that have no earlier features become NaN and are dropped;
# afterwards the target is turned back into a regular column and the frame is
# indexed by timestamp only.
df_april_lagged, df_test_lagged = (
    frame.shift(MEAN_REACTION_TIME)
    .dropna()
    .reset_index()
    .set_index(ZMIENNA_CZASU)
    for frame in (df_april_train, df_april_test)
)

In [33]:
# Display the lagged training frame (target column plus shifted features).
df_april_lagged

Unnamed: 0_level_0,001NIR0SZR0.daca.pv,001FCx00285_SPPV.PV,001XXXCALC01.NUM.PV[3],001SCx00274_SPPV.PV,001FCx00241_sppv.pv,001NIR0SZRG.daca.pv,001NIR0S600.daca.pv,001NIR0S500.daca.pv,001NIR0S300.daca.pv,001NIR0S100.daca.pv,...,001XXXCALC01.NUM.PV[2],prob_corg,prob_s,sita_nadziarno,sita_podziarno,poziom_zuzel,001UCx00274.pv,001NIR0ODS0.daca.pv,temp_zuz,007SxR00555.daca1.pv
Czas,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-04-19 00:05:00,23.442731,2700.0,81.0,50.0,31.0,0.870842,1.513727,7.211709,7.297815,6.446786,...,303.0,8.61,11.27,2.316,62.719999,1349.0,13.706723,9.335643,1305.1,-0.212540
2021-04-19 00:05:10,23.417879,2700.0,81.0,50.0,31.0,0.870579,1.513468,7.223036,7.312535,6.448606,...,303.0,8.61,11.27,2.340,62.799999,1360.0,13.714506,9.349653,1305.0,-0.141969
2021-04-19 00:05:20,23.399783,2700.0,81.0,50.0,31.0,0.870315,1.513208,7.233054,7.327014,6.450426,...,303.0,8.61,11.27,2.340,62.799999,1360.0,13.647700,9.363130,1305.0,-0.300758
2021-04-19 00:05:30,23.392498,2700.0,81.0,50.0,31.0,0.870052,1.512949,7.239280,7.335812,6.452246,...,303.0,8.61,11.27,2.340,62.799999,1360.0,13.608099,9.375699,1305.0,-0.088279
2021-04-19 00:05:40,23.395817,2700.0,81.0,50.0,31.0,0.869789,1.512690,7.245265,7.342650,6.454066,...,303.0,8.61,11.27,2.340,62.799999,1360.0,13.654952,9.374357,1305.0,-0.226348
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-04-25 23:59:10,24.206277,2000.0,80.0,55.0,23.0,0.864561,1.583222,8.076830,7.804704,6.693222,...,301.0,8.88,11.07,2.240,65.200000,1290.0,7.766887,9.483980,1311.0,-0.700044
2021-04-25 23:59:20,24.192292,2000.0,80.0,55.0,23.0,0.864844,1.583911,8.065430,7.807054,6.697528,...,301.0,8.88,11.07,2.240,65.199999,1290.0,7.710614,9.489121,1311.0,-0.242611
2021-04-25 23:59:30,24.141316,2000.0,80.0,55.0,23.0,0.865128,1.584601,8.052659,7.809485,6.701834,...,301.0,8.88,11.07,2.240,65.199997,1290.0,7.691423,9.502505,1311.0,-0.371178
2021-04-25 23:59:40,24.111355,2000.0,80.0,55.0,23.0,0.865412,1.585290,8.039028,7.811503,6.706140,...,301.0,8.88,11.07,2.240,65.199997,1290.0,7.696882,9.519747,1311.0,-0.322218


In [34]:
# Split each lagged frame into the feature matrix (every column except the
# target) and the target Series.
X_train = df_april_lagged.drop(columns=ZMIENNA_CELU)
Y_train = df_april_lagged[ZMIENNA_CELU]

X_test = df_test_lagged.drop(columns=ZMIENNA_CELU)
Y_test = df_test_lagged[ZMIENNA_CELU]

In [35]:
# Display the test feature matrix.
X_test

Unnamed: 0_level_0,001FCx00285_SPPV.PV,001XXXCALC01.NUM.PV[3],001SCx00274_SPPV.PV,001FCx00241_sppv.pv,001NIR0SZRG.daca.pv,001NIR0S600.daca.pv,001NIR0S500.daca.pv,001NIR0S300.daca.pv,001NIR0S100.daca.pv,001FYx00206_SPSUM.pv,...,001XXXCALC01.NUM.PV[2],prob_corg,prob_s,sita_nadziarno,sita_podziarno,poziom_zuzel,001UCx00274.pv,001NIR0ODS0.daca.pv,temp_zuz,007SxR00555.daca1.pv
Czas,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-04-26 00:05:00,3500.0,80.0,50.0,23.0,0.865698,1.582622,7.361827,7.547441,6.659287,110.0,...,301.0,8.88,11.07,2.24,65.199997,1380.0,22.471026,9.569939,1311.9,-0.743311
2021-04-26 00:05:10,3500.0,80.0,50.0,23.0,0.865260,1.581116,7.339526,7.537525,6.657062,110.0,...,301.0,8.88,11.07,2.24,65.199997,1390.0,22.435875,9.569577,1312.0,-0.260609
2021-04-26 00:05:20,3500.0,80.0,50.0,23.0,0.864823,1.579609,7.317166,7.527610,6.654836,110.0,...,301.0,8.88,11.07,2.24,65.199997,1390.0,22.417788,9.576330,1312.0,-0.575574
2021-04-26 00:05:30,3500.0,80.0,50.0,23.0,0.864326,1.578103,7.288108,7.517694,6.652411,110.0,...,301.0,8.88,11.07,2.24,65.199997,1390.0,22.428687,9.591552,1312.0,-0.416580
2021-04-26 00:05:40,3500.0,80.0,50.0,23.0,0.863948,1.576596,7.261646,7.507486,6.649166,110.0,...,301.0,8.88,11.07,2.24,65.199997,1390.0,22.505517,9.595952,1312.0,-0.392037
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-04-28 23:59:10,2800.0,80.0,42.0,25.0,0.857175,1.566068,7.872827,8.150348,7.114658,114.0,...,306.0,8.33,11.37,2.36,62.300000,1270.0,15.692754,9.586449,1309.0,0.173786
2021-04-28 23:59:20,2800.0,80.0,42.0,25.0,0.857283,1.564958,7.853224,8.124750,7.108087,114.0,...,306.0,8.33,11.37,2.36,62.300000,1270.0,15.826648,9.586193,1309.0,0.060793
2021-04-28 23:59:30,2800.0,80.0,42.0,25.0,0.857376,1.563848,7.833620,8.101771,7.101515,114.0,...,306.0,8.33,11.37,2.36,62.299999,1270.0,15.903021,9.587908,1309.0,0.072853
2021-04-28 23:59:40,2800.0,80.0,42.0,25.0,0.856985,1.562738,7.814017,8.111383,7.094943,114.0,...,306.0,8.33,11.37,2.36,62.299999,1270.0,15.925206,9.589624,1309.0,0.267865


In [36]:
# Baseline model: a linear regression fitted on the training window and used
# to predict the target for the test window.
model = LinearRegression().fit(X_train, Y_train)
Y_pred = model.predict(X_test)

In [37]:
# Mean squared error of the baseline on the test window. The arguments are
# passed by keyword in the documented (y_true, y_pred) order; the metric is
# symmetric, so the value is the same as with the arguments swapped.
mean_squared_error(y_true=Y_test, y_pred=Y_pred)

0.42915047856260274

In [38]:
# Pair every fitted coefficient with the name of the feature it belongs to.
# The names are taken from X_train - the frame the model was actually fitted
# on - rather than from another frame that merely happens to share its column
# order. Building the Series with an explicit index raises on a length
# mismatch instead of silently truncating the way zip() does.
# NOTE(review): no feature scaling is visible before the fit, so coefficient
# magnitudes depend on each feature's units - confirm before reading them as
# importances.
keys_ = list(X_train.columns)
values = model.coef_
fi = pd.Series(values, index=keys_)

In [39]:
# Horizontal bar chart of the model coefficients, sorted in ascending order.
px.bar(fi.sort_values(), orientation='h')

In [40]:
# Overlay the actual and the predicted target over the test window.
# Both line traces share the test timestamps on the x axis; each label is used
# for the hover text as well as for the legend entry.
fig = go.Figure(
    data=[
        go.Scatter(
            mode='lines',
            x=pd.to_datetime(Y_test.index.values),
            y=series,
            text=label,
            name=label,
        )
        for series, label in (
            (Y_test, 'Rzeczywiste wartości straty'),
            (Y_pred, 'Przewidywane wartości straty'),
        )
    ]
)
# Keep the figure as the cell's last expression so it is rendered.
fig

In [21]:
# Persist the comparison figure as a static PNG and as an interactive HTML file.
# The output directory is created first so the export does not fail when the
# notebook is run in a location where 'images' does not exist yet.
# NOTE(review): the saved execution count of this cell is older than that of
# the cells above it - re-run it after the figure cell so the files match.
os.makedirs('images', exist_ok=True)
fig.write_image(os.path.join('images','baseline_model.png'))
fig.write_html(os.path.join('images','baseline_model.html'))