# Modelowanie przy użyciu metody najmniejszych kwadratów

W pierwszym podejściu modelujemy zadane wyjścia przy zastosowaniu metody najmniejszych kwadratów. Przetestowane zostaną modele liniowe oraz nieliniowe o różnych stopniach nieliniowości.

### Zaimportuj potrzebne biblioteki

In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Report the versions of the core libraries so the results can be reproduced.
version_lines = [
    "pandas version: {}".format(pd.__version__),
    "numpy version: {}".format(np.__version__),
    "matplotlib version: {}".format(mpl.__version__),
]
print("\n".join(version_lines))

pandas version: 1.0.1
numpy version: 1.18.1
matplotlib version: 3.2.0


### Zamień datę na sekundy od początku eksperymentu

In [2]:
def changeDateToSeconds(df):
    """Replace the ``date`` column with whole seconds elapsed since the first row.

    The frame is modified in place and returned for convenience.

    Args:
        df: DataFrame whose ``date`` column holds timestamps.

    Returns:
        The same DataFrame, with ``date`` converted to integer seconds
        relative to the timestamp in its first row. An empty frame is
        returned unchanged.

    Raises:
        KeyError: If the frame has rows but no ``date`` column.
        ValueError: If ``date`` contains missing values, which cannot be
            represented as integers.
    """
    if len(df) == 0:
        # Nothing to convert; there is no first timestamp to refer to.
        return df

    # Positional access: the first row is the reference point regardless of
    # how the index is labelled.
    first = df["date"].iloc[0]

    # total_seconds() covers the whole time span. Timedelta.seconds is only
    # the within-day component and wraps around after 24 hours.
    elapsed = (df["date"] - first).dt.total_seconds()
    df["date"] = elapsed.astype("int64")
    return df

### Wczytaj dane

In [3]:
def readDataFromExcel(path, sheet):
    """Load one worksheet and express its ``date`` column as elapsed seconds.

    Args:
        path: Location of the Excel workbook.
        sheet: Worksheet to read (forwarded as ``sheet_name``).

    Returns:
        DataFrame with the sheet contents, where ``date`` has been parsed to
        datetimes and then converted by ``changeDateToSeconds``.
    """
    raw = pd.read_excel(path, sheet_name=sheet)
    parsed = raw.assign(date=pd.to_datetime(raw["date"]))
    return changeDateToSeconds(parsed)

### Wczytaj zbiór uczący i weryfikacyjny

In [4]:
# Learning data comes from sheet "d2", verification data from sheet "d6"
# of the same workbook.
df_learn, df_verif = (
    readDataFromExcel("./data/K-1_MI.xlsx", sheet) for sheet in ("d2", "d6")
)

### Zbiór uczący 

In [5]:
# Preview the first rows of the learning set.
df_learn.head()

Unnamed: 0,date,FP05,LT1,LT2,LT3,LT4,TMA,TMB,TMC,TMD,...,PTWS,TW02,TW01,FW03,TW04,TW03,FW04,TTWT,PTWT,PPW
0,0,1068.9575,21.4107,11.6199,11.714,18.8643,43.8947,76.1302,75.8843,74.866,...,19.3311,539.8597,301.2129,9.4894,539.5955,290.892,14.2998,177.7786,10.3202,3.5039
1,10,1068.9575,21.4107,11.6199,11.714,18.8643,43.8947,76.1302,75.8843,74.866,...,19.3311,539.8597,301.2129,9.4894,539.5955,290.892,14.2998,177.7786,10.3202,3.5039
2,20,1068.9575,21.4107,11.6199,11.714,18.8643,43.8947,76.1302,75.8843,74.866,...,19.3147,539.8597,301.2129,9.4894,539.5955,292.1826,14.2998,177.7786,10.3202,3.4937
3,30,1068.9575,21.4107,11.6199,11.714,18.8643,43.8947,76.1302,75.8843,74.866,...,19.3298,539.8597,302.4265,9.4894,539.5955,292.1826,14.2998,177.7786,10.3202,3.4937
4,40,1068.9575,21.4107,11.6199,11.714,18.8643,43.8947,76.1302,75.8843,74.866,...,19.3298,539.8597,302.4265,9.4894,539.5955,292.1826,14.2998,177.7786,10.3202,3.4937


### Zbiór weryfikacyjny

In [6]:
# Preview the first rows of the verification set.
df_verif.head()

Unnamed: 0,date,FP05,LT1,LT2,LT3,LT4,TMA,TMB,TMC,TMD,...,PTWS,TW02,TW01,FW03,TW04,TW03,FW04,TTWT,PTWT,PPW
0,0,1055.1635,21.5507,12.5797,11.714,20.9103,40.9401,72.2017,72.6573,69.6785,...,19.1031,540.7268,293.7237,10.8584,540.5005,277.3242,16.5933,176.8009,10.1627,3.4616
1,2,1055.1635,21.5507,12.5797,11.714,20.9103,40.9401,72.2017,72.6573,69.6785,...,19.1031,540.7268,293.7237,10.8584,540.5005,277.3242,16.5933,176.8009,10.1627,3.4616
2,4,1055.1635,21.5507,12.5797,11.714,20.9103,40.9401,72.2017,72.6573,69.6785,...,19.1031,540.7268,293.7237,10.8584,540.5005,277.3242,16.5933,176.8009,10.1627,3.4616
3,6,1055.1635,21.5507,12.5797,11.714,20.9103,40.9401,72.2017,72.6573,69.6785,...,19.1031,540.7268,293.7237,10.8584,540.5005,277.3242,16.5933,176.8009,10.1627,3.4616
4,8,1055.1635,21.5507,12.5797,11.714,20.9103,40.9401,72.2017,72.6573,69.6785,...,19.0894,540.7268,293.7237,10.8584,540.5005,277.3242,16.5933,176.8009,10.1627,3.4616


### MNK - model liniowy, statyczny

In [7]:
# Modelled outputs are LT01 and DP; every remaining column except the time
# axis ("date") is used as a model input.
u_learn = df_learn.drop(columns=["LT01", "DP", "date"]).to_numpy()
y_learn = df_learn.loc[:, ["LT01", "DP"]].to_numpy()

u_verif = df_verif.drop(columns=["LT01", "DP", "date"]).to_numpy()
y_verif = df_verif.loc[:, ["LT01", "DP"]].to_numpy()


### Weryfikacja modelu liniowego, statycznego


In [8]:
# Fit one static linear model for both outputs on the learning set, then
# compare its R^2 on the learning and verification sets.
reg = LinearRegression()
reg.fit(u_learn, y_learn)

y_model_learn, y_model_verif = reg.predict(u_learn), reg.predict(u_verif)

score_learn = r2_score(y_learn, y_model_learn)
score_verif = r2_score(y_verif, y_model_verif)
print("Score learn: {}".format(score_learn))
print("Score verif: {}".format(score_verif))

Score learn: 0.68981181987722
Score verif: -6.578488912345968e+18


### Modele nieliniowe, dynamiczne

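Funkcja `createModelMatrix` tworzy macierz A do rozwiązywania zadania najmniejszych kwadratów. Dla rzędu N (`order`) i maksymalnej potęgi D (`exponent`) wiersz macierzy odpowiadający chwili k ma postać:

$$\left[\; u_0^{1}[k-N+1] \;\dots\; u_0^{D}[k-N+1] \;\;\dots\;\; u_0^{1}[k] \;\dots\; u_0^{D}[k] \;\;\; u_1^{1}[k-N+1] \;\dots\; u_1^{D}[k-N+1] \;\;\dots\;\; u_1^{1}[k] \;\dots\; u_1^{D}[k] \;\;\; \dots \;\right]$$

gdzie $u_i$ oznacza i-tą kolumnę macierzy `inputs`, a górny indeks – potęgę.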

In [9]:
def createModelMatrix(exponent, order, inputs):
    """Build the least-squares regressor matrix from lagged powers of the inputs.

    Row ``r`` is assembled from the ``order`` consecutive samples
    ``inputs[r] .. inputs[r + order - 1]``; each sample contributes its powers
    ``1 .. exponent``. Columns are grouped by input variable, then by sample
    offset (earliest sample first), then by power, so column
    ``i * order * exponent + j * exponent + k`` holds
    ``inputs[r + j, i] ** (k + 1)``.

    Args:
        exponent: Highest power applied to each sample (at least 1).
        order: Number of consecutive samples used per row (at least 1).
        inputs: 2-D array-like of shape ``(samples, variables)``.

    Returns:
        Float array of shape
        ``(samples - order + 1, variables * order * exponent)``.

    Raises:
        ValueError: If ``exponent`` or ``order`` is smaller than 1, or if
            ``order`` requires more samples than ``inputs`` provides.
    """
    if exponent < 1 or order < 1:
        raise ValueError(
            "exponent and order must be >= 1, got exponent={}, order={}".format(
                exponent, order
            )
        )

    # Work in floating point: integer arrays overflow silently in np.power
    # once the powers exceed the integer range.
    inputs = np.asarray(inputs, dtype=float)
    samples = inputs.shape[0]
    modelVariables = inputs.shape[1]

    rows = samples - (order - 1)
    if rows < 0:
        raise ValueError(
            "order={} needs more samples than the {} provided".format(order, samples)
        )

    # Axes are (row, variable, sample offset, power); flattening the last
    # three in C order reproduces the documented column layout.
    blocks = np.empty((rows, modelVariables, order, exponent))
    for offset in range(order):
        window = inputs[offset:offset + rows, :]
        for power in range(exponent):
            blocks[:, :, offset, power] = np.power(window, power + 1)

    return blocks.reshape(rows, modelVariables * order * exponent)

In [21]:
# Split the two-column output arrays into separate targets:
# column 0 is LT01, column 1 is DP.
lty_learn, dpy_learn = y_learn[:, 0], y_learn[:, 1]
lty_verif, dpy_verif = y_verif[:, 0], y_verif[:, 1]

(999,)


### Badanie MNK wyjścia LT01

In [27]:
# Grid search for output LT01 over the highest power (exponent) and the
# number of consecutive samples per regressor row (order).
exponents = list(range(1, 11))
orders = list(range(1, 11))

for exponent in exponents:
    for order in orders:
        # The regressor matrix has order - 1 fewer rows than there are
        # samples, so the same number of leading targets is dropped.
        skip = order - 1
        lty_learn_cut = lty_learn[skip:]
        lty_verif_cut = lty_verif[skip:]

        A_learn = createModelMatrix(exponent, order, u_learn)
        A_verif = createModelMatrix(exponent, order, u_verif)

        # Fit on the learning set only; the verification set is just scored.
        model = LinearRegression()
        model.fit(A_learn, lty_learn_cut)
        lty_model_learn = model.predict(A_learn)
        lty_model_verif = model.predict(A_verif)

        learn_score = r2_score(lty_learn_cut, lty_model_learn)
        verif_score = r2_score(lty_verif_cut, lty_model_verif)

        print("order: {}, exponent: {}, learn score: {}, verif score: {}".format(order, exponent, learn_score, verif_score))

order: 1, exponent: 1, learn score: 0.6016072426822416, verif score: -3148593564.5211806
order: 2, exponent: 1, learn score: 0.6695302390950537, verif score: -227336226434.68994
order: 3, exponent: 1, learn score: 0.7300829011982106, verif score: -751617618590.7377
order: 4, exponent: 1, learn score: 0.7761174557029188, verif score: -1568584494608.527
order: 5, exponent: 1, learn score: 0.8105915038948383, verif score: -678089653596.9719
order: 6, exponent: 1, learn score: 0.8371676505315799, verif score: -73650317440.27397
order: 7, exponent: 1, learn score: 0.8571607031601024, verif score: -4369380848.069143
order: 8, exponent: 1, learn score: 0.8725929696978859, verif score: -33092892981.3864
order: 9, exponent: 1, learn score: 0.88596823714708, verif score: -269969611772.03717
order: 10, exponent: 1, learn score: 0.8998557884085262, verif score: -4793347366.2385
order: 1, exponent: 2, learn score: 0.7226961932840874, verif score: -8276269438195.485
order: 2, exponent: 2, learn scor

order: 3, exponent: 10, learn score: 0.7454851457652883, verif score: -1142.8417526752412
order: 4, exponent: 10, learn score: 0.765288889484429, verif score: -930.2116043191663
order: 5, exponent: 10, learn score: 0.7700774834625699, verif score: -9976.556586281427
order: 6, exponent: 10, learn score: 0.7848974897822613, verif score: -15015.278703592707
order: 7, exponent: 10, learn score: 0.8549393178057896, verif score: -15701.792154729364
order: 8, exponent: 10, learn score: 0.7640510516518482, verif score: -11765.521124700575
order: 9, exponent: 10, learn score: 0.8556883092099082, verif score: -1296.2607083751848
order: 10, exponent: 10, learn score: 0.7733464150615222, verif score: -614.5993183387823


### Badanie MNK wyjścia DP

In [28]:
# Grid search for output DP over the highest power (exponent) and the
# number of consecutive samples per regressor row (order).
exponents = list(range(1, 11))
orders = list(range(1, 11))

for exponent in exponents:
    for order in orders:
        # The regressor matrix has order - 1 fewer rows than there are
        # samples, so the same number of leading targets is dropped.
        skip = order - 1
        dpy_learn_cut = dpy_learn[skip:]
        dpy_verif_cut = dpy_verif[skip:]

        A_learn = createModelMatrix(exponent, order, u_learn)
        A_verif = createModelMatrix(exponent, order, u_verif)

        # Fit on the learning set only; the verification set is just scored.
        model = LinearRegression()
        model.fit(A_learn, dpy_learn_cut)
        dpy_model_learn = model.predict(A_learn)
        dpy_model_verif = model.predict(A_verif)

        learn_score = r2_score(dpy_learn_cut, dpy_model_learn)
        verif_score = r2_score(dpy_verif_cut, dpy_model_verif)

        print("order: {}, exponent: {}, learn score: {}, verif score: {}".format(order, exponent, learn_score, verif_score))

order: 1, exponent: 1, learn score: 0.7780161913957889, verif score: -97779661407.76927
order: 2, exponent: 1, learn score: 0.791439544450639, verif score: -321206782.95596004
order: 3, exponent: 1, learn score: 0.8034056970827225, verif score: -85919517412.82686
order: 4, exponent: 1, learn score: 0.8129683181595424, verif score: -1801280185072.2173
order: 5, exponent: 1, learn score: 0.8239643743714287, verif score: -46382629121.66801
order: 6, exponent: 1, learn score: 0.8362677487405503, verif score: -27663490028.1405
order: 7, exponent: 1, learn score: 0.8466969220392546, verif score: -642126808849.9838
order: 8, exponent: 1, learn score: 0.8589635059332745, verif score: -237219760271.41318
order: 9, exponent: 1, learn score: 0.8683346321391391, verif score: -444498074885.5268
order: 10, exponent: 1, learn score: 0.8784901143923577, verif score: -57908031861.96316
order: 1, exponent: 2, learn score: 0.8114256157550152, verif score: -14781895903971.809
order: 2, exponent: 2, learn 

order: 3, exponent: 10, learn score: 0.8204273532010267, verif score: -17293.470480109318
order: 4, exponent: 10, learn score: 0.7372128552389352, verif score: -58655.25280293833
order: 5, exponent: 10, learn score: 0.7523692584421416, verif score: -79784.66498664717
order: 6, exponent: 10, learn score: 0.846996173739436, verif score: -82865.9017006112
order: 7, exponent: 10, learn score: 0.8758481194054584, verif score: -90294.17403943965
order: 8, exponent: 10, learn score: 0.8836557672908033, verif score: -90210.13205745252
order: 9, exponent: 10, learn score: 0.8842721200815554, verif score: -46138.469475808815
order: 10, exponent: 10, learn score: 0.8607346146569127, verif score: -6244.392222119054
