# Classification: Linear Regression, Ridge Classification, K-nearest Neighbors

In diesem Notebook verwenden wir drei Ansätze, um die Klassifizierung in "Failure"/"No Failure" durchzuführen.

## Daten Laden

In [None]:
# Mount Google Drive into the Colab runtime; datapath is the Drive folder
# from which the resampled CSV files are read further down.
from google.colab import drive
drive.mount('/content/drive')
datapath = "/content/drive/My Drive/smartdata/2/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import sys

# Add the data directory to the import search path. The membership check
# keeps the cell idempotent: re-running it does not append the same
# directory to sys.path a second time.
if datapath not in sys.path:
    sys.path.append(datapath)

In [None]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Number of training epochs. No cell visible here reads this value;
# presumably it is meant for a Keras model (see the keras imports) - TODO confirm.
epochs = 150

In [None]:
# Load the resampled training and test sets.
# Both dictionaries are keyed by "<window><aggregation>", e.g. "3Hmean".

train = {}
test = {}

# Each (window, aggregation) pair maps to the file
# resampled_<window>_<aggregation>.csv, which exists once in the training
# folder ("resampled/") and once in the test folder ("resampled_test/").
# Deriving key and file name from the same pair keeps them in sync.
for window, aggregation in (
    ("3H", "mean"),
    ("3H", "median"),
    ("6H", "median"),
    ("12H", "mean"),
    ("12H", "median"),
):
    filename = "resampled_" + window + "_" + aggregation + ".csv"
    train[window + aggregation] = pd.read_csv(datapath + "resampled/" + filename)
    test[window + aggregation] = pd.read_csv(datapath + "resampled_test/" + filename)

In [None]:
# Columns that are removed from every data frame before modelling; the
# preprocessing step refers to them as "data drift" columns.
drift = [
    "pitch motor 1 current",
    "pitch motor 2 current",
    "Pitch motor 3 current",
    "x direction vibration value",
    "y direction vibration value",
    "hydraulic brake pressure",
    "generator current",
    "Inverter inlet temperature",
    "inverter outlet temperature",
    "inverter inlet pressure",
    "inverter outlet pressure",
    "wind tower ambient temperature",
    "Wheel temperature",
    "Wheel control cabinet temperature",
    "Cabin temperature",
    "Cabin control cabinet temperature",
    "vane 1 pitch motor temperature",
    "blade 2 pitch motor temperature",
    "blade 3 pitch motor temperature",
    "blade 1 inverter box temperature",
    "blade 2 inverter box temperature",
    "blade 3 inverter box temperature",
    "inverter grid side current",
    "Inverter grid side active power",
    "inverter generator side power",
    "generator operating frequency",
    "generator stator temperature 1",
    "generator stator temperature 2",
    "generator stator temperature 3",
    "generator stator temperature 4",
    "Generator stator temperature 5",
    "generator stator temperature 6",
    "generator air temperature 1",
    "generator air temperature 2",
    "main bearing temperature 1",
    "main bearing temperature 2",
    "Pitch motor 1 power estimation",
    "Pitch motor 2 power estimation",
    "Pitch motor 3 power estimation",
    "blade 1 battery box temperature",
    "blade 2 battery box temperature",
    "blade 3 battery box temperature",
    "Inverter INU temperature",
    "Inverter ISU temperature",
    "atmospheric pressure",
    "reactive power control status",
    "reactive power set value",
    "Inverter INU RMIO temperature",
    "blade 1 angle",
    "blade 2 angle",
    "blade 3 angle",
    "inverter grid side voltage",
    "inverter grid side reactive power",
]

## Data Preprocessing

In [None]:
# Split every resampled frame into model features and target labels.
import math  # not used in this cell; kept in case other cells rely on it


def _split_features_and_labels(frame, drift_columns):
    """Return ``(features, labels)`` for one resampled data frame.

    Rows containing a NaN in *any* column are discarded first (plain
    ``dropna()``), not only rows whose label is missing. ``labels`` is the
    ``"label"`` column of the remaining rows. ``features`` is the remaining
    rows without the drift columns and without the ``"Unnamed: 0"``,
    ``"label"`` and ``"area"`` columns.

    The input frame is not modified.

    Raises:
        KeyError: if ``"label"`` or any column that should be dropped is
            missing, e.g. because the frame was already preprocessed.
    """
    complete_rows = frame.dropna()
    labels = complete_rows["label"]
    features = complete_rows.drop(
        columns=list(drift_columns) + ["Unnamed: 0", "label", "area"]
    )
    return features, labels


labels_train = {}
labels_test = {}
for frames_by_key, labels_by_key in ((train, labels_train), (test, labels_test)):
    # Iterate over a snapshot of the keys because the dict values are replaced
    # inside the loop. A frame is only replaced after the split succeeded, so
    # an error never leaves a half-processed frame behind.
    for key in list(frames_by_key):
        frames_by_key[key], labels_by_key[key] = _split_features_and_labels(
            frames_by_key[key], drift
        )

In [None]:
# Display the 3-hour-mean training frame to inspect the columns that remain
# after the preprocessing above.
train["3Hmean"]

Unnamed: 0,Wheel speed,hub angle,overspeed sensor speed detection value,5 second yaw against wind average,Aircraft weather station wind speed,wind direction absolute value,generator torque,generator power limit value,Rated hub speed,Fan current status value,hub current status value,yaw state value,yaw request value,blade 1 super capacitor voltage,blade 2 super capacitor voltage,blade 3 super capacitor voltage,drive 1 thyristor temperature,Drive 2 thyristor temperature,Drive 3 thyristor temperature,Drive 1 output torque,Drive 2 output torque,Drive 3 output torque
0,0.80,160.140000,0.820556,-28.800000,1.883333,168.888889,0.0,2067.6,16.83,5.000000,5.000000,4.0,0.444444,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
1,0.18,172.692778,0.786111,-18.733333,2.338889,159.111111,0.0,2067.6,16.83,5.000000,5.000000,4.0,0.333333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
2,1.29,156.682778,0.803333,-26.716667,2.283333,152.333333,0.0,2067.6,16.83,5.000000,5.000000,4.0,0.222222,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
3,1.32,193.018333,0.815556,-27.016667,2.433333,171.055556,0.0,2067.6,16.83,5.000000,5.000000,4.0,0.111111,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
4,0.52,200.600556,0.736111,-26.477778,1.988889,186.722222,0.0,2067.6,16.83,5.000000,5.000000,4.0,0.333333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
792186,0.12,68.000000,0.000000,0.150000,1.227778,141.055556,0.0,425.4,10.95,3.888889,4.444444,4.0,0.000000,45.055556,44.977222,44.990000,56.444444,545.555556,5.433333,0.0,0.0,0.0
792187,0.13,68.000000,0.000000,13.344444,1.144444,140.000000,0.0,425.4,10.95,3.444444,4.222222,4.0,0.000000,45.054444,44.977222,44.988333,56.500000,545.000000,5.416667,0.0,0.0,0.0
792188,0.53,68.000000,0.000000,-4.455556,1.116667,106.000000,0.0,425.4,10.95,3.888889,4.055556,4.0,0.000000,45.055000,44.976667,44.988889,56.777778,543.888889,5.433333,0.0,0.0,0.0
792189,1.82,68.000000,0.000000,3.483333,0.938889,179.833333,0.0,425.4,10.95,3.555556,4.277778,4.0,0.000000,45.053333,44.975000,44.988889,56.333333,543.888889,5.427778,0.0,0.0,0.0


## Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# One ordinary least-squares model per resampling variant, fitted directly on
# the labels. The predictions are continuous values and are evaluated with
# regression metrics (MSE and R^2); no thresholding into classes happens here.
linreg = {}
for key in train:
    linreg[key] = LinearRegression()
    linreg[key].fit(train[key], labels_train[key])
    pred = linreg[key].predict(test[key])

    print("### {} ###\n".format(key))
    print('Coefficients: \n', linreg[key].coef_)

    # Mean squared error between the test labels and the predictions.
    mse = mean_squared_error(labels_test[key], pred)
    print('Mean squared error: {:.2f}'.format(mse))

    # Coefficient of determination: 1 is a perfect prediction.
    r2 = r2_score(labels_test[key], pred)
    print('Coefficient of determination: {:.2f}'.format(r2))

### 3Hmean ###

Coefficients: 
 [-7.65331687e-03  2.73400727e-04 -3.14606512e-03 -1.20520369e-03
  2.69467455e-02 -3.13727493e-05  3.26901194e-05 -1.44174628e-04
  3.20208493e-02  3.27582932e-02 -1.43345498e-02 -9.63340999e-03
 -7.26235875e-02 -9.21953329e-02 -1.89220059e-02  1.12820620e-01
  7.99518913e-03  5.92118213e-06 -1.02677576e-01  0.00000000e+00
  0.00000000e+00  0.00000000e+00]
Mean squared error: 0.26
Coefficient of determination: -0.03
### 3Hmedian ###

Coefficients: 
 [-1.20973177e-03  2.37992400e-04 -2.71882780e-03 -1.09716857e-03
  2.65005412e-02 -3.29976056e-05 -3.76921028e-05 -1.25927940e-04
  2.89543734e-02  2.11931062e-02 -1.49408512e-02 -6.36694111e-03
  1.90101738e-02 -7.17089006e-02 -1.76878483e-02  9.13385197e-02
  6.94261931e-03  5.51782240e-06 -9.35758074e-02  0.00000000e+00
  0.00000000e+00  0.00000000e+00]
Mean squared error: 0.26
Coefficient of determination: -0.03
### 6Hmedian ###

Coefficients: 
 [-1.09517381e-03  2.52843102e-04 -2.73024348e-03 -1.09877455

## Ridge Classification

In [None]:
from sklearn.linear_model import Ridge, RidgeClassifier
from sklearn.metrics import mean_squared_error, r2_score

# One ridge classifier (default settings) per resampling variant; each is
# scored on the matching test set via the estimator's own score() method.
ridge = {}
for key in train:
    ridge[key] = classifier = RidgeClassifier()
    classifier.fit(train[key], labels_train[key])
    score = classifier.score(test[key], labels_test[key])

    print("### {} ###\n".format(key))
    print('Coefficients: \n', classifier.coef_)
    print('Score: {:.2f}'.format(score))

### 3Hmean ###

Coefficients: 
 [[-1.53062385e-02  5.46799217e-04 -6.29234232e-03 -2.41040257e-03
   5.38932263e-02 -6.27464409e-05  6.53853660e-05 -2.88347973e-04
   6.40418466e-02  6.55155186e-02 -2.86690883e-02 -1.92675835e-02
  -1.45231802e-01 -1.84157902e-01 -3.78443350e-02  2.25409326e-01
   1.59768886e-02  1.18415714e-05 -2.05224558e-01  0.00000000e+00
   0.00000000e+00  0.00000000e+00]]
Score: 0.49
### 3Hmedian ###

Coefficients: 
 [[-2.41936770e-03  4.75983419e-04 -5.43780018e-03 -2.19433482e-03
   5.30008676e-02 -6.59966449e-05 -7.53787707e-05 -2.51855732e-04
   5.79090045e-02  4.23857891e-02 -2.98816920e-02 -1.27341491e-02
   3.80121523e-02 -1.43240696e-01 -3.53761446e-02  1.82500753e-01
   1.38750513e-02  1.10350668e-05 -1.87052787e-01  0.00000000e+00
   0.00000000e+00  0.00000000e+00]]
Score: 0.49
### 6Hmedian ###

Coefficients: 
 [[-2.19014078e-03  5.05683428e-04 -5.46078793e-03 -2.19754450e-03
   5.31167977e-02 -6.25071342e-05 -7.81420241e-05 -2.54789246e-04
   5.8544561

## K-nearest Neighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

# One 1-nearest-neighbour classifier per resampling variant.
neigh = {}
for key in train:
    neigh[key] = KNeighborsClassifier(n_neighbors=1)
    neigh[key].fit(train[key], labels_train[key])

    # Predict once and reuse the result for every metric. The classifier's
    # score() method would call predict() a second time; for a classifier it
    # is the accuracy of those predictions, so accuracy_score on `pred`
    # yields the same value without repeating the neighbour search.
    pred = neigh[key].predict(test[key])
    score = accuracy_score(labels_test[key], pred)

    print("### " + key + " ###\n")
    # Score (mean accuracy on the test set)
    print('Score: %.2f'
          % score)
    # The mean squared error
    print('Mean squared error: %.2f'
          % mean_squared_error(labels_test[key], pred))
    # The coefficient of determination: 1 is perfect prediction
    print('Coefficient of determination: %.2f'
          % r2_score(labels_test[key], pred))

### 3Hmean ###

Score: 0.49
Mean squared error: 0.51
Coefficient of determination: -1.04
