# Preparation

In [147]:
import pandas as pd
import seaborn as sns
import numpy as np
from fast_ml.model_development import train_valid_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from sklearn import metrics

### Import Data

In [148]:
# Load the raw CSV and give the abbreviated column headers readable names.
# NOTE(review): this is an absolute, machine-specific path — consider making the
# data directory configurable so the notebook runs on other machines.
DATA_PATH = 'C:/Users/xxxx/CCPP_data.csv'

# One rename call instead of five in-place mutations. The former "PE" -> "PE"
# mapping was a no-op and is dropped; the column keeps its name.
COLUMN_NAMES = {
    "AT": "Temperatur",
    "V": "Exhaust Vacuum",
    "AP": "Ambient Pressure",
    "RH": "Relative Humidity",
}

df = pd.read_csv(DATA_PATH).rename(columns=COLUMN_NAMES)
df.head(3)

Unnamed: 0,Temperatur,Exhaust Vacuum,Ambient Pressure,Relative Humidity,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56


In [149]:
# Keep only the two predictor columns and the 'PE' column for modelling.
ml_columns = ["Temperatur", "Exhaust Vacuum", "PE"]
data_ml = df[ml_columns]
data_ml.head(3)

Unnamed: 0,Temperatur,Exhaust Vacuum,PE
0,14.96,41.76,463.26
1,25.18,62.96,444.37
2,5.11,39.4,488.56


### Splitting Dataset

In [150]:
# Split into 60 % train / 20 % validation / 20 % test with 'PE' as the target.
# A fixed random_state makes the split — and therefore every metric computed
# further down — reproducible across kernel restarts.
X_train, y_train, X_valid, y_valid, X_test, y_test = train_valid_test_split(
    data_ml,
    target='PE',
    train_size=0.6,
    valid_size=0.2,
    test_size=0.2,
    random_state=42,
)

# Drop the original row labels so every split is indexed 0..n-1.
for split in (X_train, y_train, X_valid, y_valid, X_test, y_test):
    split.reset_index(drop=True, inplace=True)

X_train.head(3)

Unnamed: 0,Temperatur,Exhaust Vacuum
0,30.2,56.9
1,13.08,39.82
2,23.46,59.27


## Linear Regression Model

### Data Preparation

In [151]:
# Single-feature model: use the temperature column as the only predictor.
X_train_01 = X_train.loc[:, 'Temperatur']
X_train_01.head(3)

0    30.20
1    13.08
2    23.46
Name: Temperatur, dtype: float64

In [152]:
# Training target as a single-column 2-D array (one row per sample).
y_train_01 = y_train.to_numpy().reshape(-1, 1)

In [153]:
# Build the single-column 2-D feature matrix directly from X_train so this cell
# is idempotent. The former `X_train_01 = X_train_01.values.reshape(-1, 1)`
# failed on a second run because the name was already an ndarray (no `.values`).
X_train_01 = X_train[['Temperatur']].to_numpy()

### Regression Model Training

In [154]:
# Linear regression estimator created with scikit-learn's default settings.
model = LinearRegression()

In [155]:
# Fit on the training temperature values, then show the learned coefficient.
model.fit(X=X_train_01, y=y_train_01)
print(model.coef_)

[[-2.16456498]]


### Regression Model Validation

In [156]:
# Validation predictor: the same temperature column the model was trained on.
X_valid_01 = X_valid.loc[:, 'Temperatur']
X_valid_01.head(3)

0    21.02
1    28.64
2     8.84
Name: Temperatur, dtype: float64

In [157]:
# Build the single-column 2-D feature matrix directly from X_valid so this cell
# is idempotent. The former self-reassignment via `.values.reshape(-1, 1)`
# failed on a second run because the name was already an ndarray.
X_valid_01 = X_valid[['Temperatur']].to_numpy()

In [158]:
# Predict the target for every validation sample with the fitted linear model.
predictions = model.predict(X=X_valid_01)

## Linear Regression Model Metrics

In [159]:
# Alias of the validation target used by the linear-model metric cells
# (plain assignment, no copy is made).
y_valid_01 = y_valid

### MAE

In [160]:
# Mean absolute error of the linear model on the validation split.
metrics.mean_absolute_error(y_true=y_valid_01, y_pred=predictions)

4.373439308695175

### MSE



In [178]:
# Mean squared error of the linear model on the validation split.
mse_linear = metrics.mean_squared_error(
    y_true=y_valid_01,
    y_pred=predictions,
)
print(mse_linear)

30.446436549331995


### R2

In [174]:
# Score of the fitted linear model on the validation split, kept for the
# comparison table.
r2_linear = model.score(X=X_valid_01, y=y_valid_01)
print(f"R^2 Score: {r2_linear}")

R^2 Score: 0.8958763953289539


## Polynomial Regression

### Data Preparation

In [163]:
# Second model: use the exhaust-vacuum column as the only predictor.
X_train_02 = X_train.loc[:, 'Exhaust Vacuum']
X_train_02.head(3)

0    56.90
1    39.82
2    59.27
Name: Exhaust Vacuum, dtype: float64

### Polynomial Regression Training Model

In [164]:
# Least-squares fit of a degree-2 polynomial, wrapped in poly1d so the model
# can be called like a function.
poly_coefficients = np.polyfit(X_train_02, y_train, deg=2)
model_poly = np.poly1d(poly_coefficients)

### Polynomial Regression Validation

In [165]:
# Validation predictor for the polynomial model: the exhaust-vacuum column.
X_valid_02 = X_valid.loc[:, 'Exhaust Vacuum']
X_valid_02.head(3)

0    61.50
1    73.40
2    42.49
Name: Exhaust Vacuum, dtype: float64

In [166]:
# Evaluate the fitted polynomial at every validation point.
predictions_02 = np.polyval(model_poly.coeffs, X_valid_02)
predictions_02

array([443.33025544, 435.14580056, 468.29143999, ..., 435.02221499,
       440.35983091, 435.84160143])

## Polynomial Regression Model Metrics

In [167]:
# Alias of the validation target used by the polynomial-model metric cells
# (plain assignment, no copy is made).
y_valid_02 = y_valid

### MSE

In [175]:
# Mean squared error of the polynomial model on the validation split.
# Uses the same scikit-learn metric as the linear model so both rows of the
# comparison table are computed identically. The previous hand-rolled
# `((pred - y) ** 2).mean()` went through pandas' mean, which skips NaN silently.
mse_polynominal = metrics.mean_squared_error(y_valid_02, predictions_02)
print(f"Mean Squared Error: {mse_polynominal}")

Mean Squared Error: 64.49733336339946


### R2

In [170]:
# NOTE(review): r2_score is already imported in the notebook's first cell, so
# this mid-notebook re-import is redundant.
from sklearn.metrics import r2_score

In [173]:
# R^2 of the polynomial model on the validation split.
r2_polynominal = r2_score(y_true=y_valid_02, y_pred=predictions_02)
print(r2_polynominal)

0.7794259163765872


# Comparison

In [179]:
# Side-by-side validation metrics for the two models, one row per model.
comparetable = dict(
    Model=['Linear', 'Polynomial'],
    MSE=[mse_linear, mse_polynominal],
    R2=[r2_linear, r2_polynominal],
)

ct = pd.DataFrame(data=comparetable)

In [180]:
# Show the comparison table as plain text.
print(ct)

        Model        MSE        R2
0      Linear  30.446437  0.895876
1  Polynomial  64.497333  0.779426


# Testing

In [184]:
# Test predictor: the temperature column, matching the linear model's feature.
X_test_01 = X_test.loc[:, 'Temperatur']
X_test_01.head(3)

0    28.56
1    14.59
2    19.92
Name: Temperatur, dtype: float64

In [185]:
# Build the single-column 2-D feature matrix directly from X_test so this cell
# is idempotent. The former self-reassignment via `.values.reshape(-1, 1)`
# failed on a second run because the name was already an ndarray.
X_test_01 = X_test[['Temperatur']].to_numpy()

In [186]:
# Predict the target for every test sample with the fitted linear model.
predictions_test = model.predict(X=X_test_01)

### Testing Metrics

In [187]:
# Mean absolute error of the linear model on the test split.
mae_test = metrics.mean_absolute_error(
    y_true=y_test,
    y_pred=predictions_test,
)

In [193]:
# Compute the test-set mean squared error before displaying it. This cell used
# to reference `mse_test` ahead of the cell that assigns it, which raises
# NameError when the notebook is run top to bottom on a fresh kernel.
mse_test = metrics.mean_squared_error(y_test, predictions_test)
mse_test

28.833054324843825

In [197]:
# Mean squared error of the linear model on the test split.
mse_test = metrics.mean_squared_error(
    y_true=y_test,
    y_pred=predictions_test,
)

In [196]:
# Root mean squared error on the test split: square root of the test MSE.
test_squared_error = metrics.mean_squared_error(y_true=y_test, y_pred=predictions_test)
rmse_test = np.sqrt(test_squared_error)

In [194]:
# Score of the fitted linear model on the test split.
r2_score_test = model.score(X_test_01, y_test)
# Print the computed value. The f-string previously interpolated `r2_score`,
# the imported scikit-learn function, so it showed a function repr, not the score.
print(f"R^2 Score: {r2_score_test}")

R^2 Score: 0.9013428652428742


In [None]:
# Testing results summary.
# (This cell previously held the bare, undefined name `testing`, which raises
# NameError when the notebook is run top to bottom.)

In [198]:
# Collect the linear model's test-set metrics into a one-row summary table.
testing_result = dict(
    Model=['Linear'],
    MAE=[mae_test],
    MSE=[mse_test],
    RMSE=[rmse_test],
    R2=[r2_score_test],
)

tr = pd.DataFrame(data=testing_result)

In [199]:
# Show the test-set summary table as plain text.
print(tr)

    Model       MAE        MSE      RMSE        R2
0  Linear  4.256454  28.833054  5.369642  0.901343
