In [142]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score,KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import make_scorer, mean_squared_error

In [161]:
ccpp_df = pd.read_csv('CCPP.csv', index_col=False) 
print(data.head())

      AT      V       AP     RH      PE
0   8.34  40.77  1010.84  90.01  480.48
1  23.64  58.49  1011.40  74.20  445.75
2  29.74  56.90  1007.15  41.91  438.76
3  19.07  49.69  1007.22  76.79  453.09
4  11.80  40.66  1017.13  97.20  464.43


In [162]:
print(data.shape)
print(data.describe())

(9568, 5)
                AT            V           AP           RH           PE
count  9568.000000  9568.000000  9568.000000  9568.000000  9568.000000
mean     19.651231    54.305804  1013.259078    73.308978   454.365009
std       7.452473    12.707893     5.938784    14.600269    17.066995
min       1.810000    25.360000   992.890000    25.560000   420.260000
25%      13.510000    41.740000  1009.100000    63.327500   439.750000
50%      20.345000    52.080000  1012.940000    74.975000   451.550000
75%      25.720000    66.540000  1017.260000    84.830000   468.430000
max      37.110000    81.560000  1033.300000   100.160000   495.760000


In [163]:
print(data.isnull().sum())

AT    0
V     0
AP    0
RH    0
PE    0
dtype: int64


In [168]:
X = data[['AT', 'V', 'AP', 'RH']]  # Ambient Temperature, Pressure, etc.
y = data['PE']

In [169]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [170]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

y_pred = lr_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'MSE: {mse:.2f}')

#K-fold
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(lr_model, X, y, cv=kfold, scoring='neg_mean_squared_error')
mse_scores = -scores
print(f'K-fold cross validation MSE: {mse_scores.mean():.2f}')

MSE: 20.00
K-fold cross validation MSE: 20.81


In [171]:
svm_model = SVR(kernel='linear')
svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'MSE: {mse:.2f}')

#K-fold
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(svm_model, X, y, cv=kfold, scoring='neg_mean_squared_error')
mse_scores = -scores
print(f'K-fold cross validation MSE: {mse_scores.mean():.2f}')

MSE: 20.16
K-fold cross validation MSE: 20.93


In [172]:
tree_model = DecisionTreeRegressor(max_depth=10, random_state=42)
tree_model.fit(X_train, y_train)

y_pred=tree_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'MSE: {mse:.2f}')

#K-fold
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(tree_model, X, y, cv=kfold, scoring='neg_mean_squared_error')
mse_scores = -scores
print(f'K-fold cross validation MSE: {mse_scores.mean():.2f}')

MSE: 15.53
K-fold cross validation MSE: 17.41


In [174]:
#mlp_model = MLPRegressor(hidden_layer_sizes=(64, 64), max_iter=1000, random_state=42)
mlp_model = MLPRegressor(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
mlp_model.fit(X_train, y_train)

y_pred=mlp_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'MSE: {mse:.2f}')

#K-fold
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(mlp_model, X, y, cv=kfold, scoring='neg_mean_squared_error')
mse_scores = -scores
print(f'K-fold cross validation MSE: {mse_scores.mean():.2f}')

MSE: 16.91
K-fold cross validation MSE: 24.93
