In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline


In [2]:
file_url = r'https://raw.githubusercontent.com/sedeba19/Chapter-7/main/data/Dataset_ccpp.csv'

In [3]:
df = pd.read_csv(file_url)
df

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.40,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.50,1009.23,96.62,473.90
...,...,...,...,...,...
9563,16.65,49.69,1014.01,91.00,460.03
9564,13.19,39.18,1023.67,66.78,469.62
9565,31.32,74.33,1012.92,36.48,429.57
9566,24.48,69.45,1013.86,62.39,435.74


In [4]:
# Extract the features and target
X = df.drop('PE', axis=1)
y = df['PE']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=0)

In [5]:
X.dtypes

AT    float64
V     float64
AP    float64
RH    float64
dtype: object

In [6]:
lr_model_1 = LinearRegression()
lr_model_1.fit(X_train, y_train)

In [7]:
# Make predictions
y_pred = lr_model_1.predict(X_test)

In [8]:
# Print r2 score
print('R2 score: ', r2_score(y_test, y_pred))

R2 score:  0.9325315554761303


In [9]:
# Create a pipeline
steps = [('scaler', MinMaxScaler()),
         ('poly', PolynomialFeatures(degree=3)),
         ('lr', LinearRegression())]

lr_model_2 = Pipeline(steps)
lr_model_2.fit(X_train, y_train)
print('lr_model_2 R2 score: {:.2f}'.format(lr_model_2.score(X_test, y_test)))

lr_model_2 R2 score: 0.94


In [10]:
# Generate predictions
y_pred_2 = lr_model_2.predict(X_test)
y_pred_2 

array([434.5928642 , 458.27799158, 460.51399362, ..., 469.81364303,
       439.48303543, 460.87686577])

In [11]:
# Print MSE of y_pred_2
print('Mean Absolute Error: {:.2f}'.format(mean_absolute_error(y_test, y_pred_2)))

Mean Absolute Error: 3.19


In [12]:
#Print coefficients
print('Coefficients: ', lr_model_2.named_steps['lr'].coef_)

Coefficients:  [ 7.72661789e-14 -1.77278028e+02 -4.60337188e+01 -1.60520675e+02
 -1.23076123e+02  6.23358210e+00  8.19655844e+00  1.45478576e+02
  1.88658651e+02  2.43740192e+01  1.80553150e+02 -1.08058561e+02
  1.09713294e+02  1.79121906e+02  1.06460596e+02  2.67290613e+01
  7.79833654e+01  3.69241324e+01 -1.13863997e+02 -1.42673215e+02
 -9.69606773e+01  1.90706809e+02 -5.56429546e+01 -1.32595225e+02
 -9.41682917e+01  9.40112729e+01 -1.18732510e+02 -7.64871610e+01
 -4.18714081e+01  6.36772260e+01  4.42340977e+01 -3.81114691e+01
 -4.71547759e+01 -9.16797074e+01 -2.52346805e+01]


In [13]:
len(lr_model_2.named_steps['lr'].coef_)

35

In [14]:
# Make new Linear Regression model
steps = [('scaler', MinMaxScaler()),
         ('poly', PolynomialFeatures(degree=10)),
         ('lr', LinearRegression())]

lr_model_3 = Pipeline(steps)
lr_model_3.fit(X_train, y_train)
print('lr_model_3 R2 score: {:.2f}'.format(lr_model_3.score(X_test, y_test)))

lr_model_3 R2 score: 0.57


In [15]:
# Evaluate the model
y_pred_3 = lr_model_3.predict(X_test)
print('Mean Absolute Error: {:.2f}'.format(mean_absolute_error(y_test, y_pred_3)))

Mean Absolute Error: 3.53


In [16]:
print(len(lr_model_3[-1].coef_))

1001


In [17]:
steps = [('scaler', MinMaxScaler()),
         ('poly', PolynomialFeatures(degree=10)),
         ('lr', Ridge(alpha=0.9))]

In [18]:
ridge_model = Ridge(alpha=0.9)

# Make a pipeline
ridge_moodel = Pipeline(steps)
ridge_model.fit(X_train, y_train)


In [19]:
print('R2 score: {:.2f}'.format(ridge_model.score(X_test, y_test)))

R2 score: 0.93


In [20]:
ridge_pred = ridge_model.predict(X_test)
ridge_pred

array([431.42757933, 458.56133442, 462.75277407, ..., 469.51838994,
       442.41772121, 461.88284002])

In [21]:
# Print MSE
print('Mean Absolute Error: {:.2f}'.format(mean_absolute_error(y_test, ridge_pred)))

Mean Absolute Error: 3.57


In [22]:
# Print coefficients
print('Coefficients: ', ridge_model[-1].coef_)

TypeError: 'Ridge' object is not subscriptable