In [1]:
import pandas as pd
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures

In [2]:
file_url = r'https://raw.githubusercontent.com/sedeba19/Chapter-7/main/data/Dataset_ccpp.csv'

In [3]:
df = pd.read_csv(file_url)
df.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [4]:
df.shape

(9568, 5)

In [5]:
# Extract the features and target variables
X = df.drop('PE', axis=1)
y = df['PE']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=123)

In [6]:
# Instantiate the model
lr_model_1 = LinearRegression()

# Fit the model
lr_model_1.fit(X_train, y_train)

In [7]:
# Make predictions
y_pred = lr_model_1.predict(X_test)

In [8]:
# Print the R-squared score
print('R-squared:', lr_model_1.score(X_test, y_test))

R-squared: 0.9235430062617527


In [9]:
# Print MSE
print('MSE:', mean_squared_error(y_test, y_pred))

MSE: 22.127759402996038


In [10]:
# Create an instance pipeline
pipe = Pipeline([('scaler', MinMaxScaler()),    # Step 1 - MinMaxScaler
                    ('poly', PolynomialFeatures(degree=3)),    # Step 2 - PolynomialFeatures
                    ('model', LinearRegression())])    # Step 3 - LinearRegression


In [11]:
lr_model_2 = pipe.fit(X_train, y_train)
lr_model_2

In [12]:
# Print the R-squared score
print('R-squared:', lr_model_2.score(X_test, y_test))

R-squared: 0.9375181912403223


In [13]:
# Print MSE
print('MSE:', mean_squared_error(y_test, y_pred))

MSE: 22.127759402996038


In [14]:
print(lr_model_2[-1].coef_)

[-3.88486786e-14 -1.28063628e+02 -5.36169911e+01 -1.22539281e+02
 -9.39408626e+01 -3.59677531e+01  2.67615054e+01  1.06097975e+02
  1.10951383e+02  1.94557592e+01  1.58160448e+02 -6.86802906e+01
  8.45109541e+01  1.33020061e+02  9.00958950e+01  4.24402554e+01
  6.07845682e+01  3.42906306e+01 -7.06656384e+01 -1.35286712e+02
 -7.77605751e+01  1.53693557e+02 -6.00331944e+01 -7.69209131e+01
 -6.52673474e+01  9.13616369e+01 -1.10377988e+02 -6.99554923e+01
 -1.26257830e+01  3.24544132e+01  2.88409930e+01 -3.11162533e+01
 -3.70888784e+01 -7.15098090e+01 -2.46082421e+01]


In [20]:
print(len(lr_model_2[-1].coef_))

35


In [27]:
# Create an instance pipeline
pipe_1 = Pipeline([('scaler', MinMaxScaler()),    # Step 1 - MinMaxScaler
                    ('poly', PolynomialFeatures(degree=10)),    # Step 2 - PolynomialFeatures
                    ('lr', LinearRegression())])    # Step 3 - Lasso

lr_model_3 = pipe_1.fit(X_train, y_train)
lr_model_3

In [28]:
print('R-squared:', lr_model_3.score(X_test, y_test))

R-squared: 0.17465651273114902


In [29]:
lr_model_3.predict(X_test)

array([453.85801041, 439.93600248, 468.11832097, ..., 435.91778396,
       444.80724977, 493.40503216])

In [30]:
print('MSE:', mean_squared_error(y_test, lr_model_3.predict(X_test)))

MSE: 238.86633803100813


Create a Lasso Model

In [31]:
# Creating a Pipeline Lasso Regression model
pipe_2 = Pipeline([('scaler', MinMaxScaler()),    # Step 1 - MinMaxScaler
                    ('poly', PolynomialFeatures(degree=10)),    # Step 2 - PolynomialFeatures
                    ('lasso', Lasso(alpha=0.01))])    # Step 3 - Lasso

# fit the model
Lasso_model = pipe_2.fit(X_train, y_train)

In [32]:
# Print the R-squared score
print('R-squared:', Lasso_model.score(X_test, y_test))

R-squared: 0.9337706120502614


In [33]:
# Print MSE
print('MSE:', mean_squared_error(y_test, Lasso_model.predict(X_test)))

MSE: 19.167742417086277


In [34]:
# Print the coefficients
print(Lasso_model[-1].coef_)

[  0.         -68.44077794 -12.09742794 ...  -0.          -0.59789808
  -0.        ]


In [35]:
len(Lasso_model[-1].coef_)

1001