In [595]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
import warnings
warnings.filterwarnings('ignore')

In [596]:
data = pd.read_excel("test.xlsx")
data.head()

Unnamed: 0,hour,date_miladi,date_shamsi,code,unit_no,fuel_type,mvar,temp,moisture,power
0,1,2020-01-13,1398/10/23,SO,1,A,11,3,94,119
1,2,2020-01-13,1398/10/23,SO,1,A,11,3,96,119
2,3,2020-01-13,1398/10/23,SO,1,A,10,2,95,120
3,4,2020-01-13,1398/10/23,SO,1,A,11,2,95,120
4,5,2020-01-13,1398/10/23,SO,1,A,11,2,95,121


In [597]:
data=data[data["power"]>100]
data.shape

(756, 10)

In [598]:
# Change object to integer:
data["fuel_type"][data["fuel_type"]=="A"] = 1; 
data["fuel_type"][data["fuel_type"]=="B"] = 2;
data["fuel_type"][data["fuel_type"]=="C"] = 3;

In [599]:
data['fuel_type'].value_counts()

1    508
Name: fuel_type, dtype: int64

In [600]:
data["fuel_type"].fillna(method='bfill', inplace=True)

In [601]:
data.isnull().sum()

hour           0
date_miladi    0
date_shamsi    0
code           0
unit_no        0
fuel_type      0
mvar           0
temp           0
moisture       0
power          0
dtype: int64

In [602]:
X = data[["moisture","unit_no","temp","fuel_type"]]
y = data[["power"]]
y

Unnamed: 0,power
0,119
1,119
2,120
3,120
4,121
...,...
994,110
995,127
996,127
997,124


In [603]:
X

Unnamed: 0,moisture,unit_no,temp,fuel_type
0,94,1,3,1
1,96,1,3,1
2,95,1,2,1
3,95,1,2,1
4,95,1,2,1
...,...,...,...,...
994,6,6,15,1
995,8,6,16,1
996,21,6,18,1
997,11,6,19,1


In [604]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [605]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

In [606]:
# Define Ridge Regression model with alpha=0.5 (regularization strength)
lasso_model = Lasso(alpha=0.5)

In [607]:
# Fit the model to the training data 
lasso_model.fit(X_train_scaled, y_train)

Lasso(alpha=0.5)

In [608]:
# Make predictions on the testing data 
y_pred = lasso_model.predict(X_test_scaled)

In [609]:
# Calculate Mean Squared Error (MSE) of the predictions 
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)

Mean squared error: 138.13446436970685
R-squared: -4.248850868354252


# Tuning hyperparameters

In [610]:
# Define the Ridge model
lasso_model = Lasso()

# Define the hyperparameters to tune
param_grid = {'selection': ['cyclic','random'],
              'alpha': [0.01,0.1, 1, 10,100]}

# Perform grid search to find the best hyperparameters
grid_search = GridSearchCV(lasso_model, param_grid, cv=5)
grid_search.fit(X_train_scaled, y_train)

# Print the best hyperparameters
print("Best hyperparameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

Best hyperparameters: {'alpha': 0.1, 'selection': 'random'}
Best score: 0.2027331343084608


# Testing Tuned hyperparameters

In [631]:
# Define Ridge Regression model with hyperparameters
lasso_model = Lasso(alpha=0.1,selection='cyclic')

In [632]:
# Fit the model to the training data 
lasso_model.fit(X_train_scaled, y_train)

Lasso(alpha=0.1)

In [633]:
# Make predictions on the testing data 
y_pred = lasso_model.predict(X_test_scaled)

# Evaluation

In [634]:
# Calculate Mean Squared Error (MSE) of the predictions 
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)

Mean squared error: 138.56354516902232
R-squared: -3.5080319300757923
