In [145]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import ElasticNet
import warnings
warnings.filterwarnings('ignore')

In [146]:
data = pd.read_excel("test.xlsx")
data.head()

Unnamed: 0,hour,date_miladi,date_shamsi,code,unit_no,fuel_type,mvar,temp,moisture,power
0,1,2020-01-13,1398/10/23,SO,1,A,11,3,94,119
1,2,2020-01-13,1398/10/23,SO,1,A,11,3,96,119
2,3,2020-01-13,1398/10/23,SO,1,A,10,2,95,120
3,4,2020-01-13,1398/10/23,SO,1,A,11,2,95,120
4,5,2020-01-13,1398/10/23,SO,1,A,11,2,95,121


In [147]:
data=data[data["power"]>100]
data.shape

(756, 10)

In [148]:
# Change object to integer:
data["fuel_type"][data["fuel_type"]=="A"] = 1; 
data["fuel_type"][data["fuel_type"]=="B"] = 2;
data["fuel_type"][data["fuel_type"]=="C"] = 3;

In [149]:
data['fuel_type'].value_counts()

1    508
Name: fuel_type, dtype: int64

In [150]:
data["fuel_type"].fillna(method='bfill', inplace=True)

In [151]:
data.isnull().sum()

hour           0
date_miladi    0
date_shamsi    0
code           0
unit_no        0
fuel_type      0
mvar           0
temp           0
moisture       0
power          0
dtype: int64

In [152]:
X = data[["moisture","unit_no","temp","fuel_type"]]
y = data[["power"]]
y

Unnamed: 0,power
0,119
1,119
2,120
3,120
4,121
...,...
994,110
995,127
996,127
997,124


In [153]:
X

Unnamed: 0,moisture,unit_no,temp,fuel_type
0,94,1,3,1
1,96,1,3,1
2,95,1,2,1
3,95,1,2,1
4,95,1,2,1
...,...,...,...,...
994,6,6,15,1
995,8,6,16,1
996,21,6,18,1
997,11,6,19,1


In [154]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [155]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [156]:
# Training the Decision Tree Regression model on the whole dataset
model = ElasticNet()
model.fit(X_train, y_train)
y_pred=model.predict(X_test)

In [157]:
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)

Mean squared error: 140.39720868307649
R-squared: -11.003369554909197


# Tuning the hyperparameters

### with GridSearchView

In [None]:
# Define the SVR model
model1 = ElasticNet()

# Define the hyperparameters to tune
param_grid = {'alpha': np.logspace(-4, 4, 100),
              'l1_ratio': np.linspace(0, 1, 100)}

# Perform grid search to find the best hyperparameters
grid_search = GridSearchCV(model1, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Print the best hyperparameters
print("Best hyperparameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

In [None]:
regressor =ElasticNet(**grid_search.best_params_)
regressor.fit(X_train, y_train)
y_pred=regressor.predict(X_test)

In [None]:
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)

### with RandomizedSearchCV

In [None]:
model2 = ElasticNet()
search = RandomizedSearchCV(model2, param_grid, cv=5, n_iter=50, random_state=42)

In [None]:
search.fit(X_train, y_train)

In [None]:
print("Best hyperparameters: ", search.best_params_)
print("Best score: ", search.best_score_)

In [None]:
regressor =ElasticNet(l1_ratio=0.9090909090909092,alpha= 0.1176811952434999)
regressor.fit(X_train, y_train)
y_pred=regressor.predict(X_test)

In [None]:
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)

# Trying dimension reduction for the model

## PCA

In [None]:
# Apply PCA to reduce the number of features to 3
pca = PCA(n_components=3)
X_train_pca = pca.fit_transform(X_train)

In [None]:
plt.figure(figsize=(8,6))
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlim(0,14,1)
plt.xlabel('Number of components')
plt.ylabel('Cumulative explained variance')
plt.show()

In [None]:
cumulative=np.cumsum(pca.explained_variance_ratio_)
plt.step([i for i in range(len(cumulative))],cumulative)
plt.show()

In [None]:
pca=PCA().fit(X_train)
print(pca.explained_variance_ratio_)
print()


In [None]:
pipeline = Pipeline([
    ('pca', PCA(n_components=4)),
    ('dt', ElasticNet(**grid_search.best_params_))
])

In [None]:
pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)

In [None]:
# Calculate Mean Squared Error (MSE) of the predictions 
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)

## t-SNE

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)
tsne = TSNE(n_components=3, random_state=42)

In [None]:
X_train = tsne.fit_transform(X_train)
X_test = tsne.fit_transform(X_test)

In [None]:
regressor =ElasticNet(l1_ratio=0.9090909090909092,alpha= 0.1176811952434999)
regressor.fit(X_train, y_train)
y_pred=regressor.predict(X_test)
len(y_test)

In [None]:
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)