In [291]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
import warnings
warnings.filterwarnings('ignore')

In [292]:
data = pd.read_excel("test.xlsx")
data.head()

Unnamed: 0,hour,date_miladi,date_shamsi,code,unit_no,fuel_type,mvar,temp,moisture,power
0,1,2020-01-13,1398/10/23,SO,1,A,11,3,94,119
1,2,2020-01-13,1398/10/23,SO,1,A,11,3,96,119
2,3,2020-01-13,1398/10/23,SO,1,A,10,2,95,120
3,4,2020-01-13,1398/10/23,SO,1,A,11,2,95,120
4,5,2020-01-13,1398/10/23,SO,1,A,11,2,95,121


In [293]:
data=data[data["power"]>100]
data.shape

(756, 10)

In [294]:
# Change object to integer:
data["fuel_type"][data["fuel_type"]=="A"] = 1; 
data["fuel_type"][data["fuel_type"]=="B"] = 2;
data["fuel_type"][data["fuel_type"]=="C"] = 3;

In [295]:
data['fuel_type'].value_counts()

1    508
Name: fuel_type, dtype: int64

In [296]:
data["fuel_type"].fillna(method='bfill', inplace=True)

In [297]:
data.isnull().sum()

hour           0
date_miladi    0
date_shamsi    0
code           0
unit_no        0
fuel_type      0
mvar           0
temp           0
moisture       0
power          0
dtype: int64

In [298]:
X = data[["moisture","unit_no","temp","fuel_type"]]
y = data[["power"]]
y

Unnamed: 0,power
0,119
1,119
2,120
3,120
4,121
...,...
994,110
995,127
996,127
997,124


In [299]:
X

Unnamed: 0,moisture,unit_no,temp,fuel_type
0,94,1,3,1
1,96,1,3,1
2,95,1,2,1
3,95,1,2,1
4,95,1,2,1
...,...,...,...,...
994,6,6,15,1
995,8,6,16,1
996,21,6,18,1
997,11,6,19,1


In [300]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [301]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

In [302]:
# Define Gradient Boosting Regression model
gb_model = GradientBoostingRegressor()

In [303]:
# Define hyperparameters to tune
params = {
'learning_rate': [0.01, 0.1, 1],
'max_depth': [3, 5, 7],
'n_estimators': [50, 100, 200]
}

In [304]:
# Perform Grid Search Cross Validation
grid_search = GridSearchCV(gb_model, params, cv=5)
grid_search.fit(X_train_scaled, y_train)

GridSearchCV(cv=5, estimator=GradientBoostingRegressor(),
             param_grid={'learning_rate': [0.01, 0.1, 1],
                         'max_depth': [3, 5, 7],
                         'n_estimators': [50, 100, 200]})

In [305]:
# Print the best hyperparameters 
print(grid_search.best_params_)

{'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50}


In [306]:
gb_model = GradientBoostingRegressor(n_estimators=50,learning_rate=0.1,max_depth=5,random_state=42)
gb_model.fit(X_train_scaled,y_train)
y_pred=gb_model.predict(X_test_scaled)

In [307]:
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)

Mean squared error: 76.31266834702195
R-squared: 0.3372013016469434


In [308]:
y_pred = gb_model.predict([[45,1,30,1]])
print(y_pred[0])

124.76207448393717


In [309]:
# Define Gradient Boosting Regression model
gb_model2 = GradientBoostingRegressor()
# Perform Grid Search Cross Validation
grid_search = GridSearchCV(gb_model2, params, cv=5)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)

{'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50}


In [310]:
gb_model2 = GradientBoostingRegressor(n_estimators=50,learning_rate=0.1,max_depth=5,random_state=42)
gb_model2.fit(X_train,y_train)
y_pred=gb_model2.predict(X_test)

In [311]:
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)

Mean squared error: 57.36404548268841
R-squared: 0.4988703682124188


In [313]:
y_pred = gb_model2.predict([[45,1,30,1]])
print(y_pred[0])

110.1424815276293
