In [433]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
import warnings
warnings.filterwarnings('ignore')

In [434]:
data = pd.read_excel("test.xlsx")
data.head()

Unnamed: 0,hour,date_miladi,date_shamsi,code,unit_no,fuel_type,mvar,temp,moisture,power
0,1,2020-01-13,1398/10/23,SO,1,A,11,3,94,119
1,2,2020-01-13,1398/10/23,SO,1,A,11,3,96,119
2,3,2020-01-13,1398/10/23,SO,1,A,10,2,95,120
3,4,2020-01-13,1398/10/23,SO,1,A,11,2,95,120
4,5,2020-01-13,1398/10/23,SO,1,A,11,2,95,121


In [435]:
data=data[data["power"]>100]
data.shape

(756, 10)

In [436]:
# Change object to integer:
data["fuel_type"][data["fuel_type"]=="A"] = 1; 
data["fuel_type"][data["fuel_type"]=="B"] = 2;
data["fuel_type"][data["fuel_type"]=="C"] = 3;

In [437]:
data['fuel_type'].value_counts()

1    508
Name: fuel_type, dtype: int64

In [438]:
data["fuel_type"].fillna(method='bfill', inplace=True)

In [439]:
data.isnull().sum()

hour           0
date_miladi    0
date_shamsi    0
code           0
unit_no        0
fuel_type      0
mvar           0
temp           0
moisture       0
power          0
dtype: int64

In [440]:
X = data[["moisture","unit_no","temp","fuel_type"]]
y = data[["power"]]
y

Unnamed: 0,power
0,119
1,119
2,120
3,120
4,121
...,...
994,110
995,127
996,127
997,124


In [441]:
X

Unnamed: 0,moisture,unit_no,temp,fuel_type
0,94,1,3,1
1,96,1,3,1
2,95,1,2,1
3,95,1,2,1
4,95,1,2,1
...,...,...,...,...
994,6,6,15,1
995,8,6,16,1
996,21,6,18,1
997,11,6,19,1


In [442]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [443]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

## Tuning Hyperparameters by using GridSearchCV

In [444]:
# Define the SVR model
svr = SVR()

# Define the hyperparameters to tune
param_grid = {'kernel': ['linear', 'rbf', 'poly'],
              'C': [0.001,0.01,0.1, 1, 10,100,1000],
              'epsilon': [0.001,0.01,0.1, 1, 10,100,1000]}

# Perform grid search to find the best hyperparameters
grid_search = GridSearchCV(svr, param_grid, cv=5)
grid_search.fit(X_train_scaled, y_train)

# Print the best hyperparameters
print("Best hyperparameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)


Best hyperparameters: {'C': 1000, 'epsilon': 10, 'kernel': 'rbf'}
Best score: 0.44384067195641014


# Evaluation

In [445]:
svr = SVR(C=1000, epsilon=10, kernel='rbf')
svr.fit(X_train_scaled,y_train)
y_pred=svr.predict(X_test_scaled)

In [446]:
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)

Mean squared error: 98.46952455407842
R-squared: 0.013041958241727714


In [453]:
row = np.array([45,6,30,1])
X_test_scaled = scaler.transform([row])
y_pred = svr.predict(X_test_scaled)
print(y_pred[0])

123.16746157902622


### Evaluation without scaling

In [None]:
# we can see the result without scaling.
svr2 =SVR(C=1000, epsilon=10, kernel='rbf')
svr2.fit(X_train,y_train)
y_pred=svr2.predict(X_test)

In [None]:
mse=mean_squared_error(y_pred,y_test)
r2=r2_score(y_pred,y_test)
print("Mean squared error:",mse)
print("R-squared:",r2)