In [1]:
import numpy as np
import pandas as pd
from sklearn import model_selection, metrics
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import make_scorer
import timeit

In [2]:
Data = pd.read_excel("C:/Users/ftum/Downloads/Data diolah darin.xlsx")

In [3]:
Data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37528 entries, 0 to 37527
Data columns (total 30 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Backhoe      37528 non-null  int64  
 1   HP(watt)     37528 non-null  float64
 2   Norm_MAP     37528 non-null  float64
 3   RPM          37527 non-null  float64
 4   Age          37528 non-null  int64  
 5   Load_Factor  37528 non-null  float64
 6   Engine_Tier  37528 non-null  int64  
 7   TEMP[C]      37527 non-null  float64
 8   Fuel[g/s]    37528 non-null  float64
 9   NOx[g/s]     37528 non-null  float64
 10  HC[g/s]      37528 non-null  float64
 11  CO[g/s]      37528 non-null  float64
 12  CO2[g/s]     37528 non-null  float64
 13  PM[mg/s]     37528 non-null  float64
 14  Nox (g/l)    37528 non-null  float64
 15  HC (g/l)     37528 non-null  float64
 16  CO (g/l)     37528 non-null  float64
 17  CO2 (g/l)    37528 non-null  float64
 18  PM (g/l)     37528 non-null  float64
 19  Fuel

In [4]:
Data1 = Data.copy()
Data1.dropna(inplace=True)

X = Data1[['Backhoe', 'HP(watt)', 'Norm_MAP', 'RPM', 'Age', 'Load_Factor', 'Engine_Tier']]
y = Data1['CO[g/s]']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
gbr = GradientBoostingRegressor(learning_rate=0.1, 
                                n_estimators=100, 
                                subsample=1.0, 
                                min_samples_split=2, 
                                min_samples_leaf=1, 
                                max_depth=3)

gbr.fit(X_train, y_train.values.ravel())
y_pred = gbr.predict(X_train)

print('Mean Absolute Error:', metrics.mean_absolute_error(y_train, y_pred))  
print('Mean Squared Error:', metrics.mean_squared_error(y_train, y_pred))  
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_train, y_pred)))

Mean Absolute Error: 0.003769074881082962
Mean Squared Error: 0.00020219256935264843
Root Mean Squared Error: 0.014219443355935155


## Particle Swarm Optimization

In [6]:
start = timeit.default_timer() # catat waktu mulai

# Import PSO
from pyMetaheuristic.algorithm import particle_swarm_optimization

# Define parameters to be tuned and their ranges
X = X_train
y = y_train

def create_fitness_function(X, y):
    def fitness_function(variables):
        learning_rate=variables[0]
        n_estimators=int(round(variables[1]))
        subsample=variables[2]
        min_samples_split=int(round(variables[3]))
        min_samples_leaf=int(round(variables[4]))
        max_depth=int(round(variables[5]))
        
        gbr_tune = GradientBoostingRegressor(learning_rate=learning_rate, n_estimators=n_estimators, subsample=subsample, 
                                             min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, max_depth=max_depth)
        gbr_tune.fit(X, y)
        y_pred = gbr_tune.predict(X)
        scores = np.sqrt(mean_squared_error(y, y_pred))
        return scores
    return fitness_function


fitness_function = create_fitness_function(X, y)

# Run PSO
parameters = {
    'swarm_size': 20,
    'min_values': [0.01, 10, 0.5, 2, 1, 1],  # min values for learning_rate, n_estimators, subsample, min_samples_split, min_samples_leaf, max_depth
    'max_values': [1.0, 50, 1.0, 20, 20, 50],   # max values for learning_rate, n_estimators, subsample, min_samples_split, min_samples_leaf, max_depth
    'iterations': 1000,
    'decay': 0, 
    'w': 0.9, 
    'c1': 2, 
    'c2': 2, 
    'verbose': True, 
    'start_init': None,
    'target_value': None
}
pso = particle_swarm_optimization(target_function = fitness_function, **parameters)

# Print Solution
variables = pso[:-1]
minimum   = pso[ -1]
print('Variables: ', np.around(variables, 4) , ' Minimum Value Found: ', round(minimum, 4) )

stop = timeit.default_timer() # catat waktu selesai
lama_eksekusi = stop - start # lama eksekusi dalam satuan detik
print("Lama eksekusi: ",lama_eksekusi,"detik")

Iteration =  0  f(x) =  0.004217894213897634
Iteration =  1  f(x) =  0.003975055422934484
Iteration =  2  f(x) =  0.003907455091366977
Iteration =  3  f(x) =  0.00390745486806772
Iteration =  4  f(x) =  0.003907454868067715
Iteration =  5  f(x) =  0.003907454868067715
Iteration =  6  f(x) =  0.003907454868067715
Iteration =  7  f(x) =  0.003907454868067715
Iteration =  8  f(x) =  0.003907454868067715
Iteration =  9  f(x) =  0.003907454868067715
Iteration =  10  f(x) =  0.003907454868067715
Iteration =  11  f(x) =  0.003907454868067715
Iteration =  12  f(x) =  0.003907454868067715
Iteration =  13  f(x) =  0.003907454868067715
Iteration =  14  f(x) =  0.003907454868067715
Iteration =  15  f(x) =  0.003907454868067715
Iteration =  16  f(x) =  0.003907454868067715
Iteration =  17  f(x) =  0.003907454868067715
Iteration =  18  f(x) =  0.003907454868067715
Iteration =  19  f(x) =  0.003907454868067715
Iteration =  20  f(x) =  0.003907454868067715
Iteration =  21  f(x) =  0.003907454868067715

In [7]:
pip install pymetaheuristic

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.
