In [25]:
import optuna
import time
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

from sklearn.metrics import accuracy_score, precision_score, recall_score

# Hospital Readmissions (Classification)

In [26]:
readmissions = pd.read_csv('readmissions_clean.csv')

# Split dataset into X and Y
X = readmissions.drop(['readmitted'], axis=1)
y = readmissions.readmitted

# splitting X and Y into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=3, test_size=0.2)

def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 2, 500,1)
    max_depth = trial.suggest_int('max_depth', 1, 50,1, log=True)
    max_features = trial.suggest_int('max_features',3,10,1)
    
    rf = RandomForestClassifier(n_estimators, max_depth= max_depth,\
                                                 max_features= max_features)
    rf.fit(X_train, y_train)
    
    return rf.score(X_test, y_test)
start = time.time()                                
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)
end = time.time()

print("Optuna execution time: ", (end-start), "seconds")

[32m[I 2023-04-20 11:14:27,862][0m A new study created in memory with name: no-name-e2da967f-51e6-486d-b78c-80d790ab8a1c[0m
[32m[I 2023-04-20 11:14:39,386][0m Trial 0 finished with value: 0.6048 and parameters: {'n_estimators': 402, 'max_depth': 17, 'max_features': 10}. Best is trial 0 with value: 0.6048.[0m
[32m[I 2023-04-20 11:14:42,501][0m Trial 1 finished with value: 0.5934 and parameters: {'n_estimators': 132, 'max_depth': 20, 'max_features': 7}. Best is trial 0 with value: 0.6048.[0m
[32m[I 2023-04-20 11:14:45,124][0m Trial 2 finished with value: 0.5876 and parameters: {'n_estimators': 108, 'max_depth': 27, 'max_features': 7}. Best is trial 0 with value: 0.6048.[0m
[32m[I 2023-04-20 11:14:45,434][0m Trial 3 finished with value: 0.6232 and parameters: {'n_estimators': 23, 'max_depth': 7, 'max_features': 10}. Best is trial 3 with value: 0.6232.[0m
[32m[I 2023-04-20 11:14:47,707][0m Trial 4 finished with value: 0.6276 and parameters: {'n_estimators': 299, 'max_depth

Optuna execution time:  386.88699293136597 seconds


In [27]:
#Getting the best trial..
study.best_trial

FrozenTrial(number=36, state=TrialState.COMPLETE, values=[0.631], datetime_start=datetime.datetime(2023, 4, 20, 11, 16, 32, 624884), datetime_complete=datetime.datetime(2023, 4, 20, 11, 16, 38, 220085), params={'n_estimators': 445, 'max_depth': 9, 'max_features': 7}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=500, log=False, low=2, step=1), 'max_depth': IntDistribution(high=50, log=True, low=1, step=1), 'max_features': IntDistribution(high=10, log=False, low=3, step=1)}, trial_id=36, value=None)

In [28]:
"""OPTUNA BASED ON MAXIMIZED ACCURACY"""

rf = RandomForestClassifier(max_depth=9 ,max_features=7 ,n_estimators=445)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)


print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

Accuracy: 0.6266
Precision: 0.6290231507622812
Recall: 0.4793459552495697


# Car Emissions Data (Regression)

In [29]:
emissions = pd.read_csv("emissions_cleaned.csv")

# Split dataset into X and Y
X = emissions.drop('co2_emissions', axis=1)
y = emissions["co2_emissions"]

X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=3, test_size=0.2)

def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 2, 500,1)
    max_depth = trial.suggest_int('max_depth', 1, 50,1)
    max_features = trial.suggest_int('max_features',3,10,1)
    
    rf = RandomForestRegressor(n_estimators, max_depth= max_depth,\
                                                 max_features= max_features,
                              random_state=3)
    rf.fit(X_train, y_train)
    
    return mean_squared_error(y_test, rf.predict(X_test))#.mean()

start = time.time()                              
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)
end = time.time()

print('Optuna execution time: ', (end-start), 'seconds')

[32m[I 2023-04-20 11:23:53,928][0m A new study created in memory with name: no-name-dce1a184-5906-416a-a14b-36b2747e1367[0m
[32m[I 2023-04-20 11:23:55,697][0m Trial 0 finished with value: 10.220184515543837 and parameters: {'n_estimators': 282, 'max_depth': 45, 'max_features': 9}. Best is trial 0 with value: 10.220184515543837.[0m
[32m[I 2023-04-20 11:23:55,738][0m Trial 1 finished with value: 23.53022348746429 and parameters: {'n_estimators': 15, 'max_depth': 7, 'max_features': 6}. Best is trial 0 with value: 10.220184515543837.[0m
[32m[I 2023-04-20 11:23:55,927][0m Trial 2 finished with value: 10.56492328065759 and parameters: {'n_estimators': 44, 'max_depth': 9, 'max_features': 10}. Best is trial 0 with value: 10.220184515543837.[0m
[32m[I 2023-04-20 11:23:56,129][0m Trial 3 finished with value: 11.443149514525297 and parameters: {'n_estimators': 36, 'max_depth': 35, 'max_features': 7}. Best is trial 0 with value: 10.220184515543837.[0m
[32m[I 2023-04-20 11:23:57,144

Optuna execution time:  173.46114015579224 seconds


In [30]:
#Getting the best trial..
study.best_trial

FrozenTrial(number=81, state=TrialState.COMPLETE, values=[9.663648383264043], datetime_start=datetime.datetime(2023, 4, 20, 11, 26, 16, 144677), datetime_complete=datetime.datetime(2023, 4, 20, 11, 26, 17, 948914), params={'n_estimators': 275, 'max_depth': 17, 'max_features': 10}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'n_estimators': IntDistribution(high=500, log=False, low=2, step=1), 'max_depth': IntDistribution(high=50, log=False, low=1, step=1), 'max_features': IntDistribution(high=10, log=False, low=3, step=1)}, trial_id=81, value=None)

In [31]:

"""OPTUNA BASED ON MINIMIZED LOSS"""

rf = RandomForestRegressor(n_estimators=275, max_depth= 17,\
                                                 max_features= 10, random_state=3)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

print('Mean Absolute Error (MAE):', mean_absolute_error(y_test, y_pred))
print('Mean Absolute Percentage Error (MAPE):', mean_absolute_percentage_error(y_test, y_pred))
print('Mean Squared Error (MSE):', mean_squared_error(y_test, y_pred))

Mean Absolute Error (MAE): 1.6684159642573464
Mean Absolute Percentage Error (MAPE): 0.006910365818076888
Mean Squared Error (MSE): 9.663648383264043
