In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
df = pd.read_csv('Forest_Fire.csv')
df

Unnamed: 0,temp,RH,wind,rain,Fire
0,8.2,51,6.7,0,0
1,18.0,33,0.9,0,0
2,14.6,33,1.3,0,0
3,8.3,97,4.0,1,0
4,11.4,99,1.8,0,0
...,...,...,...,...,...
512,27.8,32,2.7,0,1
513,21.9,71,5.8,0,1
514,21.2,70,6.7,0,1
515,25.6,42,4.0,0,0


In [3]:
X = df[['temp', 'RH', 'wind', 'rain']]
y = df['Fire']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Train Random Forest classifier with best hyperparameters
rf_classifier = RandomForestClassifier(n_estimators = 701,
                                        max_depth = 19,
                                        min_samples_split = 3,
                                        min_samples_leaf = 1,
                                        random_state = 42)
rf_classifier.fit(X_train, y_train)

# Make predictions on test set
y_pred = rf_classifier.predict(X_test)

# Display confusion matrix
print('Confusion matrix:')
print(confusion_matrix(y_test, y_pred))

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Confusion matrix:
[[35 16]
 [19 34]]
Accuracy: 0.6634615384615384


In [6]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from bayes_opt import BayesianOptimization

# Assuming you have already loaded and split your dataset into X_train, X_test, y_train, and y_test

# Define Decision Tree cross-validation function with hyperparameters
def dt_cv(max_depth, min_samples_split, min_samples_leaf, max_features):
    dt_classifier = DecisionTreeClassifier(max_depth=int(max_depth),
                                           min_samples_split=int(min_samples_split),
                                           min_samples_leaf=int(min_samples_leaf),
                                           max_features=max_features,
                                           random_state=42)
    
    dt_classifier.fit(X_train, y_train)
    y_pred = dt_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Define parameter space for Bayesian optimization
pbounds = {
    'max_depth': (3, 20),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 20),
    'max_features': (0.1, 1.0)
}

# Perform Bayesian optimization
optimizer = BayesianOptimization(
    f=dt_cv,
    pbounds=pbounds,
    random_state=1,
)

optimizer.maximize(init_points=20000, n_iter=0)


|   iter    |  target   | max_depth | max_fe... | min_sa... | min_sa... |
-------------------------------------------------------------------------
| [0m1        [0m | [0m0.5962   [0m | [0m10.09    [0m | [0m0.7483   [0m | [0m1.002    [0m | [0m7.442    [0m |
| [0m2        [0m | [0m0.5288   [0m | [0m5.495    [0m | [0m0.1831   [0m | [0m4.539    [0m | [0m8.22     [0m |
| [95m3        [0m | [95m0.6058   [0m | [95m9.745    [0m | [95m0.5849   [0m | [95m8.965    [0m | [95m14.33    [0m |
| [0m4        [0m | [0m0.5962   [0m | [0m6.476    [0m | [0m0.8903   [0m | [0m1.52     [0m | [0m14.07    [0m |
| [0m5        [0m | [0m0.4712   [0m | [0m10.09    [0m | [0m0.6028   [0m | [0m3.667    [0m | [0m5.566    [0m |
| [0m6        [0m | [0m0.5769   [0m | [0m16.61    [0m | [0m0.9714   [0m | [0m6.955    [0m | [0m14.46    [0m |
| [0m7        [0m | [0m0.4904   [0m | [0m17.9     [0m | [0m0.9051   [0m | [0m2.616    [0m | [0m2.703   

In [8]:

# Get best hyperparameters
best_params = optimizer.max['params']

print("Best Hyperparameters:")
print(best_params)

Best Hyperparameters:
{'max_depth': 19.075782844768653, 'max_features': 0.44552281778068414, 'min_samples_leaf': 4.590289271670417, 'min_samples_split': 10.439101093559257}


In [9]:
# Train Decision Tree classifier with best hyperparameters
rf_classifier = RandomForestClassifier(n_estimators = 701,
                                        max_depth = 19,
                                        min_samples_split = 3,
                                        min_samples_leaf = 1,
                                        random_state = 42)
rf_classifier.fit(X_train, y_train)

# Make predictions on test set
y_pred = rf_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6634615384615384
