In [2]:
from google.colab import drive
drive.mount('/content/data')

Drive already mounted at /content/data; to attempt to forcibly remount, call drive.mount("/content/data", force_remount=True).


In [3]:
import numpy as np
import pandas as pd
import cv2
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
import joblib

In [4]:
# Define the directories containing the fire and no-fire images
fire_images_dir = "/content/data/MyDrive/fire_dataset/fire_images/"
no_fire_images_dir = "/content/data/MyDrive/fire_dataset/non_fire_images/"

In [5]:
# Load the fire images into arrays and assign target labels
fire_images = []
fire_targets = []
for image_file in os.listdir(fire_images_dir):
    image_path = os.path.join(fire_images_dir, image_file)
    image = cv2.imread(image_path)

    # Resize the image to 100x100 pixels
    resized_image = cv2.resize(image, (100, 100)) 

    # Normalize pixel values between 0 and 1
    normalized_image = resized_image / 255.0  
    fire_images.append(normalized_image)

    # Target 1 represents fire
    fire_targets.append(1)

In [6]:
# Load the no-fire images into arrays and assign target labels
no_fire_images = []
no_fire_targets = []
for image_file in os.listdir(no_fire_images_dir):
    image_path = os.path.join(no_fire_images_dir, image_file)
    image = cv2.imread(image_path)

    # Resize the image to 100x100 pixels
    resized_image = cv2.resize(image, (100, 100))  

    # Normalize pixel values between 0 and 1
    normalized_image = resized_image / 255.0  
    no_fire_images.append(normalized_image)

    # Target 0 represents no-fire
    no_fire_targets.append(0)

In [7]:
# Combine the fire and no-fire images and targets into arrays
x = np.array(fire_images + no_fire_images)
y = np.array(fire_targets + no_fire_targets)

In [14]:
# Split the data into train and test sets with a 80:20 split ratio
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Print the shape of the train and test sets
print(f"Train set shape: images-> X={x_train.shape}, labels-> y={y_train.shape}")
print(f"Test set shape: images-> X={x_test.shape}, labels-> y={y_test.shape}")

Train set shape: images-> X=(813, 100, 100, 3), labels-> y=(813,)
Test set shape: images-> X=(204, 100, 100, 3), labels-> y=(204,)


In [15]:
# Initialize the logistic regression model
lgreg = LogisticRegression(random_state=42, max_iter=1000)

# Fit the model to the training data
lgreg.fit(x_train.reshape(len(x_train), -1), y_train)

# Predict the target values for the test data
y_pred = lgreg.predict(x_test.reshape(len(x_test), -1))

In [16]:
# Compute the accuracy score
accuracy = accuracy_score(y_test, y_pred)

# Compute the confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)

# Print the results
print(f"Accuracy is: {accuracy}")
print(f"Confusion matrix is:\n{conf_matrix}")

Accuracy is: 0.9362745098039216
Confusion matrix is:
[[ 38  10]
 [  3 153]]


In [17]:
# Compute predicted probabilities for the training data
y_proba = lgreg.predict_proba(x_train.reshape(len(x_train), -1))

# Define a range of probability thresholds to evaluate
thresholds = np.array([1.0,0.9,0.8,0.7,0.6,0.5,0.4,0.3,0.2,0.1])

# Initialize variables for storing the best threshold and accuracy
best_threshold = None
best_accuracy = 0.0

# Iterate over the thresholds and compute the accuracy for each one
for threshold in thresholds:
    # Convert predicted probabilities to binary predictions using the threshold
    y_pred = (y_proba[:, 1] > threshold).astype(int)

    # Compute the accuracy score
    accuracy = accuracy_score(y_train, y_pred)

    # Update the best threshold and accuracy if necessary
    if accuracy > best_accuracy:
        best_threshold = threshold
        best_accuracy = accuracy

# Print the best threshold and accuracy
print(f"Best threshold: {best_threshold}")
print(f"Best accuracy: {best_accuracy}")

Best threshold: 0.9
Best accuracy: 1.0


In [18]:
# Compute predicted probabilities and binary predictions for the test data using the best threshold
y_proba_test = lgreg.predict_proba(x_test.reshape(len(x_test), -1))
y_pred_test = (y_proba_test[:, 1] > best_threshold).astype(int)

# Compute the accuracy score and confusion matrix for the test data
accuracy = accuracy_score(y_test, y_pred_test)
conf_matrix = confusion_matrix(y_test, y_pred_test)

# Print the results
print(f"Accuracy: {accuracy}")
print(f"Confusion matrix:\n{conf_matrix}", )

Accuracy: 0.9411764705882353
Confusion matrix:
[[ 43   5]
 [  7 149]]


In [13]:
# Save the best model to a file
joblib.dump(lgreg, 'best_model.pkl')

['best_model.pkl']