In [2]:
import cv2
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import os
import numpy as np

In [None]:
train_folder = "/kaggle/input/firedataset/dataset/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/train"
test_folder = "/kaggle/input/firedataset/dataset/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/test"
# Tên các thư mục
categories = ["Smoke", "fire", "non fire"]
# Đếm số lượng ảnh trong mỗi thư mục
data_distribution = {}
def count_file(base_dir):
    for category in categories:
        folder_path = os.path.join(base_dir, category)
        if os.path.exists(folder_path):
            data_distribution[category] = len([file for file in os.listdir(folder_path) if file.endswith(('jpg', 'jpeg', 'png'))])
        else:
            data_distribution[category] = 0  # Thư mục không tồn tại
    
    # In kết quả
    for category, count in data_distribution.items():
        print(f"{category}: {count} images")
print('Số lượng ảnh tập train:')
count_file(train_folder)
print('Số lượng ảnh tập test:')
count_file(test_folder)

In [3]:
def hog_feature(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hog = cv2.HOGDescriptor(winSize=(128, 128),
                        blockSize=(8, 8),
                        blockStride=(4, 4),
                        cellSize=(4, 4),
                        nbins=9)
    features = hog.compute(gray)
    return features.flatten()

In [4]:
def process_image(file_path, label):
    image = cv2.imread(file_path)
    if image is not None:
        image = cv2.resize(image, (128, 128))  # Resize image
        img_hog = hog_feature(image)  # Extract HOG features
        return img_hog, label
    return None

In [5]:
from concurrent.futures import ThreadPoolExecutor
def load_dataset(base_path):
    data = []
    labels = []
    classes = {'Smoke': 0, 'fire': 1, 'non fire': 2}
# Load ảnh và xử lý
    image_paths = []
    for category, label in classes.items():
        folder_path = os.path.join(base_path, category)
        for filename in os.listdir(folder_path):
            file_path = os.path.join(folder_path, filename)
            image_paths.append((file_path, label))
    
    with ThreadPoolExecutor() as executor:
        results = executor.map(lambda p: process_image(*p), image_paths)

    # Collect results
    for result in results:
        if result is not None:
            img_hog, label = result
            data.append(img_hog)
            labels.append(label)

    return np.array(data), np.array(labels)

In [6]:
X_train, y_train = load_dataset(train_folder)

In [7]:
X_test, y_test = load_dataset(test_folder)

In [9]:
from sklearn.preprocessing import StandardScaler

# Chuẩn hóa dữ liệu
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

from sklearn.linear_model import SGDClassifier

# Sử dụng SGDClassifier cho bộ dữ liệu lớn (hỗ trợ huấn luyện theo lô nhỏ)
sgd = SGDClassifier(loss='hinge', alpha=1e-3, max_iter=10, random_state=42)
sgd.fit(X_train_scaled, y_train)

# Test and Evaluate
y_pred = sgd.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))



Accuracy: 0.7961710639108487


In [12]:
from sklearn.metrics import accuracy_score, classification_report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.62      0.73      3500
           1       0.86      0.92      0.89      3500
           2       0.69      0.85      0.76      3499

    accuracy                           0.80     10499
   macro avg       0.81      0.80      0.79     10499
weighted avg       0.81      0.80      0.79     10499



In [13]:
from joblib import dump, load

# Lưu model
dump(sgd, 'svm.joblib')

# Tải lại model
loaded_model = load('svm.joblib')


