In [4]:
import cv2
import os
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [5]:
# Function to load and preprocess images
def load_and_preprocess_images(folder_path):
    data = []
    labels = []
    for label, category in enumerate(CATEGORIES):
        category_path = os.path.join(folder_path, category)
        for filename in os.listdir(category_path):
            img_path = os.path.join(category_path, filename)
            img = cv2.imread(img_path)
            if img is not None:
                img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))  # Resize
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
                img = img / 255.0  # Normalize to range 0-1
                img = img.flatten()  # Flatten to 1D array
                data.append(img)
                labels.append(label)
    return np.array(data), np.array(labels)

In [12]:
# Define image size and categories
IMG_SIZE = 64  # Resize images to 64x64 pixels
CATEGORIES = ['earthquake', 'flood', 'wildfire', 'cyclone']

# Load and preprocess training and validation data
train_folder = '/kaggle/input/disasterclassification/train'
val_folder = '/kaggle/input/disasterclassification/validation'
X_train, y_train = load_and_preprocess_images(train_folder)
X_val, y_val = load_and_preprocess_images(val_folder)

In [13]:
# Create an SVM model pipeline with data scaling
# SVM with an RBF kernel, commonly effective for image data
svm_model = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=1, gamma='scale'))

# Train the SVM model on the training data
svm_model.fit(X_train, y_train)

In [14]:
y_train_pred = svm_model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
train_f1_score = f1_score(y_train, y_train_pred, average='weighted')
print("Training Accuracy:", train_accuracy)
print("Training F1 Score:", train_f1_score)

# Evaluate on validation data
y_val_pred = svm_model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)
val_f1_score = f1_score(y_val, y_val_pred, average='weighted')
print("Validation Accuracy:", val_accuracy)
print("Validation F1 Score:", val_f1_score)

# Display classification report for validation data
print("\nClassification Report for Validation Data:\n")
print(classification_report(y_val, y_val_pred, target_names=CATEGORIES))

Training Accuracy: 0.900625
Training F1 Score: 0.9011629689008369
Validation Accuracy: 0.705
Validation F1 Score: 0.7070048954161103

Classification Report for Validation Data:

              precision    recall  f1-score   support

  earthquake       0.64      0.69      0.67       100
       flood       0.65      0.68      0.67       100
    wildfire       0.68      0.78      0.73       100
     cyclone       0.89      0.67      0.77       100

    accuracy                           0.70       400
   macro avg       0.72      0.71      0.71       400
weighted avg       0.72      0.70      0.71       400



In [15]:
from sklearn.model_selection import GridSearchCV

# Define parameter grid
param_grid = {
    'svc__C': [0.1, 1, 10, 100],           # Regularization parameter
    'svc__kernel': ['linear', 'rbf', 'poly'],  # Kernel type
    'svc__gamma': ['scale', 'auto'],        # Gamma parameter for 'rbf' and 'poly' kernels
    'svc__degree': [2, 3, 4]                # Degree parameter for 'poly' kernel (ignored for others)
}

# Initialize the pipeline with StandardScaler and SVC
pipeline = make_pipeline(StandardScaler(), SVC())

# Set up the GridSearchCV with cross-validation
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='f1_weighted', verbose=2, n_jobs=-1)

# Run grid search on the training data
grid_search.fit(X_train, y_train)

# Get the best parameters and model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", grid_search.best_score_)

# Evaluate the best model on the validation set
y_val_pred = best_model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)
val_f1_score = f1_score(y_val, y_val_pred, average='weighted')
print("Validation Accuracy:", val_accuracy)
print("Validation F1 Score:", val_f1_score)


Fitting 5 folds for each of 72 candidates, totalling 360 fits
Best Parameters: {'svc__C': 100, 'svc__degree': 2, 'svc__gamma': 'scale', 'svc__kernel': 'rbf'}
Best Cross-Validation Score: 0.688203273762962
Validation Accuracy: 0.72
Validation F1 Score: 0.7180441079077593
[CV] END svc__C=0.1, svc__degree=2, svc__gamma=scale, svc__kernel=linear; total time=  10.8s
[CV] END svc__C=0.1, svc__degree=2, svc__gamma=scale, svc__kernel=linear; total time=  11.2s
[CV] END svc__C=0.1, svc__degree=2, svc__gamma=scale, svc__kernel=rbf; total time=  12.4s
[CV] END svc__C=0.1, svc__degree=2, svc__gamma=scale, svc__kernel=poly; total time=  10.1s
[CV] END svc__C=0.1, svc__degree=2, svc__gamma=auto, svc__kernel=linear; total time=  11.3s
[CV] END svc__C=0.1, svc__degree=2, svc__gamma=auto, svc__kernel=rbf; total time=  13.1s
[CV] END svc__C=0.1, svc__degree=2, svc__gamma=auto, svc__kernel=rbf; total time=  11.3s
[CV] END svc__C=0.1, svc__degree=2, svc__gamma=auto, svc__kernel=poly; total time=  12.5s
[C

In [None]:
from sklearn.decomposition import PCA
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform

# Define the pipeline with StandardScaler, PCA, and SVC
pipeline = make_pipeline(
    StandardScaler(),
    PCA(n_components=50),  # Experiment with number of components
    SVC()
)

# Define the parameter distribution
param_dist = {
    'svc__C': uniform(0.1, 100),  # Range of C values to try
    'svc__kernel': ['linear', 'rbf', 'poly'],  # Kernel types
    'svc__gamma': ['scale', 'auto'],           # Gamma values for 'rbf' and 'poly'
    'svc__degree': [2, 3, 4]                   # Degrees for polynomial kernel
}

# Set up the RandomizedSearchCV with cross-validation
random_search = RandomizedSearchCV(
    pipeline, param_distributions=param_dist, n_iter=20,  # n_iter limits to 20 random combinations
    cv=5, scoring='f1_weighted', verbose=2, n_jobs=-1, random_state=42
)

# Fit random search on the training data
random_search.fit(X_train, y_train)

# Get the best parameters and model
best_params = random_search.best_params_
best_model = random_search.best_estimator_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", random_search.best_score_)

# Evaluate the best model on the validation set
y_val_pred = best_model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)
val_f1_score = f1_score(y_val, y_val_pred, average='weighted')
print("Validation Accuracy:", val_accuracy)
print("Validation F1 Score:", val_f1_score)

# Display classification report for validation data
print("\nClassification Report for Validation Data:\n")
print(classification_report(y_val, y_val_pred, target_names=CATEGORIES))


Fitting 5 folds for each of 20 candidates, totalling 100 fits
