In [3]:
import os
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
import torch

# Paths to your dataset
train_dir = '/kaggle/input/disasterclassification/train'
val_dir = '/kaggle/input/disasterclassification/validation'

# Define image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to ResNet input size
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ResNet normalization
])

# Load datasets
train_dataset = ImageFolder(train_dir, transform=transform)
val_dataset = ImageFolder(val_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Load pre-trained ResNet model (feature extractor)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet = models.resnet18(pretrained=True)
resnet = torch.nn.Sequential(*(list(resnet.children())[:-1]))  # Remove classification head
resnet.to(device)
resnet.eval()

# Function to extract features
def extract_features(dataloader, model):
    features, labels = [], []
    with torch.no_grad():
        for images, label_batch in dataloader:
            images = images.to(device)
            outputs = model(images)
            outputs = outputs.view(outputs.size(0), -1)  # Flatten the features
            features.append(outputs.cpu().numpy())
            labels.append(label_batch.numpy())
    return np.vstack(features), np.hstack(labels)

# Extract features from train and validation sets
print("Extracting features...")
train_features, train_labels = extract_features(train_loader, resnet)
val_features, val_labels = extract_features(val_loader, resnet)

# Hyperparameter tuning for kNN
print("Tuning kNN hyperparameters...")
param_grid = {'n_neighbors': [ 7, 9,11], 'weights': ['uniform', 'distance']}
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(train_features, train_labels)

# Best parameters and validation performance
print(f"Best Parameters: {grid_search.best_params_}")
best_knn = grid_search.best_estimator_
val_predictions = best_knn.predict(val_features)

print("Validation Accuracy:", accuracy_score(val_labels, val_predictions))
print("Classification Report:")
print(classification_report(val_labels, val_predictions, target_names=train_dataset.classes))

# Optionally, save the best model
# import joblib
# joblib.dump(best_knn, "best_knn_model.pkl")




Extracting features...
Tuning kNN hyperparameters...
Best Parameters: {'n_neighbors': 9, 'weights': 'distance'}
Validation Accuracy: 0.9625
Classification Report:
              precision    recall  f1-score   support

     cyclone       0.97      0.99      0.98       100
  earthquake       0.97      0.97      0.97       100
       flood       0.97      0.93      0.95       100
    wildfire       0.94      0.96      0.95       100

    accuracy                           0.96       400
   macro avg       0.96      0.96      0.96       400
weighted avg       0.96      0.96      0.96       400



In [2]:
from sklearn.metrics import classification_report, accuracy_score

# Classification Report for Training Set
train_predictions = best_knn.predict(train_features)

print("\nTraining Accuracy:", accuracy_score(train_labels, train_predictions))
print("\nTraining Classification Report:")
print(classification_report(train_labels, train_predictions, target_names=train_dataset.classes))

# Classification Report for Validation Set
val_predictions = best_knn.predict(val_features)

print("\nValidation Accuracy:", accuracy_score(val_labels, val_predictions))
print("\nValidation Classification Report:")
print(classification_report(val_labels, val_predictions, target_names=train_dataset.classes))



Training Accuracy: 1.0

Training Classification Report:
              precision    recall  f1-score   support

     cyclone       1.00      1.00      1.00       400
  earthquake       1.00      1.00      1.00       400
       flood       1.00      1.00      1.00       400
    wildfire       1.00      1.00      1.00       400

    accuracy                           1.00      1600
   macro avg       1.00      1.00      1.00      1600
weighted avg       1.00      1.00      1.00      1600


Validation Accuracy: 0.9625

Validation Classification Report:
              precision    recall  f1-score   support

     cyclone       0.97      0.99      0.98       100
  earthquake       0.97      0.97      0.97       100
       flood       0.97      0.93      0.95       100
    wildfire       0.94      0.96      0.95       100

    accuracy                           0.96       400
   macro avg       0.96      0.96      0.96       400
weighted avg       0.96      0.96      0.96       400



In [6]:
import os
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from PIL import Image

# Paths to your dataset
train_dir = '/kaggle/input/disasterclassification/train'
val_dir = '/kaggle/input/disasterclassification/validation'

# Image preprocessing: Resize images to a smaller size (e.g., 64x64) for simplicity
def load_images_from_folder(folder):
    images = []
    labels = []
    classes = sorted(os.listdir(folder))  # Assumes subfolders are class names
    for idx, class_name in enumerate(classes):
        class_path = os.path.join(folder, class_name)
        if os.path.isdir(class_path):
            for image_name in os.listdir(class_path):
                image_path = os.path.join(class_path, image_name)
                try:
                    # Load image, resize, and flatten into a 1D array
                    img = Image.open(image_path).resize((64, 64))
                    img_array = np.array(img).flatten()
                    images.append(img_array)
                    labels.append(idx)
                except Exception as e:
                    print(f"Error loading image {image_path}: {e}")
    return np.array(images), np.array(labels), classes

# Load train and validation datasets
print("Loading training data...")
train_features, train_labels, class_names = load_images_from_folder(train_dir)

print("Loading validation data...")
val_features, val_labels, _ = load_images_from_folder(val_dir)

# Normalize features to [0, 1] range
train_features = train_features / 255.0
val_features = val_features / 255.0

# Hyperparameter tuning for kNN
print("Tuning kNN hyperparameters...")
param_grid = {'n_neighbors': [3, 4,5,6, 7, 9, 15,20], 'weights': ['uniform', 'distance']}
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(train_features, train_labels)

# Best parameters and validation performance
print(f"Best Parameters: {grid_search.best_params_}")
best_knn = grid_search.best_estimator_

# Training performance
train_predictions = best_knn.predict(train_features)
print("\nTraining Accuracy:", accuracy_score(train_labels, train_predictions))
print("\nTraining Classification Report:")
print(classification_report(train_labels, train_predictions, target_names=class_names))

# Validation performance
val_predictions = best_knn.predict(val_features)
print("\nValidation Accuracy:", accuracy_score(val_labels, val_predictions))
print("\nValidation Classification Report:")
print(classification_report(val_labels, val_predictions, target_names=class_names))


Loading training data...
Loading validation data...
Tuning kNN hyperparameters...
Best Parameters: {'n_neighbors': 5, 'weights': 'distance'}

Training Accuracy: 1.0

Training Classification Report:
              precision    recall  f1-score   support

     cyclone       1.00      1.00      1.00       400
  earthquake       1.00      1.00      1.00       400
       flood       1.00      1.00      1.00       400
    wildfire       1.00      1.00      1.00       400

    accuracy                           1.00      1600
   macro avg       1.00      1.00      1.00      1600
weighted avg       1.00      1.00      1.00      1600


Validation Accuracy: 0.48

Validation Classification Report:
              precision    recall  f1-score   support

     cyclone       0.96      0.43      0.59       100
  earthquake       0.42      0.05      0.09       100
       flood       0.41      0.54      0.47       100
    wildfire       0.43      0.90      0.58       100

    accuracy                     

In [8]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from PIL import Image
import os
import numpy as np

# Paths to your dataset
train_dir = '/kaggle/input/disasterclassification/train'
val_dir = '/kaggle/input/disasterclassification/validation'

# Image preprocessing: Resize images to a smaller size (e.g., 64x64) for simplicity
def load_images_from_folder(folder):
    images = []
    labels = []
    classes = sorted(os.listdir(folder))  # Assumes subfolders are class names
    for idx, class_name in enumerate(classes):
        class_path = os.path.join(folder, class_name)
        if os.path.isdir(class_path):
            for image_name in os.listdir(class_path):
                image_path = os.path.join(class_path, image_name)
                try:
                    # Load image, resize, and flatten into a 1D array
                    img = Image.open(image_path).resize((64, 64))
                    img_array = np.array(img).flatten()
                    images.append(img_array)
                    labels.append(idx)
                except Exception as e:
                    print(f"Error loading image {image_path}: {e}")
    return np.array(images), np.array(labels), classes

# Load train and validation datasets
print("Loading training data...")
train_features, train_labels, class_names = load_images_from_folder(train_dir)

print("Loading validation data...")
val_features, val_labels, _ = load_images_from_folder(val_dir)

# Normalize features to [0, 1] range
train_features = train_features / 255.0
val_features = val_features / 255.0

# Apply LDA
print("Applying LDA for dimensionality reduction...")
lda = LinearDiscriminantAnalysis(n_components=3)  # Max components for 4 classes is 3
train_features_lda = lda.fit_transform(train_features, train_labels)
val_features_lda = lda.transform(val_features)

# Hyperparameter tuning for kNN
print("Tuning kNN hyperparameters...")
param_grid = {'n_neighbors': [1, 3, 5, 7, 9], 'weights': ['uniform', 'distance']}
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(train_features_lda, train_labels)

# Best parameters and validation performance
print(f"Best Parameters: {grid_search.best_params_}")
best_knn = grid_search.best_estimator_

# Training performance
train_predictions = best_knn.predict(train_features_lda)
print("\nTraining Accuracy:", accuracy_score(train_labels, train_predictions))
print("\nTraining Classification Report:")
print(classification_report(train_labels, train_predictions, target_names=class_names))

# Validation performance
val_predictions = best_knn.predict(val_features_lda)
print("\nValidation Accuracy:", accuracy_score(val_labels, val_predictions))
print("\nValidation Classification Report:")
print(classification_report(val_labels, val_predictions, target_names=class_names))


Loading training data...
Loading validation data...
Applying LDA for dimensionality reduction...


ValueError: n_components cannot be larger than min(n_features, n_classes - 1).

In [11]:
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from PIL import Image
import os
import numpy as np


# Image preprocessing: Resize images to a smaller size (e.g., 64x64) for simplicity
def load_images_from_folder(folder):
    images = []
    labels = []
    classes = sorted(os.listdir(folder))  # Assumes subfolders are class names
    for idx, class_name in enumerate(classes):
        class_path = os.path.join(folder, class_name)
        if os.path.isdir(class_path):
            for image_name in os.listdir(class_path):
                image_path = os.path.join(class_path, image_name)
                try:
                    # Load image, resize, and flatten into a 1D array
                    img = Image.open(image_path).resize((128, 128))
                    img_array = np.array(img).flatten()
                    images.append(img_array)
                    labels.append(idx)
                except Exception as e:
                    print(f"Error loading image {image_path}: {e}")
    return np.array(images), np.array(labels), classes

# Load train and validation datasets
print("Loading training data...")
train_features, train_labels, class_names = load_images_from_folder(train_dir)

print("Loading validation data...")
val_features, val_labels, _ = load_images_from_folder(val_dir)

# Normalize features to [0, 1] range
train_features = train_features / 255.0
val_features = val_features / 255.0

# Apply PCA for dimensionality reduction
print("Applying PCA for dimensionality reduction...")
pca = PCA(n_components=70)  # Adjust the number of components as needed
train_features_pca = pca.fit_transform(train_features)
val_features_pca = pca.transform(val_features)

# Hyperparameter tuning for kNN
print("Tuning kNN hyperparameters...")
param_grid = {'n_neighbors': [3, 5, 7, 9, 11, 15], 'weights': ['uniform', 'distance']}
knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(train_features_pca, train_labels)

# Best parameters and validation performance
print(f"Best Parameters: {grid_search.best_params_}")
best_knn = grid_search.best_estimator_

# Training performance
train_predictions = best_knn.predict(train_features_pca)
print("\nTraining Accuracy:", accuracy_score(train_labels, train_predictions))
print("\nTraining Classification Report:")
print(classification_report(train_labels, train_predictions, target_names=class_names))

# Validation performance
val_predictions = best_knn.predict(val_features_pca)
print("\nValidation Accuracy:", accuracy_score(val_labels, val_predictions))
print("\nValidation Classification Report:")
print(classification_report(val_labels, val_predictions, target_names=class_names))


Loading training data...
Loading validation data...
Applying PCA for dimensionality reduction...
Tuning kNN hyperparameters...
Best Parameters: {'n_neighbors': 7, 'weights': 'distance'}

Training Accuracy: 1.0

Training Classification Report:
              precision    recall  f1-score   support

     cyclone       1.00      1.00      1.00       400
  earthquake       1.00      1.00      1.00       400
       flood       1.00      1.00      1.00       400
    wildfire       1.00      1.00      1.00       400

    accuracy                           1.00      1600
   macro avg       1.00      1.00      1.00      1600
weighted avg       1.00      1.00      1.00      1600


Validation Accuracy: 0.6

Validation Classification Report:
              precision    recall  f1-score   support

     cyclone       0.98      0.50      0.66       100
  earthquake       0.52      0.25      0.34       100
       flood       0.49      0.78      0.60       100
    wildfire       0.62      0.87      0.72 