# Image Classification: Train, Test, and Evaluate Model

This notebook trains an image classification model on 4 classes (cloudy, desert, green_area, water), tests it, and evaluates its accuracy.


In [1]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Deep learning libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

print("TensorFlow version:", tf.__version__)
print("GPU Available:", tf.config.list_physical_devices('GPU'))


TensorFlow version: 2.17.0-dev20240417
GPU Available: []


## 1. Load and Preprocess Data


In [2]:
# Define parameters
IMG_SIZE = (224, 224)  # Standard size for transfer learning
BATCH_SIZE = 32
EPOCHS = 10
TEST_SIZE = 0.2
RANDOM_STATE = 42

# Define class folders
class_folders = ['cloudy', 'desert', 'green_area', 'water']
class_labels = {folder: idx for idx, folder in enumerate(class_folders)}
print("Class labels:", class_labels)


Class labels: {'cloudy': 0, 'desert': 1, 'green_area': 2, 'water': 3}


In [None]:
# Load images and labels with memory optimization
def load_images_from_folder(folder_path, label, img_size=IMG_SIZE):
    """Load all images from a folder and assign labels - using float32 for memory efficiency"""
    images = []
    labels = []
    folder_full_path = os.path.join('.', folder_path)
    
    if not os.path.exists(folder_full_path):
        print(f"Warning: Folder {folder_full_path} does not exist")
        return images, labels
    
    image_files = [f for f in os.listdir(folder_full_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
    
    for img_file in image_files:
        try:
            img_path = os.path.join(folder_full_path, img_file)
            img = Image.open(img_path).convert('RGB')
            img = img.resize(img_size)
            # Convert to float32 immediately to save memory (half the size of float64)
            img_array = (np.array(img, dtype=np.float32) / 255.0).astype(np.float32)
            images.append(img_array)
            labels.append(label)
        except Exception as e:
            print(f"Error loading {img_path}: {e}")
            continue
    
    return images, labels

# Load all data
all_images = []
all_labels = []

print("Loading images...")
for folder in class_folders:
    print(f"Loading {folder}...")
    images, labels = load_images_from_folder(folder, class_labels[folder])
    all_images.extend(images)
    all_labels.extend(labels)
    print(f"  Loaded {len(images)} images from {folder}")

# Convert to numpy arrays with explicit float32 dtype
X = np.array(all_images, dtype=np.float32)
y = np.array(all_labels, dtype=np.int32)

print(f"\nTotal images loaded: {X.shape[0]}")
print(f"Image shape: {X.shape[1:]}")
print(f"Data type: {X.dtype} (memory efficient)")
print(f"Number of classes: {len(class_folders)}")
print(f"Class distribution: {np.bincount(y)}")


Loading images...
Loading cloudy...
  Loaded 1500 images from cloudy
Loading desert...
  Loaded 1131 images from desert
Loading green_area...
Error loading .\green_area\Forest_1955.jpg: Unable to allocate 1.15 MiB for an array with shape (224, 224, 3) and data type float64
Error loading .\green_area\Forest_1995.jpg: Unable to allocate 1.15 MiB for an array with shape (224, 224, 3) and data type float64
Error loading .\green_area\Forest_2005.jpg: Unable to allocate 1.15 MiB for an array with shape (224, 224, 3) and data type float64
Error loading .\green_area\Forest_2007.jpg: Unable to allocate 1.15 MiB for an array with shape (224, 224, 3) and data type float64
Error loading .\green_area\Forest_2009.jpg: Unable to allocate 1.15 MiB for an array with shape (224, 224, 3) and data type float64
Error loading .\green_area\Forest_2010.jpg: Unable to allocate 1.15 MiB for an array with shape (224, 224, 3) and data type float64
Error loading .\green_area\Forest_2011.jpg: Unable to allocate 1.1

In [None]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=TEST_SIZE, 
    random_state=RANDOM_STATE, 
    stratify=y  # Ensure balanced split
)

print(f"Training set size: {X_train.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")
print(f"Training class distribution: {np.bincount(y_train)}")
print(f"Test class distribution: {np.bincount(y_test)}")


## 2. Build Model (Transfer Learning with VGG16)


In [None]:
# Build model using transfer learning
def create_model(num_classes=4):
    # Load pre-trained VGG16 model
    base_model = VGG16(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)
    )
    
    # Freeze base model layers
    base_model.trainable = False
    
    # Add custom classification head
    inputs = keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
    x = base_model(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(512, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs, outputs)
    return model

# Create model
model = create_model(num_classes=len(class_folders))
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Display model architecture
model.summary()


## 3. Data Augmentation


In [None]:
# Create data augmentation generator for training
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    fill_mode='nearest'
)

# For validation/test, only rescale
test_datagen = ImageDataGenerator()

# Create generators
train_generator = train_datagen.flow(
    X_train, y_train,
    batch_size=BATCH_SIZE,
    shuffle=True
)

test_generator = test_datagen.flow(
    X_test, y_test,
    batch_size=BATCH_SIZE,
    shuffle=False
)


## 4. Train the Model


In [None]:
# Define callbacks
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=2,
        min_lr=0.00001
    )
]

# Train the model
print("Starting training...")
history = model.fit(
    train_generator,
    steps_per_epoch=len(X_train) // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=test_generator,
    validation_steps=len(X_test) // BATCH_SIZE,
    callbacks=callbacks,
    verbose=1
)

print("\nTraining completed!")


## 5. Visualize Training History


In [None]:
# Plot training history
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Plot accuracy
axes[0].plot(history.history['accuracy'], label='Training Accuracy', marker='o')
axes[0].plot(history.history['val_accuracy'], label='Validation Accuracy', marker='s')
axes[0].set_title('Model Accuracy')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Accuracy')
axes[0].legend()
axes[0].grid(True)

# Plot loss
axes[1].plot(history.history['loss'], label='Training Loss', marker='o')
axes[1].plot(history.history['val_loss'], label='Validation Loss', marker='s')
axes[1].set_title('Model Loss')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Loss')
axes[1].legend()
axes[1].grid(True)

plt.tight_layout()
plt.show()


## 6. Test and Evaluate the Model


In [None]:
# Make predictions on test set
print("Making predictions on test set...")
y_pred_proba = model.predict(X_test, verbose=1)
y_pred = np.argmax(y_pred_proba, axis=1)

# Calculate accuracy
test_accuracy = accuracy_score(y_test, y_pred)
print(f"\n{'='*50}")
print(f"Test Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"{'='*50}")


In [None]:
# Detailed classification report
print("\nClassification Report:")
print("="*50)
print(classification_report(
    y_test, 
    y_pred, 
    target_names=class_folders,
    digits=4
))


In [None]:
# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm, 
    annot=True, 
    fmt='d', 
    cmap='Blues',
    xticklabels=class_folders,
    yticklabels=class_folders
)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.show()

# Calculate per-class accuracy
print("\nPer-Class Accuracy:")
print("="*50)
for i, class_name in enumerate(class_folders):
    class_mask = y_test == i
    if np.sum(class_mask) > 0:
        class_accuracy = accuracy_score(y_test[class_mask], y_pred[class_mask])
        print(f"{class_name:15s}: {class_accuracy:.4f} ({class_accuracy*100:.2f}%)")


## 7. Visualize Sample Predictions


In [None]:
# Display sample predictions
def plot_sample_predictions(X_test, y_test, y_pred, y_pred_proba, num_samples=12):
    fig, axes = plt.subplots(3, 4, figsize=(16, 12))
    axes = axes.ravel()
    
    # Select random samples
    indices = np.random.choice(len(X_test), num_samples, replace=False)
    
    for idx, ax in enumerate(axes):
        i = indices[idx]
        ax.imshow(X_test[i])
        true_label = class_folders[y_test[i]]
        pred_label = class_folders[y_pred[i]]
        confidence = y_pred_proba[i][y_pred[i]] * 100
        
        color = 'green' if y_test[i] == y_pred[i] else 'red'
        ax.set_title(f'True: {true_label}\nPred: {pred_label}\nConf: {confidence:.1f}%', 
                    color=color, fontsize=10)
        ax.axis('off')
    
    plt.tight_layout()
    plt.show()

plot_sample_predictions(X_test, y_test, y_pred, y_pred_proba)


## 8. Summary Statistics


In [None]:
# Final summary
print("\n" + "="*60)
print("FINAL MODEL EVALUATION SUMMARY")
print("="*60)
print(f"Total Training Samples: {len(X_train)}")
print(f"Total Test Samples: {len(X_test)}")
print(f"Number of Classes: {len(class_folders)}")
print(f"Classes: {', '.join(class_folders)}")
print(f"\nTest Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")
print(f"Final Training Accuracy: {history.history['accuracy'][-1]:.4f} ({history.history['accuracy'][-1]*100:.2f}%)")
print(f"Final Validation Accuracy: {history.history['val_accuracy'][-1]:.4f} ({history.history['val_accuracy'][-1]*100:.2f}%)")
print("="*60)
