In [2]:
import torch
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset, Subset
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image, ImageFile
import os
import numpy as np
from tqdm.notebook import tqdm
import json
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import random
import time
from datetime import datetime

# Parameters to tweak
batch_size = 64  # Reduced from 128 due to larger model
learning_rate = 1e-4  # before 1e-3
num_epochs = 100
checkpoint_interval = 10
max_images_per_class = 25000  # Set to use all images

# Directories
current_time = datetime.now().strftime("%Y-%m-%d_%H-%M")
identifier = f"softmax-resnet-18_{num_epochs}-ep_{batch_size}-bs_{max_images_per_class}-images_{current_time}"
class_names = ['Boston', 'Charlotte', 'Manhattan', 'Pittsburgh']
folders = { # building outlines on satellite background
    'Boston': '../data/ma-boston/buildings',
    'Charlotte': '../data/nc-charlotte/buildings',
    'Manhattan': '../data/ny-manhattan/buildings',
    'Pittsburgh': '../data/pa-pittsburgh/buildings'
}
output_folder = os.path.join('softmax-output', identifier)
checkpoint_dir = os.path.join(output_folder, 'checkpoints')
model_save_path = os.path.join(output_folder, f'trained-model_{identifier}.pth')
loss_log_path = os.path.join(output_folder, f'loss-log_{identifier}.json')
training_curves_path = os.path.join(output_folder, f'training-curves_{identifier}.png')
confusion_matrix_path = os.path.join(output_folder, f'confusion-matrix_{identifier}.png')
cross_validation_path = os.path.join(output_folder, f'cross-validation_{identifier}.png')
misclassified_samples_path = os.path.join(output_folder, f'misclassified-samples_{identifier}.png')
report_path = os.path.join(output_folder, f'report_{identifier}.txt')
new_image_path = '../data/ny-brooklyn/buildings/buildings_1370.jpg'
predictions_output_file = os.path.join(output_folder, f'predictions_{identifier}.txt')

# More Parameters
normalize_mean = [0.485, 0.456, 0.406]
normalize_std = [0.229, 0.224, 0.225]
num_classes = len(class_names)
weight_decay = 1e-5  # Reduced due to large dataset (prior tests limited to 500 images per class)

# Allow loading of truncated images
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Define output folder
os.makedirs(output_folder, exist_ok=True)

# Define a custom dataset class
class CityDataset(Dataset):
    def __init__(self, folders, transform=None, max_images_per_class=max_images_per_class):
        self.image_paths = []
        self.labels = []
        self.transform = transform
        self.class_to_idx = {class_name: idx for idx, class_name in enumerate(folders.keys())}

        for class_name, folder in folders.items():
            class_images = [os.path.join(folder, f) for f in os.listdir(folder) if f.endswith(('.jpg', '.jpeg', '.png'))]
            selected_images = random.sample(class_images, min(max_images_per_class, len(class_images)))
            self.image_paths.extend(selected_images)
            self.labels.extend([self.class_to_idx[class_name]] * len(selected_images))

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = self.labels[idx]
        image = Image.open(image_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label

# Define transformations with data augmentation
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=normalize_mean, std=normalize_std),
])

# Create dataset
dataset = CityDataset(folders, transform=transform)

# Set device
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

# Load a pre-trained ResNet50 model
weights = models.ResNet50_Weights.DEFAULT
model = models.resnet50(weights=weights)

# Modify the final layer to match the number of classes
model.fc = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(model.fc.in_features, num_classes)
)
model.to(device)

# Save training parameters
training_params = {
    "identifier": identifier,
    'model': str(model),
    'device': str(device),
    'max_images_per_class': max_images_per_class,
    'num_classes': num_classes,
    'class_names': class_names,
    "batch_size": batch_size,
    "num_epochs": num_epochs,
    "learning_rate": learning_rate,
    "checkpoint_interval": checkpoint_interval,
    'normalize_mean': normalize_mean,
    'normalize_std': normalize_std,

}
params_path = os.path.join(output_folder, 'training_params.json')
with open(params_path, 'w') as f:
    json.dump(training_params, f)
print(f"Training parameters saved to {params_path}")

# Model training function
def train_and_save_model(model, train_loader, val_loader, num_epochs, checkpoint_interval, checkpoint_dir):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = torch.optim.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
    train_loss_log = []
    val_loss_log = []
    val_accuracy_log = []
    epoch_times = []

    # Early stopping parameters
    patience = 20
    best_val_loss = float('inf')
    epochs_without_improvement = 0

    total_iterations = num_epochs * len(train_loader)
    progress_bar = tqdm(total=total_iterations, desc="Training Progress")

    for epoch in range(num_epochs):
        epoch_start_time = time.time()

        # Training
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * images.size(0)
            progress_bar.update(1)

        epoch_loss = running_loss / len(train_loader.dataset)
        train_loss_log.append(epoch_loss)

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * images.size(0)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_epoch_loss = val_loss / len(val_loader.dataset)
        val_accuracy = correct / total
        val_loss_log.append(val_epoch_loss)
        val_accuracy_log.append(val_accuracy)

        # Learning rate scheduling
        scheduler.step(val_epoch_loss)

        # Early stopping check
        if val_epoch_loss < best_val_loss:
            best_val_loss = val_epoch_loss
            epochs_without_improvement = 0
            # Save the best model
            torch.save(model.state_dict(), model_save_path)
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            print(f"Early stopping triggered at epoch {epoch + 1}")
            break

        epoch_end_time = time.time()
        epoch_duration = epoch_end_time - epoch_start_time
        epoch_times.append(epoch_duration)

        progress_bar.set_postfix({
            'Epoch': f'{epoch + 1}/{num_epochs}',
            'Train Loss': f'{epoch_loss:.4f}',
            'Val Loss': f'{val_epoch_loss:.4f}',
            'Val Accuracy': f'{val_accuracy:.4f}',
            'Epoch Time (s)': f'{epoch_duration:.2f}'
        })

        # Save checkpoint
        if (epoch + 1) % checkpoint_interval == 0:
            checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch+1}_{identifier}.pth')
            os.makedirs(checkpoint_dir, exist_ok=True)
            torch.save(model.state_dict(), checkpoint_path)

    progress_bar.close()

    # Save the loss and accuracy logs
    with open(loss_log_path, 'w') as f:
        json.dump({
            'train_loss': train_loss_log,
            'val_loss': val_loss_log,
            'val_accuracy': val_accuracy_log,
            'epoch_times': epoch_times
        }, f)

    # Plot the loss and accuracy curves
    plot_training_curves(train_loss_log, val_loss_log, val_accuracy_log)

    return train_loss_log, val_loss_log, val_accuracy_log

def plot_training_curves(train_loss_log, val_loss_log, val_accuracy_log):
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(range(1, len(train_loss_log) + 1), train_loss_log, label='Train Loss')
    plt.plot(range(1, len(val_loss_log) + 1), val_loss_log, label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(range(1, len(val_accuracy_log) + 1), val_accuracy_log)
    plt.title('Validation Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')

    plt.tight_layout()
    plt.savefig(training_curves_path)
    plt.close()

def k_fold_cross_validation(dataset, num_folds=5):
    kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)
    fold_results = []
    fold_times = []
    all_labels = []
    all_predictions = []
    all_train_loss_logs = []
    all_val_loss_logs = []
    all_val_accuracy_logs = []

    for fold, (train_ids, val_ids) in enumerate(kfold.split(dataset), 1):
        print(f"Fold {fold}")
        fold_start_time = time.time()
        
        train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
        val_subsampler = torch.utils.data.SubsetRandomSampler(val_ids)
        
        train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_subsampler)
        val_loader = DataLoader(dataset, batch_size=batch_size, sampler=val_subsampler)
        
        model = models.resnet50(weights=weights)
        model.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(model.fc.in_features, num_classes)
        )
        model.to(device)
        
        train_loss_log, val_loss_log, val_accuracy_log = train_and_save_model(
            model, train_loader, val_loader, num_epochs, checkpoint_interval, 
            os.path.join(checkpoint_dir, f'fold_{fold}')
        )
        
        all_train_loss_logs.append(train_loss_log)
        all_val_loss_logs.append(val_loss_log)
        all_val_accuracy_logs.append(val_accuracy_log)
        
        model.eval()
        correct = 0
        total = 0
        fold_labels = []
        fold_predictions = []
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                fold_labels.extend(labels.cpu().numpy())
                fold_predictions.extend(predicted.cpu().numpy())
        
        accuracy = correct / total
        fold_results.append(accuracy)
        all_labels.extend(fold_labels)
        all_predictions.extend(fold_predictions)
        fold_end_time = time.time()
        fold_duration = fold_end_time - fold_start_time
        fold_times.append(fold_duration)
        print(f"Fold {fold} accuracy: {accuracy:.4f}, Time: {fold_duration:.2f} seconds")
    
    average_accuracy = sum(fold_results) / len(fold_results)
    print(f"Average accuracy across folds: {average_accuracy:.4f}")
    print(f"Average time per fold: {sum(fold_times) / len(fold_times):.2f} seconds")

    # Plot cross-validation results
    plot_cross_validation_results(fold_results, average_accuracy, num_folds)

    # Plot confusion matrix
    plot_confusion_matrix(all_labels, all_predictions)

    # Plot misclassified samples
    plot_misclassified_samples(dataset, all_labels, all_predictions)

    # Generate classification report
    generate_classification_report(all_labels, all_predictions)

    # Calculate and save additional metrics
    calculate_additional_metrics(all_train_loss_logs, all_val_loss_logs)

def plot_cross_validation_results(fold_results, average_accuracy, num_folds):
    plt.figure()
    plt.bar(range(1, num_folds + 1), fold_results, tick_label=[f'Fold {i}' for i in range(1, num_folds + 1)])
    plt.axhline(y=average_accuracy, color='r', linestyle='--', label=f'Average Accuracy: {average_accuracy:.4f}')
    plt.title('Cross-Validation Accuracy')
    plt.xlabel('Fold')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig(cross_validation_path)
    plt.close()

def plot_confusion_matrix(all_labels, all_predictions):
    cm = confusion_matrix(all_labels, all_predictions)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp.plot(cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.savefig(confusion_matrix_path)
    plt.close()

def plot_misclassified_samples(dataset, all_labels, all_predictions):
    misclassified_indices = [i for i, (label, pred) in enumerate(zip(all_labels, all_predictions)) if label != pred]
    if misclassified_indices:
        plt.figure(figsize=(20, 20))
        for i, idx in enumerate(random.sample(misclassified_indices, min(25, len(misclassified_indices)))):
            image, label = dataset[idx]
            plt.subplot(5, 5, i + 1)
            plt.imshow(image.permute(1, 2, 0).numpy())
            plt.title(f'True: {class_names[label]}\nPred: {class_names[all_predictions[idx]]}')
            plt.axis('off')
        plt.tight_layout()
        plt.savefig(misclassified_samples_path)
        plt.close()

def generate_classification_report(all_labels, all_predictions):
    report = classification_report(all_labels, all_predictions, target_names=class_names)
    with open(report_path, 'w') as f:
        f.write(report)
    print(f"Classification report saved to {report_path}")

def calculate_additional_metrics(all_train_loss_logs, all_val_loss_logs):
    avg_train_loss_log = np.mean(all_train_loss_logs, axis=0)
    avg_val_loss_log = np.mean(all_val_loss_logs, axis=0)

    convergence_rate = (avg_train_loss_log[-1] - avg_train_loss_log[0]) / len(avg_train_loss_log)
    overfitting_score = (avg_val_loss_log[-1] - avg_train_loss_log[-1]) / avg_val_loss_log[-1]
    learning_plateau = np.mean(avg_val_loss_log[-5:]) - np.mean(avg_val_loss_log[:5])

    with open(report_path, 'a') as f:
        f.write(f"\nConvergence Rate: {convergence_rate:.4f}\n")
        f.write(f"Overfitting Score: {overfitting_score:.4f}\n")
        f.write(f"Learning Plateau: {learning_plateau:.4f}\n")

# Run k-fold cross-validation
k_fold_cross_validation(dataset)

# Function to predict on a new image
def predict_image(model, image_path, transform):
    model.eval()
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)
    
    with torch.no_grad():
        outputs = model(image)
        probabilities = F.softmax(outputs, dim=1)[0]
        predicted_class = torch.argmax(probabilities).item()
    
    return probabilities, predicted_class

# Load the best model
best_model = models.resnet50(weights=None)
best_model.fc = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(best_model.fc.in_features, num_classes)
)
best_model.load_state_dict(torch.load(model_save_path))
best_model.to(device)

# Predict on a new image
new_image_probabilities, new_image_class = predict_image(best_model, new_image_path, transform)

# Print and save predictions
predictions = [f"{class_names[i]}: {prob:.2f}" for i, prob in enumerate(new_image_probabilities)]
print(f"Predictions for {new_image_path}:")
print(f"Predicted class: {class_names[new_image_class]}")
print("Class probabilities:")
for pred in predictions:
    print(pred)

with open(predictions_output_file, 'w') as f:
    f.write(f"Predictions for {new_image_path}:\n")
    f.write(f"Predicted class: {class_names[new_image_class]}\n")
    f.write("Class probabilities:\n")
    for pred in predictions:
        f.write(f"{pred}\n")

print(f"Predictions saved to {predictions_output_file}")

# Optional: Visualize the prediction
plt.figure(figsize=(10, 10))
img = Image.open(new_image_path)
plt.imshow(img)
plt.title(f"Predicted: {class_names[new_image_class]}")
plt.axis('off')
plt.show()

print("Script execution completed.")

Using device: mps
Fold 1


AttributeError: module 'torch.optim' has no attribute 'ReduceLROnPlateau'

## Test classification on a different image

In [None]:
import torch
from torchvision import models, transforms
from PIL import Image
import matplotlib.pyplot as plt
import os
import argparse

# Define the classes and model path
class_names = ['Boston', 'Charlotte', 'Manhattan', 'Pittsburgh']
model_path = 'path/to/your/trained-model.pth'  # Update this path

# Set up the device
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

# Define the transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

def load_model():
    # Load the ResNet50 model
    model = models.resnet50(weights=None)
    model.fc = torch.nn.Sequential(
        torch.nn.Dropout(0.5),
        torch.nn.Linear(model.fc.in_features, len(class_names))
    )
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()
    return model

def predict_image(model, image_path):
    image = Image.open(image_path).convert('RGB')
    image_tensor = transform(image).unsqueeze(0).to(device)
    
    with torch.no_grad():
        outputs = model(image_tensor)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)[0]
        predicted_class = torch.argmax(probabilities).item()
    
    return probabilities, predicted_class

def visualize_prediction(image_path, predicted_class, probabilities):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
    
    # Display the image
    img = Image.open(image_path)
    ax1.imshow(img)
    ax1.set_title(f"Predicted: {class_names[predicted_class]}")
    ax1.axis('off')
    
    # Display the probabilities
    bars = ax2.bar(class_names, probabilities.cpu().numpy())
    ax2.set_ylabel('Probability')
    ax2.set_title('Class Probabilities')
    ax2.set_ylim([0, 1])
    
    # Rotate x-axis labels for better readability
    plt.setp(ax2.get_xticklabels(), rotation=45, ha='right')
    
    # Add probability values on top of each bar
    for rect in bars:
        height = rect.get_height()
        ax2.text(rect.get_x() + rect.get_width()/2., height,
                 f'{height:.2f}',
                 ha='center', va='bottom')
    
    plt.tight_layout()
    plt.show()

def main(image_paths):
    model = load_model()
    
    for image_path in image_paths:
        if not os.path.exists(image_path):
            print(f"Image not found: {image_path}")
            continue
        
        probabilities, predicted_class = predict_image(model, image_path)
        
        print(f"\nPredictions for {image_path}:")
        print(f"Predicted class: {class_names[predicted_class]}")
        print("Class probabilities:")
        for i, prob in enumerate(probabilities):
            print(f"{class_names[i]}: {prob:.4f}")
        
        visualize_prediction(image_path, predicted_class, probabilities)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Classify satellite images of cities.")
    parser.add_argument("image_paths", nargs='+', help="Paths to the images to classify")
    args = parser.parse_args()
    
    main(args.image_paths)

In [7]:
### Old Code

In [None]:
# Temp. overwrite new image for testing purposes
new_image_path = '../data/nc-charlotte/buildings/buildings_131.jpg'

# Load model weights
model.load_state_dict(torch.load(model_save_path, weights_only=True))

# Function to classify a new image
def classify_new_image(image_path, model, transform):
    model.eval()
    input_image = Image.open(image_path)
    input_tensor = transform(input_image).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(input_tensor)
        probabilities = F.softmax(output, dim=1)
        probabilities = probabilities.cpu().numpy().flatten()

    predictions = [(class_names[i], prob * 100) for i, prob in enumerate(probabilities)]
    predictions.sort(key=lambda x: x[1], reverse=True)
    return predictions, input_image

with tqdm(total=1, desc="Classifying new image", leave=False) as pbar:
    predictions, input_image = classify_new_image(new_image_path, model, transform)
    pbar.update(1)

# Display the image
display(input_image)

# Save predictions to a file
with open(predictions_output_file, 'w') as f:
    f.write(f'Image path: {new_image_path}\n')  # Write the image path
    for label, percentage in predictions:
        f.write(f'Predicted class: {label}, Confidence: {percentage:.2f}%\n')
        print(f'Predicted class: {label}, Confidence: {percentage:.2f}%')

# Optionally, display the predictions in the notebook
print(f'Image path: {new_image_path}')
#for label, percentage in predictions:
#    print(f'Predicted class: {label}, Confidence: {percentage:.2f}%')
