### Imports and Setup

In [1]:
import torch
from torchvision import models, transforms
from torch.utils.data import DataLoader, ConcatDataset
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision.datasets import ImageFolder
from PIL import Image, ImageFile
import os
import numpy as np
from tqdm.auto import tqdm
import json
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score
import seaborn as sns
import random
from scipy.stats import linregress
import requests
import tempfile
import gc
import contextlib

# Parameters
identifier = 'softmax-v28-prevent-overfitting'
class_names = ['Boston', 'Charlotte', 'Manhattan', 'Pittsburgh']
base_folder = '../data'
folders = {
    'Boston': os.path.join(base_folder, 'ma-boston/buildings'),
    'Charlotte': os.path.join(base_folder, 'nc-charlotte/buildings'),
    'Manhattan': os.path.join(base_folder, 'ny-manhattan/buildings'),
    'Pittsburgh': os.path.join(base_folder, 'pa-pittsburgh/buildings')
}
output_folder = 'softmax-output'
normalize_mean = [0.485, 0.456, 0.406]
normalize_std = [0.229, 0.224, 0.225]
batch_size = 32
num_classes = len(class_names)
num_epochs = 10
learning_rate = 0.001
weight_decay = 1e-4
checkpoint_interval = 5
checkpoint_dir = os.path.join(output_folder, f'checkpoints-{identifier}')
model_save_path = os.path.join(output_folder, f'trained-model-{identifier}.pth')
loss_log_path = os.path.join(output_folder, f'loss-log-{identifier}.json')
new_image_path = os.path.join(base_folder, 'ny-brooklyn', 'buildings_1370.jpg')
predictions_output_file = os.path.join(output_folder, f'predictions-{identifier}.txt')

os.makedirs(output_folder, exist_ok=True)

# Set device
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

def download_file_with_progress(url, filename):
    response = requests.get(url, stream=True)
    total_size = int(response.headers.get('content-length', 0))
    block_size = 1024  # 1 KB
    with open(filename, 'wb') as file, tqdm(
        desc=filename,
        total=total_size,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    ) as progress_bar:
        for data in response.iter_content(block_size):
            size = file.write(data)
            progress_bar.update(size)

def download_pretrained_weights():
    print("Downloading pre-trained weights...")
    url = "https://download.pytorch.org/models/resnet18-5c106cde.pth"
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        download_file_with_progress(url, temp_file.name)
        state_dict = torch.load(temp_file.name, map_location='cpu')
    os.unlink(temp_file.name)
    return state_dict

# Download weights
pretrained_weights = download_pretrained_weights()

print("Setup completed. Pre-trained weights downloaded and ready for use.")

Using device: mps
Downloading pre-trained weights...


/var/folders/9g/r0ctqhfj26l910sgbwcdndq00000gn/T/tmpi1umpwc4:   0%|          | 0.00/44.7M [00:00<?, ?iB/s]

Setup completed. Pre-trained weights downloaded and ready for use.


  state_dict = torch.load(temp_file.name, map_location='cpu')


### Dataset and Model Definition

In [2]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch
import os
from PIL import Image, ImageFile
from tqdm.auto import tqdm
import contextlib

ImageFile.LOAD_TRUNCATED_IMAGES = True

print("Defining data transformations...")
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=normalize_mean, std=normalize_std)
])

class CustomDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = self.labels[idx]
        
        try:
            with open(image_path, 'rb') as f:
                img = Image.open(f).convert('RGB')
                if self.transform:
                    img = self.transform(img)
                return img, label
        except Exception as e:
            print(f"Error loading image {image_path}: {str(e)}")
            return torch.zeros((3, 224, 224)), label

print("Preparing dataset...")
all_image_paths = []
all_labels = []
class_to_idx = {}
skipped_files = []

print("Folder paths:")
for class_name, folder in folders.items():
    print(f"{class_name}: {folder}")
    if not os.path.exists(folder):
        print(f"Warning: Folder does not exist: {folder}")
    elif not os.path.isdir(folder):
        print(f"Warning: Path is not a directory: {folder}")

with contextlib.closing(tqdm(folders.items(), desc="Loading class folders")) as pbar:
    for class_name, folder in pbar:
        if not os.path.isdir(folder):
            print(f"Warning: Folder not found or not a directory: {folder}")
            continue
        class_idx = len(class_to_idx)
        class_to_idx[class_name] = class_idx
        class_images = [os.path.join(folder, img_name) for img_name in os.listdir(folder) if os.path.isfile(os.path.join(folder, img_name)) and img_name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif'))]
        all_image_paths.extend(class_images)
        all_labels.extend([class_idx] * len(class_images))
        print(f"Loaded {len(class_images)} images for class {class_name}")
        if len(class_images) == 0:
            print(f"Contents of {folder}:")
            for item in os.listdir(folder):
                print(f"  {item}")

print(f"Dataset prepared with {len(all_image_paths)} images")

if len(all_image_paths) == 0:
    raise ValueError("No images found in the specified folders. Please check your folder paths and make sure they contain image files.")

print("Creating dataset...")
full_dataset = CustomDataset(all_image_paths, all_labels, transform=transform)

print("Splitting dataset into train and test sets...")
train_size = int(0.8 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])

print("Creating DataLoaders...")
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

class CustomResNet18(nn.Module):
    def __init__(self, num_classes, pretrained_weights):
        super(CustomResNet18, self).__init__()
        self.resnet = models.resnet18(weights=None)
        self.resnet.load_state_dict(pretrained_weights)
        self.resnet.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(self.resnet.fc.in_features, num_classes)
        )
    
    def forward(self, x):
        return self.resnet(x)

print("Initializing the model...")
model = CustomResNet18(len(class_to_idx), pretrained_weights).to(device)

print(f"Dataset created with {len(full_dataset)} images")
print(f"Training set: {len(train_dataset)} images")
print(f"Test set: {len(test_dataset)} images")
print(f"Model initialized and moved to device: {device}")
print(f"Train DataLoader created with {len(train_loader)} batches")
print(f"Test DataLoader created with {len(test_loader)} batches")

# Export skipped files list
skipped_files_path = os.path.join(output_folder, f'skipped_files-{identifier}.txt')
with open(skipped_files_path, 'w') as f:
    for path, reason in skipped_files:
        f.write(f"{path}: {reason}\n")
print(f"List of skipped files exported to: {skipped_files_path}")

print("Clearing memory...")
gc.collect()
torch.cuda.empty_cache() if torch.cuda.is_available() else None

print("Section 2 completed successfully")

print("Testing data loading...")
with contextlib.closing(tqdm(train_loader, desc="Testing batches", total=min(5, len(train_loader)))) as pbar:
    for i, (images, labels) in enumerate(pbar):
        print(f"Batch {i+1}: Images shape: {images.shape}, Labels shape: {labels.shape}")
        if i == 4:  # Test first 5 batches
            break
print("Data loading test completed")

Defining data transformations...
Preparing dataset...
Folder paths:
Boston: ../data/ma-boston/buildings
Charlotte: ../data/nc-charlotte/buildings
Manhattan: ../data/ny-manhattan/buildings
Pittsburgh: ../data/pa-pittsburgh/buildings


Loading class folders:   0%|          | 0/4 [00:00<?, ?it/s]

Loaded 24995 images for class Boston
Loaded 24995 images for class Charlotte
Loaded 25064 images for class Manhattan
Loaded 24998 images for class Pittsburgh
Dataset prepared with 100052 images
Creating dataset...
Splitting dataset into train and test sets...
Creating DataLoaders...
Initializing the model...
Dataset created with 100052 images
Training set: 80041 images
Test set: 20011 images
Model initialized and moved to device: mps
Train DataLoader created with 2502 batches
Test DataLoader created with 626 batches
List of skipped files exported to: softmax-output/skipped_files-softmax-v28-prevent-overfitting.txt
Clearing memory...
Section 2 completed successfully
Testing data loading...


Testing batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batch 1: Images shape: torch.Size([32, 3, 224, 224]), Labels shape: torch.Size([32])
Batch 2: Images shape: torch.Size([32, 3, 224, 224]), Labels shape: torch.Size([32])
Batch 3: Images shape: torch.Size([32, 3, 224, 224]), Labels shape: torch.Size([32])
Batch 4: Images shape: torch.Size([32, 3, 224, 224]), Labels shape: torch.Size([32])
Batch 5: Images shape: torch.Size([32, 3, 224, 224]), Labels shape: torch.Size([32])
Data loading test completed


### Training and Evaluation

In [3]:
def train_with_early_stopping(model, train_loader, val_loader, num_epochs, patience=5):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)
    
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    train_loss_log, val_loss_log, val_accuracy_log = [], [], []

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        total_samples = 0
        with contextlib.closing(tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")) as pbar:
            for images, labels in pbar:
                if images.numel() == 0:
                    continue
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                train_loss += loss.item() * images.size(0)
                total_samples += images.size(0)
                pbar.set_postfix({'train_loss': loss.item()})
        
        train_loss /= total_samples
        train_loss_log.append(train_loss)

        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            with contextlib.closing(tqdm(val_loader, desc="Validating", leave=False)) as pbar:
                for images, labels in pbar:
                    if images.numel() == 0:
                        continue
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item() * images.size(0)
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    pbar.set_postfix({'val_loss': loss.item()})

        val_loss /= total
        val_accuracy = correct / total if total > 0 else 0
        val_loss_log.append(val_loss)
        val_accuracy_log.append(val_accuracy)

        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
            torch.save(model.state_dict(), model_save_path)
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f'Early stopping triggered after epoch {epoch+1}')
                model.load_state_dict(torch.load(model_save_path))
                break

    # Save the loss and accuracy logs
    with open(loss_log_path, 'w') as f:
        json.dump({
            'train_loss': train_loss_log,
            'val_loss': val_loss_log,
            'val_accuracy': val_accuracy_log
        }, f)

    # Plot the loss and accuracy curves
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_loss_log, label='Train Loss')
    plt.plot(val_loss_log, label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(val_accuracy_log)
    plt.title('Validation Accuracy')
    plt.tight_layout()
    plt.savefig(os.path.join(output_folder, f'training_curves-{identifier}.png'))
    plt.close()

    return val_accuracy_log[-1]

def evaluate_model(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    class_correct = list(0. for i in range(num_classes))
    class_total = list(0. for i in range(num_classes))
    
    with torch.no_grad():
        with contextlib.closing(tqdm(data_loader, desc="Evaluating")) as pbar:
            for images, labels in pbar:
                if images.numel() == 0:
                    continue
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                
                c = (predicted == labels).squeeze()
                for i in range(len(labels)):
                    label = labels[i]
                    class_correct[label] += c[i].item()
                    class_total[label] += 1

    accuracy = 100 * correct / total if total > 0 else 0
    print(f'Overall accuracy: {accuracy:.2f}%')
    
    for i in range(num_classes):
        class_accuracy = 100 * class_correct[i] / class_total[i] if class_total[i] > 0 else 0
        print(f'Accuracy of {class_names[i]}: {class_accuracy:.2f}%')
    
    return accuracy

### Analysis

In [4]:
def analyze_cross_validation(fold_results):
    avg_accuracy = np.mean(fold_results)
    std_accuracy = np.std(fold_results)
    
    print(f"Cross-validation results:")
    print(f"Average accuracy: {avg_accuracy:.4f}")
    print(f"Standard deviation: {std_accuracy:.4f}")
    
    plt.figure(figsize=(10, 6))
    plt.bar(range(1, len(fold_results) + 1), fold_results)
    plt.axhline(y=avg_accuracy, color='r', linestyle='--', label='Average')
    plt.title('Cross-validation Accuracy per Fold')
    plt.xlabel('Fold')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig(os.path.join(output_folder, f'cross_validation_results-{identifier}.png'))
    plt.close()

def error_analysis(model, test_loader):
    model.eval()
    all_preds = []
    all_labels = []
    misclassified_images = []
    
    with torch.no_grad():
        with contextlib.closing(tqdm(test_loader, desc="Analyzing errors")) as pbar:
            for images, labels in pbar:
                if images.numel() == 0:
                    continue
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)
                
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
                
                misclassified = (preds != labels).nonzero().squeeze()
                for idx in misclassified:
                    misclassified_images.append((images[idx].cpu(), labels[idx].item(), preds[idx].item()))

    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.savefig(os.path.join(output_folder, f'confusion_matrix-{identifier}.png'))
    plt.close()

    fig, axes = plt.subplots(3, 3, figsize=(15, 15))
    for i, ax in enumerate(axes.flat):
        if i < len(misclassified_images):
            img, true_label, pred_label = misclassified_images[i]
            ax.imshow(img.permute(1, 2, 0))
            ax.set_title(f'True: {class_names[true_label]}\nPred: {class_names[pred_label]}')
        ax.axis('off')
    plt.tight_layout()
    plt.savefig(os.path.join(output_folder, f'misclassified_samples-{identifier}.png'))
    plt.close()

def compare_with_baseline(train_data, test_data, model_accuracy):
    X_train = []
    y_train = []
    X_test = []
    y_test = []

    for images, labels in tqdm(train_data, desc="Preparing train data"):
        if images.numel() > 0:
            X_train.append(images.view(images.size(0), -1).cpu().numpy())
            y_train.append(labels.cpu().numpy())
    
    for images, labels in tqdm(test_data, desc="Preparing test data"):
        if images.numel() > 0:
            X_test.append(images.view(images.size(0), -1).cpu().numpy())
            y_test.append(labels.cpu().numpy())

    X_train = np.concatenate(X_train)
    y_train = np.concatenate(y_train)
    X_test = np.concatenate(X_test)
    y_test = np.concatenate(y_test)

    print(f"Train data shape: {X_train.shape}, Train labels shape: {y_train.shape}")
    print(f"Test data shape: {X_test.shape}, Test labels shape: {y_test.shape}")

    baseline_model = DummyClassifier(strategy='stratified')
    baseline_model.fit(X_train, y_train)
    baseline_preds = baseline_model.predict(X_test)
    baseline_accuracy = accuracy_score(y_test, baseline_preds) * 100

    print(f"Baseline model accuracy: {baseline_accuracy:.2f}%")
    print(f"Improvement over baseline: {model_accuracy - baseline_accuracy:.2f}%")

def predict_image(model, image_path, transform):
    model.eval()
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)
    
    with torch.no_grad():
        outputs = model(image)
        probabilities = F.softmax(outputs, dim=1)[0]
        predicted_class = torch.argmax(probabilities).item()
    
    return probabilities.cpu().numpy(), predicted_class

### Main execution

In [5]:
if __name__ == "__main__":
    try:
        # Train the model
        print("Training the model...")
        final_accuracy = train_with_early_stopping(model, train_loader, test_loader, num_epochs)
        
        # Evaluate on test set
        print("Evaluating on test set...")
        test_accuracy = evaluate_model(model, test_loader)
        
        # Perform error analysis
        print("Performing error analysis...")
        error_analysis(model, test_loader)
        
        # Compare with baseline
        print("Comparing with baseline model...")
        compare_with_baseline(train_loader, test_loader, test_accuracy)
        
        # Predict on a new image
        print(f"Predicting on new image: {new_image_path}")
        new_image_probabilities, new_image_class = predict_image(model, new_image_path, transform)
        
        # Print and save predictions
        predictions = [f"{class_names[i]}: {prob:.2f}" for i, prob in enumerate(new_image_probabilities)]
        print(f"Predictions for {new_image_path}:")
        print(f"Predicted class: {class_names[new_image_class]}")
        print("Class probabilities:")
        for pred in predictions:
            print(pred)
        
        with open(predictions_output_file, 'w') as f:
            f.write(f"Predictions for {new_image_path}:\n")
            f.write(f"Predicted class: {class_names[new_image_class]}\n")
            f.write("Class probabilities:\n")
            for pred in predictions:
                f.write(f"{pred}\n")
        
        print(f"Predictions saved to {predictions_output_file}")
        print("Script execution completed successfully.")

    except Exception as e:
        print(f"An error occurred during execution: {str(e)}")
        import traceback
        traceback.print_exc()

    finally:
        # Clear memory
        print("Clearing memory...")
        gc.collect()
        torch.cuda.empty_cache() if torch.cuda.is_available() else None

Training the model...


Epoch 1/2:   0%|          | 0/2502 [00:00<?, ?it/s]

Validating:   0%|          | 0/626 [00:00<?, ?it/s]

Epoch 1: Train Loss: 0.0695, Val Loss: 0.1013, Val Accuracy: 0.9694


Epoch 2/2:   0%|          | 0/2502 [00:00<?, ?it/s]

Validating:   0%|          | 0/626 [00:00<?, ?it/s]

Epoch 2: Train Loss: 0.0336, Val Loss: 0.0066, Val Accuracy: 0.9981
Evaluating on test set...


Evaluating:   0%|          | 0/626 [00:00<?, ?it/s]

Overall accuracy: 99.81%
Accuracy of Boston: 99.88%
Accuracy of Charlotte: 100.00%
Accuracy of Manhattan: 99.56%
Accuracy of Pittsburgh: 99.80%
Performing error analysis...


Analyzing errors:   0%|          | 0/626 [00:00<?, ?it/s]

An error occurred during execution: iteration over a 0-d tensor
Clearing memory...


Traceback (most recent call last):
  File "/var/folders/9g/r0ctqhfj26l910sgbwcdndq00000gn/T/ipykernel_91796/609564052.py", line 13, in <module>
    error_analysis(model, test_loader)
  File "/var/folders/9g/r0ctqhfj26l910sgbwcdndq00000gn/T/ipykernel_91796/4080706123.py", line 38, in error_analysis
    for idx in misclassified:
  File "/opt/homebrew/lib/python3.11/site-packages/torch/_tensor.py", line 1043, in __iter__
    raise TypeError("iteration over a 0-d tensor")
TypeError: iteration over a 0-d tensor
