In [None]:
# Mount Google Drive into the Colab filesystem so files stored on Drive can be
# read through ordinary paths.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# **Baseline code**

In [None]:
# ======================
# 1. Data preparation
# ======================
# Root folder of the dataset on the mounted Drive; it holds train/, val/ and test/.
data_dir = "/content/drive/MyDrive/chest_xray"

# Deterministic preprocessing used for every split: fixed 224x224 resize,
# tensor conversion, then normalization with the ImageNet channel statistics.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
)

# One ImageFolder per split directory, created in train / val / test order.
split_datasets = {
    split: datasets.ImageFolder(root=f"{data_dir}/{split}", transform=transform)
    for split in ("train", "val", "test")
}
train_dataset = split_datasets["train"]
val_dataset = split_datasets["val"]
test_dataset = split_datasets["test"]

# Batches of 32 loaded by two worker processes; only the training split is shuffled.
loader_options = dict(batch_size=32, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [None]:
# ======================
# 2. Model definition
# ======================
num_classes = 2  # Normal vs Pneumonia

# Baseline network: ResNet-18 with randomly initialised weights.
# `weights=None` is the current torchvision spelling of the deprecated
# `pretrained=False` argument and selects the same (untrained) model.
model = models.resnet18(weights=None)
# Replace the final fully connected layer with one that has `num_classes` outputs.
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)



In [None]:
# ======================
# 3. Loss and optimizer
# ======================
# Adam over every parameter of the baseline model with a fixed 1e-4 learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)
# Unweighted cross-entropy between the model outputs and the integer labels.
criterion = nn.CrossEntropyLoss()

In [None]:

# ======================
# 4. Training and evaluation functions
# ======================
def train_one_epoch(model, loader, optimizer, criterion, device=None):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer whose gradients are zeroed and which is stepped
            once per batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a global variable.

    Returns:
        ``(mean_batch_loss, accuracy)``: the average of the per-batch loss
        values and the fraction of samples whose arg-max prediction equals
        the label.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    running_loss, correct, total, num_batches = 0.0, 0, 0, 0
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Count batches here instead of calling len(loader) so loaders
        # without __len__ work too.
        num_batches += 1
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    if num_batches == 0 or total == 0:
        raise ValueError("loader yielded no samples; cannot compute loss/accuracy")
    return running_loss / num_batches, correct / total

def evaluate(model, loader, criterion, device=None):
    """Compute loss and accuracy of ``model`` on ``loader`` without training.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a global variable.

    Returns:
        ``(mean_batch_loss, accuracy)``: the average of the per-batch loss
        values and the fraction of samples whose arg-max prediction equals
        the label.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    running_loss, correct, total, num_batches = 0.0, 0, 0, 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            # Count batches here instead of calling len(loader) so loaders
            # without __len__ work too.
            num_batches += 1

            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    if num_batches == 0 or total == 0:
        raise ValueError("loader yielded no samples; cannot compute loss/accuracy")
    return running_loss / num_batches, correct / total


In [None]:
# ======================
# 5. Training loop
# ======================
num_epochs = 5
for epoch in range(num_epochs):
    # One optimisation pass over the training split, then a validation pass.
    train_metrics = train_one_epoch(model, train_loader, optimizer, criterion)
    val_metrics = evaluate(model, val_loader, criterion)
    train_loss, train_acc = train_metrics
    val_loss, val_acc = val_metrics

    epoch_summary = (
        f"Epoch {epoch+1}: "
        f"Train Loss={train_loss:.4f}, Train Acc={train_acc:.4f}, "
        f"Val Loss={val_loss:.4f}, Val Acc={val_acc:.4f}"
    )
    print(epoch_summary)

Epoch 1: Train Loss=0.2623, Train Acc=0.8737, Val Loss=2.1448, Val Acc=0.5000
Epoch 2: Train Loss=0.0790, Train Acc=0.9785, Val Loss=1.9070, Val Acc=0.5625
Epoch 3: Train Loss=0.0410, Train Acc=0.9866, Val Loss=1.6307, Val Acc=0.5625
Epoch 4: Train Loss=0.0187, Train Acc=0.9973, Val Loss=1.1242, Val Acc=0.6875
Epoch 5: Train Loss=0.0140, Train Acc=0.9987, Val Loss=1.4925, Val Acc=0.6250


In [None]:
# ======================
# 6. Final test evaluation with Detailed Metrics
# ======================

# Get detailed predictions for baseline model
def evaluate_with_predictions(model, loader, criterion, device=None):
    """Evaluate ``model`` on ``loader`` and also return every prediction.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a global variable.

    Returns:
        ``(mean_batch_loss, accuracy, all_preds, all_labels)`` where the
        first two values are the average per-batch loss and the fraction of
        correct arg-max predictions, and the last two are lists of integer
        class indices in the order the loader produced the samples.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    running_loss, correct, total, num_batches = 0.0, 0, 0, 0
    all_preds, all_labels = [], []

    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            # Count batches here instead of calling len(loader) so loaders
            # without __len__ work too.
            num_batches += 1

            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            # Keep per-sample results as plain Python ints for the metric functions.
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if num_batches == 0 or total == 0:
        raise ValueError("loader yielded no samples; cannot compute loss/accuracy")
    return running_loss / num_batches, correct / total, all_preds, all_labels

# Run the baseline model on the test split and keep every prediction.
baseline_eval = evaluate_with_predictions(model, test_loader, criterion)
test_loss, test_acc, test_preds_baseline, test_labels_baseline = baseline_eval

banner = "=" * 60
print(banner)
print("BASELINE MODEL RESULTS")
print(banner)
print(f"Final Test: Loss={test_loss:.4f}, Acc={test_acc:.4f}")
print()

# Libraries used for the detailed metrics below
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Display names used for label indices 0 and 1
class_names = ['Normal', 'Pneumonia']

# Text report with precision / recall / F1 per class
print("Classification Report:")
print(classification_report(test_labels_baseline, test_preds_baseline, target_names=class_names))

# Confusion matrix drawn as an annotated heatmap
cm_baseline = confusion_matrix(test_labels_baseline, test_preds_baseline)
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_baseline, annot=True, fmt='d', cmap='Reds',
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_title('Baseline Model - Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()

# Per-class arrays, kept under *_baseline names for the later comparison cells
baseline_prf = precision_recall_fscore_support(test_labels_baseline, test_preds_baseline)
precision_baseline, recall_baseline, f1_baseline, support_baseline = baseline_prf

print("\nPer-class Metrics:")
for i, class_name in enumerate(class_names):
    report_lines = [
        f"{class_name}:",
        f"  Precision: {precision_baseline[i]:.4f}",
        f"  Recall:    {recall_baseline[i]:.4f}",
        f"  F1-score:  {f1_baseline[i]:.4f}",
        f"  Support:   {support_baseline[i]}",
    ]
    print("\n".join(report_lines))
    print()

Final Test: Loss=1.0738, Acc=0.7228


# **Improved Model Implementation**
以下は、Project Deliverableの要件に沿って精度を改善したモデルです。Baselineコードは一切変更しません。

In [None]:
# ======================
# Improved Model 1: Transfer Learning with Pretrained Weights
# ======================
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Improved model with pretrained weights
num_classes_improved = 2  # Keep same as baseline for fair comparison
# `weights=ResNet18_Weights.IMAGENET1K_V1` is the current torchvision spelling
# of the deprecated `pretrained=True` and loads the same ImageNet weights.
model_improved = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Replace the 1000-way ImageNet head with a `num_classes_improved`-way layer.
model_improved.fc = nn.Linear(model_improved.fc.in_features, num_classes_improved)

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_improved = model_improved.to(device)

print("Improved Model 1: ResNet-18 with ImageNet pretrained weights")

In [None]:
# ======================
# Improved Data Augmentation for Training
# ======================

# ImageNet normalization appended to both pipelines below.
normalize_imagenet = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                          std=[0.229, 0.224, 0.225])

# Random augmentations applied to training images only: resize to 256x256,
# random 224 crop, horizontal flip, rotation up to 15 degrees, colour jitter.
train_augmentations = [
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.1),
]
train_transform_improved = transforms.Compose(
    train_augmentations + [transforms.ToTensor(), normalize_imagenet]
)

# Validation and test keep the deterministic baseline preprocessing.
val_test_transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor(), normalize_imagenet]
)

# Datasets: augmented pipeline for train, deterministic pipeline for val/test.
train_dataset_improved = datasets.ImageFolder(
    root=f"{data_dir}/train", transform=train_transform_improved
)
val_dataset_improved = datasets.ImageFolder(
    root=f"{data_dir}/val", transform=val_test_transform
)
test_dataset_improved = datasets.ImageFolder(
    root=f"{data_dir}/test", transform=val_test_transform
)

# Loaders with batch size 16 and two workers; only the training split is shuffled.
improved_loader_options = dict(batch_size=16, num_workers=2)
train_loader_improved = DataLoader(train_dataset_improved, shuffle=True, **improved_loader_options)
val_loader_improved = DataLoader(val_dataset_improved, shuffle=False, **improved_loader_options)
test_loader_improved = DataLoader(test_dataset_improved, shuffle=False, **improved_loader_options)

print("Improved data loaders created with augmentation and optimized batch size")

In [None]:
# ======================
# Improved Loss and Optimizer with Learning Rate Scheduling
# ======================

# Calculate class weights for imbalanced dataset
def calculate_class_weights(dataset):
    """Compute inverse-frequency class weights for ``dataset``.

    Class ``i`` receives ``total_samples / (num_classes * count_i)``, so rarer
    classes get larger weights.

    Labels are read from ``dataset.targets`` when that attribute exists, which
    avoids loading and transforming every sample just to count labels.
    Otherwise the dataset is iterated as ``(sample, label)`` pairs.
    NOTE(review): the ``targets`` shortcut assumes those values are the labels
    the dataset actually yields (i.e. no ``target_transform``) - confirm if
    the dataset is ever configured differently.

    Args:
        dataset: Object exposing a ``targets`` sequence, or an iterable of
            ``(sample, label)`` pairs. Labels must be convertible with
            ``int()``.

    Returns:
        A 1-D ``torch.FloatTensor`` of weights indexed by class label
        (empty when the dataset has no samples).

    Raises:
        KeyError: If the observed labels are not exactly ``0 .. k-1``.
    """
    labels = getattr(dataset, "targets", None)
    if labels is None:
        # Fallback: walk the dataset and keep only the label of each pair.
        labels = [label for _, label in dataset]

    class_counts = {}
    for label in labels:
        label = int(label)  # accept Python ints, numpy ints and 0-d tensors
        class_counts[label] = class_counts.get(label, 0) + 1

    total_samples = len(labels)
    num_classes = len(class_counts)
    class_weights = [
        total_samples / (num_classes * class_counts[i]) for i in range(num_classes)
    ]
    return torch.FloatTensor(class_weights)

# Per-class weights derived from the training split
class_weights = calculate_class_weights(train_dataset_improved)
print(f"Class weights: {class_weights}")

# Cross-entropy that scales each class by its weight; the weight tensor is
# moved to the same device as the model.
weights_on_device = class_weights.to(device)
criterion_improved = nn.CrossEntropyLoss(weight=weights_on_device)

# Adam with a 1e-3 learning rate and 1e-4 weight decay
adam_options = dict(lr=1e-3, weight_decay=1e-4)
optimizer_improved = optim.Adam(model_improved.parameters(), **adam_options)

# Halve the learning rate when the monitored value (stepped with the validation
# loss in the training loop) has not decreased for 3 epochs. `verbose` is left
# out because some PyTorch versions do not accept it.
plateau_options = dict(mode='min', factor=0.5, patience=3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer_improved, **plateau_options)

print("Improved loss, optimizer, and scheduler initialized")

In [None]:
# ======================
# Improved Training Functions with Detailed Metrics
# ======================

def train_one_epoch_improved(model, loader, optimizer, criterion, device=None):
    """Train ``model`` for one pass over ``loader`` and record predictions.

    Args:
        model: Network to optimise; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer whose gradients are zeroed and which is stepped
            once per batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a global variable.

    Returns:
        ``(mean_batch_loss, accuracy, all_preds, all_labels)`` where the
        first two values are the average per-batch loss and the fraction of
        correct arg-max predictions, and the last two are lists of integer
        class indices in the order the loader produced the samples.
        Predictions come from the forward pass made before each optimizer step.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    running_loss, correct, total, num_batches = 0.0, 0, 0, 0
    all_preds, all_labels = [], []

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Count batches here instead of calling len(loader) so loaders
        # without __len__ work too.
        num_batches += 1
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

        # Keep per-sample results as plain Python ints for the metric functions.
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

    if num_batches == 0 or total == 0:
        raise ValueError("loader yielded no samples; cannot compute loss/accuracy")
    return running_loss / num_batches, correct / total, all_preds, all_labels

def evaluate_improved(model, loader, criterion, device=None):
    """Evaluate ``model`` on ``loader`` and also return every prediction.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used (CPU if the model has no
            parameters), so the function does not rely on a global variable.

    Returns:
        ``(mean_batch_loss, accuracy, all_preds, all_labels)`` where the
        first two values are the average per-batch loss and the fraction of
        correct arg-max predictions, and the last two are lists of integer
        class indices in the order the loader produced the samples.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    running_loss, correct, total, num_batches = 0.0, 0, 0, 0
    all_preds, all_labels = [], []

    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            # Count batches here instead of calling len(loader) so loaders
            # without __len__ work too.
            num_batches += 1

            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            # Keep per-sample results as plain Python ints for the metric functions.
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if num_batches == 0 or total == 0:
        raise ValueError("loader yielded no samples; cannot compute loss/accuracy")
    return running_loss / num_batches, correct / total, all_preds, all_labels

print("Improved training and evaluation functions defined")

In [None]:
# ======================
# Improved Training Loop with Early Stopping
# ======================

# Training history tracking (one entry per completed epoch)
train_losses, train_accs = [], []
val_losses, val_accs = [], []
# Start below any reachable accuracy so the first epoch always writes a fresh
# checkpoint. Starting at 0.0 with the strict `>` test below meant a run whose
# validation accuracy never rose above zero saved nothing, leaving the loading
# step with no file, or with a stale file from an earlier run.
best_val_acc = float("-inf")
patience_counter = 0
patience = 5  # epochs without a new best validation accuracy before stopping

num_epochs_improved = 15
print("Starting improved model training...")

for epoch in range(num_epochs_improved):
    # Training
    train_loss, train_acc, train_preds, train_labels = train_one_epoch_improved(
        model_improved, train_loader_improved, optimizer_improved, criterion_improved)

    # Validation
    val_loss, val_acc, val_preds, val_labels = evaluate_improved(
        model_improved, val_loader_improved, criterion_improved)

    # Learning rate scheduling driven by the validation loss
    scheduler.step(val_loss)

    # Track history
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    print(f"Epoch {epoch+1}/{num_epochs_improved}: "
          f"Train Loss={train_loss:.4f}, Train Acc={train_acc:.4f}, "
          f"Val Loss={val_loss:.4f}, Val Acc={val_acc:.4f}")

    # Early stopping on validation accuracy
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        patience_counter = 0
        # Save best model
        torch.save(model_improved.state_dict(), 'best_model_improved.pth')
        print(f"New best validation accuracy: {best_val_acc:.4f}")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

print(f"Training completed. Best validation accuracy: {best_val_acc:.4f}")

In [None]:
# ======================
# Load Best Model and Final Evaluation with Detailed Metrics
# ======================

# Load the best checkpoint. `map_location=device` remaps the saved tensors to
# the current device, so a checkpoint written on a GPU runtime also loads on a
# CPU-only runtime.
model_improved.load_state_dict(torch.load('best_model_improved.pth', map_location=device))

# Final test evaluation with detailed metrics
test_loss_improved, test_acc_improved, test_preds, test_labels = evaluate_improved(
    model_improved, test_loader_improved, criterion_improved)

print("=" * 60)
print("IMPROVED MODEL RESULTS")
print("=" * 60)
print(f"Final Test: Loss={test_loss_improved:.4f}, Acc={test_acc_improved:.4f}")
print()

# Detailed classification report
class_names = ['Normal', 'Pneumonia']  # Adjust based on your dataset classes
print("Classification Report:")
print(classification_report(test_labels, test_preds, target_names=class_names))

# Confusion Matrix
cm = confusion_matrix(test_labels, test_preds)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=class_names, yticklabels=class_names)
plt.title('Improved Model - Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Calculate per-class metrics (arrays are reused by the comparison cells)
from sklearn.metrics import precision_recall_fscore_support
precision, recall, f1, support = precision_recall_fscore_support(test_labels, test_preds)

print("\nPer-class Metrics:")
for i, class_name in enumerate(class_names):
    print(f"{class_name}:")
    print(f"  Precision: {precision[i]:.4f}")
    print(f"  Recall:    {recall[i]:.4f}")
    print(f"  F1-score:  {f1[i]:.4f}")
    print(f"  Support:   {support[i]}")
    print()

In [None]:
# ======================
# Training History Visualization
# ======================

# One row of three panels: loss, accuracy, learning rate
fig, (loss_ax, acc_ax, lr_ax) = plt.subplots(1, 3, figsize=(15, 5))

# Panel 1: training vs. validation loss per epoch
loss_ax.plot(train_losses, label='Training Loss', color='blue')
loss_ax.plot(val_losses, label='Validation Loss', color='red')
loss_ax.set_title('Model Loss Over Time')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.grid(True)

# Panel 2: training vs. validation accuracy per epoch
acc_ax.plot(train_accs, label='Training Accuracy', color='blue')
acc_ax.plot(val_accs, label='Validation Accuracy', color='red')
acc_ax.set_title('Model Accuracy Over Time')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.grid(True)

# Panel 3: only the optimizer's current learning rate is read here, so it is
# drawn as a single horizontal reference line rather than a per-epoch curve.
current_lr = optimizer_improved.param_groups[0]['lr']
lr_ax.axhline(y=current_lr, color='green', linestyle='--', label=f'Final LR: {current_lr:.6f}')
lr_ax.set_title('Learning Rate')
lr_ax.set_xlabel('Epoch')
lr_ax.set_ylabel('Learning Rate')
lr_ax.legend()
lr_ax.grid(True)

plt.tight_layout()
plt.show()

print("Training visualizations completed")

# **Comparison: Baseline vs Improved Model**

以下のセルで、ベースラインモデルと改善されたモデルの性能を比較します。

In [None]:
# ======================
# Detailed Model Comparison with Actual Results
# ======================

print("=" * 80)
print("DETAILED MODEL PERFORMANCE COMPARISON")
print("=" * 80)

# Configuration summary of the baseline run followed by its test metrics
print("BASELINE MODEL (ResNet-18, No Pretrained Weights):")
print("- Architecture: ResNet-18")
print("- Pretrained: No")
print("- Data Augmentation: None")
print("- Learning Rate: 1e-4")
print("- Batch Size: 32")
print("- Epochs: 5")
print("- Class Weighting: No")
print("- Early Stopping: No")
print(f"- Test Accuracy: {test_acc:.4f}")
print(f"- Test Loss: {test_loss:.4f}")
print()

# Configuration summary of the improved run followed by its test metrics
print("IMPROVED MODEL (ResNet-18, Pretrained + Enhancements):")
print("- Architecture: ResNet-18")
print("- Pretrained: Yes (ImageNet)")
print("- Data Augmentation: Yes (Crop, Flip, Rotation, ColorJitter)")
print("- Learning Rate: 1e-3 with ReduceLROnPlateau")
print("- Batch Size: 16")
print("- Epochs: Up to 15 (with early stopping)")
print("- Class Weighting: Yes")
print("- Early Stopping: Yes (patience=5)")
print(f"- Test Accuracy: {test_acc_improved:.4f}")
print(f"- Test Loss: {test_loss_improved:.4f}")
print()

# Calculate improvement. Positive values always mean "improved model is better":
# accuracy gain is improved - baseline, loss gain is baseline - improved.
acc_improvement = test_acc_improved - test_acc
# Guard the relative gain against a baseline accuracy of exactly zero.
acc_improvement_percent = (acc_improvement / test_acc) * 100 if test_acc else float("nan")
loss_improvement = test_loss - test_loss_improved

print("PERFORMANCE IMPROVEMENT:")
print(f"- Accuracy Improvement: {acc_improvement:+.4f} ({acc_improvement_percent:+.2f}%)")
print(f"- Loss Improvement: {loss_improvement:+.4f}")
print()

print("DETAILED METRICS COMPARISON:")
print("-" * 50)
print(f"{'Metric':<15} {'Baseline':<12} {'Improved':<12} {'Improvement':<12}")
print("-" * 50)
print(f"{'Accuracy':<15} {test_acc:<12.4f} {test_acc_improved:<12.4f} {acc_improvement:+.4f}")
# Same sign convention as the "Loss Improvement" line above (previously this
# row printed improved - baseline, contradicting that line).
print(f"{'Loss':<15} {test_loss:<12.4f} {test_loss_improved:<12.4f} {loss_improvement:+.4f}")

# Per-class comparison
print("\nPER-CLASS METRICS COMPARISON:")
print("-" * 80)
for i, class_name in enumerate(class_names):
    print(f"\n{class_name.upper()}:")
    print(f"{'Metric':<12} {'Baseline':<12} {'Improved':<12} {'Improvement':<12}")
    print("-" * 50)
    
    # Calculate improvements for this class (improved - baseline)
    prec_imp = precision[i] - precision_baseline[i]
    rec_imp = recall[i] - recall_baseline[i]
    f1_imp = f1[i] - f1_baseline[i]
    
    print(f"{'Precision':<12} {precision_baseline[i]:<12.4f} {precision[i]:<12.4f} {prec_imp:+.4f}")
    print(f"{'Recall':<12} {recall_baseline[i]:<12.4f} {recall[i]:<12.4f} {rec_imp:+.4f}")
    print(f"{'F1-score':<12} {f1_baseline[i]:<12.4f} {f1[i]:<12.4f} {f1_imp:+.4f}")
    print(f"{'Support':<12} {support_baseline[i]:<12} {support[i]:<12} {support[i] - support_baseline[i]:+}")

print("\n" + "=" * 80)
print("KEY IMPROVEMENTS IMPLEMENTED:")
improvements = [
    "✅ Transfer Learning (ImageNet pretrained weights)",
    "✅ Data Augmentation (Random transforms for training)",
    "✅ Class-weighted Loss (Handle imbalanced dataset)",
    "✅ Learning Rate Scheduling (ReduceLROnPlateau)",
    "✅ Early Stopping (Prevent overfitting)",
    "✅ Optimized Batch Size (Better memory utilization)",
    "✅ Detailed Evaluation Metrics (Precision, Recall, F1-score)",
    "✅ Confusion Matrix Visualization",
    "✅ Training History Tracking",
    "✅ Model Checkpointing (Save best model)"
]

for i, improvement in enumerate(improvements, 1):
    print(f"{i:2d}. {improvement}")

print("=" * 80)

In [None]:
# ======================
# Visual Comparison of Models
# ======================

def _plot_metric_by_class(ax, baseline_values, improved_values, metric_name, labels, width=0.35):
    """Draw paired baseline/improved bars for one per-class metric on ``ax``."""
    positions = np.arange(len(labels))
    ax.bar(positions - width/2, baseline_values, width,
           label='Baseline', color='red', alpha=0.7)
    ax.bar(positions + width/2, improved_values, width,
           label='Improved', color='blue', alpha=0.7)
    ax.set_title(f'{metric_name} Comparison by Class', fontsize=12, fontweight='bold')
    ax.set_ylabel(metric_name)
    ax.set_xlabel('Class')
    ax.set_xticks(positions)
    ax.set_xticklabels(labels)
    ax.legend()
    ax.set_ylim(0, 1)


# Create side-by-side comparison plots (2 rows x 3 columns)
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('Baseline vs Improved Model Comparison', fontsize=16, fontweight='bold')

# 1. Confusion Matrices Comparison
axes[0, 0].set_title('Baseline - Confusion Matrix', fontsize=12, fontweight='bold')
sns.heatmap(cm_baseline, annot=True, fmt='d', cmap='Reds', 
            xticklabels=class_names, yticklabels=class_names, ax=axes[0, 0])
axes[0, 0].set_ylabel('True Label')
axes[0, 0].set_xlabel('Predicted Label')

axes[0, 1].set_title('Improved - Confusion Matrix', fontsize=12, fontweight='bold')
cm_improved = confusion_matrix(test_labels, test_preds)
sns.heatmap(cm_improved, annot=True, fmt='d', cmap='Blues', 
            xticklabels=class_names, yticklabels=class_names, ax=axes[0, 1])
axes[0, 1].set_ylabel('True Label')
axes[0, 1].set_xlabel('Predicted Label')

# 2. Accuracy Comparison Bar Chart
# Named `model_labels` rather than `models`: rebinding `models` would hide the
# imported `torchvision.models` module and break re-running the cells that
# call `models.resnet18(...)`.
model_labels = ['Baseline', 'Improved']
accuracies = [test_acc, test_acc_improved]
colors = ['red', 'blue']

bars = axes[0, 2].bar(model_labels, accuracies, color=colors, alpha=0.7)
axes[0, 2].set_title('Overall Accuracy Comparison', fontsize=12, fontweight='bold')
axes[0, 2].set_ylabel('Accuracy')
axes[0, 2].set_ylim(0, 1)

# Add value labels on bars
for bar, acc in zip(bars, accuracies):
    height = bar.get_height()
    axes[0, 2].text(bar.get_x() + bar.get_width()/2., height + 0.01,
                    f'{acc:.4f}', ha='center', va='bottom', fontweight='bold')

# 3-5. Per-class precision, recall and F1-score comparisons (bottom row)
_plot_metric_by_class(axes[1, 0], precision_baseline, precision, 'Precision', class_names)
_plot_metric_by_class(axes[1, 1], recall_baseline, recall, 'Recall', class_names)
_plot_metric_by_class(axes[1, 2], f1_baseline, f1, 'F1-Score', class_names)

plt.tight_layout()
plt.show()

print("Visual comparison completed!")