PS D:\MS Purdue\1\ECE570> python --version

Python 3.9.0

PS D:\MS Purdue\1\ECE570> py -3.11 --version

Python 3.11.9

py -3.11 -m pip list

py -3.11 -m pip install <package_name>

In [29]:
import torch
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
import os

# Print the current working directory
current_dir = os.getcwd()
print(f"Current directory: {current_dir}")

Current directory: d:\MS Purdue\1\ECE570\Project\1stAttempt


In [30]:
transform = transforms.Compose([
    # Resize to Tiny ImageNet's image size
    transforms.Resize((64, 64)),  
    transforms.ToTensor(),
    # Normalization
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) # Normalization
])

# Path to dataset
data_dir = '../tiny-imagenet-200Class-500Data/'

# Load training and validation data
train_dataset = datasets.ImageFolder(root=data_dir + 'train', transform=transform)
val_dataset = datasets.ImageFolder(root=data_dir + 'val', transform=transform)


# DataLoader for batching
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Check class-to-index mapping
print(train_dataset.class_to_idx) # Class labels
# Check class-to-index mapping and dataset size
print(train_dataset.class_to_idx)  # Class labels

print(f"Train subset size: {len(train_dataset)}")
print(f"Validation subset size: {len(val_dataset)}")

# Example: Iterate through the train_loader to test
for images, labels in train_loader:
    print(images.shape, labels.shape)
    break

{'n01443537': 0, 'n01629819': 1, 'n01641577': 2, 'n01644900': 3, 'n01698640': 4, 'n01742172': 5, 'n01768244': 6, 'n01770393': 7, 'n01774384': 8, 'n01774750': 9, 'n01784675': 10, 'n01855672': 11, 'n01882714': 12, 'n01910747': 13, 'n01917289': 14, 'n01944390': 15, 'n01945685': 16, 'n01950731': 17, 'n01983481': 18, 'n01984695': 19, 'n02002724': 20, 'n02056570': 21, 'n02058221': 22, 'n02074367': 23, 'n02085620': 24, 'n02094433': 25, 'n02099601': 26, 'n02099712': 27, 'n02106662': 28, 'n02113799': 29, 'n02123045': 30, 'n02123394': 31, 'n02124075': 32, 'n02125311': 33, 'n02129165': 34, 'n02132136': 35, 'n02165456': 36, 'n02190166': 37, 'n02206856': 38, 'n02226429': 39, 'n02231487': 40, 'n02233338': 41, 'n02236044': 42, 'n02268443': 43, 'n02279972': 44, 'n02281406': 45, 'n02321529': 46, 'n02364673': 47, 'n02395406': 48, 'n02403003': 49, 'n02410509': 50, 'n02415577': 51, 'n02423022': 52, 'n02437312': 53, 'n02480495': 54, 'n02481823': 55, 'n02486410': 56, 'n02504458': 57, 'n02509815': 58, 'n0266

In [31]:
# Initialize the model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Use GPU if available

# Import the specific functions from the vanillanet.py file
from vanillanet_NN import vanillanet_5, vanillanet_6, vanillanet_9

# Create an instance of the VanillaNet model (VanillaNet-9 in this case)
model = vanillanet_5(pretrained=False, num_classes=200).to(device)  

# Print model architecture to verify
print(model)


VanillaNet(
  (stem1): Sequential(
    (0): Conv2d(3, 512, kernel_size=(4, 4), stride=(4, 4))
    (1): BatchNorm2d(512, eps=1e-06, momentum=0.1, affine=True, track_running_stats=True)
  )
  (stem2): Sequential(
    (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1))
    (1): BatchNorm2d(512, eps=1e-06, momentum=0.1, affine=True, track_running_stats=True)
    (2): activation(
      (bn): BatchNorm2d(512, eps=1e-06, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (stages): ModuleList(
    (0): Block(
      (conv1): Sequential(
        (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(512, eps=1e-06, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv2): Sequential(
        (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(1024, eps=1e-06, momentum=0.1, affine=True, track_running_stats=True)
      )
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=Fa

In [32]:
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt  
from timm.optim import AdamP  

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Cross-entropy loss for classification
optimizer = AdamP(model.parameters(), lr=0.001, weight_decay=0.35)  # LAMB or AdamP with weight decay

# Early Stopping Parameters
best_val_loss = float('inf')
patience = 5
counter = 0

# Initialize lists to store metrics
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

num_epochs = 10000  

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to device

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Update running loss
        running_loss += loss.item()

        # Calculate training accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    # Calculate average training loss and accuracy
    avg_train_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct_train / total_train

    train_losses.append(avg_train_loss)
    train_accuracies.append(train_accuracy)

    # Validation loop
    model.eval()  # Set model to evaluation mode
    correct_val = 0
    total_val = 0
    val_loss = 0.0

    with torch.no_grad():  # No need to calculate gradients during validation
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # Calculate validation accuracy
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    # Calculate average validation loss and accuracy
    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = 100 * correct_val / total_val

    val_losses.append(avg_val_loss)
    val_accuracies.append(val_accuracy)

    # Print epoch statistics
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%')
    print(f'Validation Accuracy: {val_accuracy:.2f}%, Validation Loss: {avg_val_loss:.4f}')


    if(epoch % 10 == 0):
        # Save checkpoint
        checkpoint = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': avg_train_loss,
            'val_loss': avg_val_loss,
            'train_accuracy': train_accuracy,
            'val_accuracy': val_accuracy,
        }
        torch.save(checkpoint, f'checkpoint_epoch_{epoch + 1}.pth')

    # Check for early stopping
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        counter = 0
        # Save the best model
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': avg_val_loss,
        }, 'best_checkpoint.pth')
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping triggered")
            break

print('Training complete!')

# Plotting only up to the epoch where training stopped
epochs = range(1, epoch + 2)  # Adjust the range to match the actual number of trained epochs

plt.figure(figsize=(12, 5))

# Plot training & validation loss
plt.subplot(1, 2, 1)
plt.plot(epochs, train_losses, 'bo-', label='Train Loss')
plt.plot(epochs, val_losses, 'ro-', label='Validation Loss')
plt.title('Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Plot training & validation accuracy
plt.subplot(1, 2, 2)
plt.plot(epochs, train_accuracies, 'bo-', label='Train Accuracy')
plt.plot(epochs, val_accuracies, 'ro-', label='Validation Accuracy')
plt.title('Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.legend()

# Save the plot after training completion
plt.savefig('training_progress.png')
plt.show()


KeyboardInterrupt: 