In [None]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns
import os
from datetime import datetime
import pytz
from pathlib import Path
import sklearn as sk
import tensorflow as tf
import torch
import torch.nn as nn
from torchvision.io import read_image
import time
import timeit
import random
import torchvision.models as models
from torchvision.datasets import CIFAR100
from torch.utils.data import DataLoader, Subset
import torchvision.transforms as transforms
import sklearn.metrics
import csv
import timeit

%matplotlib inline

In [None]:
# Mount Google Drive at /content/drive; the dataset root and log directory used
# further down are both given as paths under 'drive/MyDrive/...'.
# NOTE(review): this import sits outside the main import cell, presumably because
# `google.colab` is only importable on Colab — confirm before moving it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
ls 'drive/MyDrive/DS5220: Final Project/logs/densenet'

[0m[01;34m20240425_214007_logs[0m/  [01;34m20240425_222653_logs[0m/


In [None]:
# Parent directory for training logs. Each training cell creates its own
# '<YYYYmmdd_HHMMSS>_logs' sub-folder here (metrics CSV, config dump, saved weights).
# NOTE(review): relative path — presumably resolved against the Colab working
# directory where Drive is mounted; confirm if the notebook is run elsewhere.
BASE_LOGS_PATH = 'drive/MyDrive/DS5220: Final Project/logs/densenet'

In [None]:
# Accelerator selection (see https://pytorch.org/docs/stable/notes/mps.html):
# prefer CUDA, then Apple's MPS backend, and fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

# Optional CUDA memory debugging (left disabled):
#   before training, record tracebacks and event history for snapshots
#torch.cuda.memory._record_memory_history()
#   after training, write the snapshot to disk
#torch.cuda.memory._dump_snapshot("my_snapshot.pickle")


Using cuda device


In [None]:
###################################################
CIFAR100_ROOT_PATH='drive/MyDrive/DS5220: Final Project'  # Modify this line: dataset root folder passed to torchvision's CIFAR100 (the training set is downloaded into it when missing)
###################################################

class Cifar100():
    """CIFAR-100 train/eval datasets and data loaders without augmentation.

    Both splits share one transform: ``ToTensor`` followed by per-channel
    ``Normalize(mean, std)``.

    Attributes set by ``__init__``:
        mean, std: per-channel normalisation statistics (3 values each).
        BATCH_SIZE: batch size used by both loaders.
        transform: the shared preprocessing pipeline.
        train_dataset / eval_dataset: torchvision ``CIFAR100`` splits.
        train_data_loader: shuffled loader over the training split.
        eval_data_loader: unshuffled loader over the test split.
    """

    def __init__(self,
                 calculate_mean_and_std = False,
                 batch_size = 128):
        """Build datasets and loaders.

        Args:
            calculate_mean_and_std: when True, normalise with statistics computed
                from the training images; otherwise use 0.5 for every channel.
            batch_size: samples per batch for both loaders (default 128, the
                value that was previously hard-coded).
        """
        if calculate_mean_and_std:
            self.mean, self.std = self.calculate_mean_and_std()
        else:
            self.mean, self.std = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)

        self.BATCH_SIZE = batch_size
        self.transform = transforms.Compose(
            [transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std)])

        self.train_dataset = CIFAR100(root=CIFAR100_ROOT_PATH,
                                download=True,
                                train=True,
                                transform=self.transform)

        # No download flag here: the training-split construction above already
        # requested the download into the same root.
        self.eval_dataset = CIFAR100(root=CIFAR100_ROOT_PATH,
                                train=False,
                                transform=self.transform)

        self.train_data_loader = DataLoader(dataset=self.train_dataset,
                               num_workers=0,
                               batch_size=self.BATCH_SIZE,
                               shuffle=True)

        self.eval_data_loader = DataLoader(dataset=self.eval_dataset,
                              num_workers=0,
                              batch_size=self.BATCH_SIZE,
                              shuffle=False)

    def calculate_mean_and_std(self):
        """Return per-channel mean and std of the training images, scaled to [0, 1].

        Returns:
            (mean, std): two lists of floats, one entry per colour channel.
        """
        train_dataset = CIFAR100(root=CIFAR100_ROOT_PATH,
                                download=True,
                                train=True)
        # torchvision's CIFAR datasets keep the decoded images in `.data` as a
        # channels-last uint8 array, so the statistics can be taken directly from
        # it instead of converting every sample to a PIL image and back and then
        # concatenating 50,000 small arrays.
        pixels = np.asarray(train_dataset.data)
        # reduce over every axis except the trailing channel axis
        non_channel_axes = tuple(range(pixels.ndim - 1))
        _mean = (np.mean(pixels, axis=non_channel_axes) / 255).tolist()
        _std = (np.std(pixels, axis=non_channel_axes) / 255).tolist()

        return _mean, _std




class Cifar100WithAugmentation():
    """CIFAR-100 datasets and loaders with training-time augmentation.

    The training split is randomly cropped (after 4-pixel padding) and randomly
    flipped horizontally before normalisation; the evaluation split is only
    converted to a tensor and normalised.

    Attributes set by ``__init__``:
        mean, std: per-channel normalisation statistics (3 values each).
        BATCH_SIZE: batch size used by both loaders.
        train_transform / eval_transform: preprocessing pipelines per split.
        train_dataset / eval_dataset: torchvision ``CIFAR100`` splits.
        train_data_loader: shuffled loader over the training split.
        eval_data_loader: unshuffled loader over the test split.
    """

    def __init__(self, calculate_mean_and_std=False, batch_size=128):
        """Build datasets and loaders.

        Args:
            calculate_mean_and_std: when True, normalise with statistics computed
                from the training images; otherwise use 0.5 for every channel.
            batch_size: samples per batch for both loaders (default 128, the
                value that was previously hard-coded).
        """
        # The dataset root comes from the module-level CIFAR100_ROOT_PATH so that
        # the path only has to be edited in one place.
        if calculate_mean_and_std:
            self.mean, self.std = self.calculate_mean_and_std()
        else:
            self.mean, self.std = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)

        self.BATCH_SIZE = batch_size
        # data augmentation and normalization for training
        self.train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),  # pad 4 pixels on each side and randomly crop a 32x32 image
            transforms.RandomHorizontalFlip(),     # randomly flip the image horizontally
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std)
        ])

        # normalization for evaluation (without augmentation)
        self.eval_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std)
        ])

        self.train_dataset = CIFAR100(root=CIFAR100_ROOT_PATH,
                                               download=True,
                                               train=True,
                                               transform=self.train_transform)

        self.eval_dataset = CIFAR100(root=CIFAR100_ROOT_PATH,
                                              train=False,
                                              transform=self.eval_transform)

        self.train_data_loader = DataLoader(dataset=self.train_dataset,
                                            num_workers=4,
                                            batch_size=self.BATCH_SIZE,
                                            shuffle=True)

        self.eval_data_loader = DataLoader(dataset=self.eval_dataset,
                                           num_workers=4,
                                           batch_size=self.BATCH_SIZE,
                                           shuffle=False)

    def calculate_mean_and_std(self):
        """Return per-channel mean and std of the training images, scaled to [0, 1].

        Returns:
            (mean, std): two lists of floats, one entry per colour channel.
        """
        train_dataset = CIFAR100(root=CIFAR100_ROOT_PATH,
                                          download=True,
                                          train=True)
        # torchvision's CIFAR datasets keep the decoded images in `.data` as a
        # channels-last uint8 array; reading it directly avoids 50,000 PIL
        # conversions and a full concatenated copy.
        pixels = np.asarray(train_dataset.data)
        # reduce over every axis except the trailing channel axis
        non_channel_axes = tuple(range(pixels.ndim - 1))
        mean = np.mean(pixels, axis=non_channel_axes) / 255
        std = np.std(pixels, axis=non_channel_axes) / 255
        return mean.tolist(), std.tolist()


# Build both data pipelines, normalising with statistics computed from the
# training images (calculate_mean_and_std=True) instead of the 0.5 fallback.
# NOTE(review): each constructor recomputes the same statistics, and `cf100` is not
# referenced by the training cells visible in this notebook — confirm it is needed.
cf100 = Cifar100(calculate_mean_and_std=True)
cf100_w_aug = Cifar100WithAugmentation(calculate_mean_and_std=True)


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified




In [None]:
# Experiment: torchvision DenseNet-161 trained from scratch on augmented CIFAR-100.
config = {
    'dataloader' : cf100_w_aug,
    # CIFAR-100 has 100 classes. The torchvision default head has 1000 outputs,
    # which wastes parameters and lets top-k contain labels that never occur.
    'model': models.densenet161(num_classes=100),
    'epochs': 40,
    'optimizer': 'SGD',
    'learning_rate': 0.01,
    'momentum': 0.9,
    'weight_decay': 5e-4,
    'loss': nn.CrossEntropyLoss(),
    'log_dir': os.path.join(BASE_LOGS_PATH, time.strftime('%Y%m%d_%H%M%S') + '_logs'),
    'device' : device,
    'run_time' : None
}

model = config['model']
model.to(config['device'])
EPOCHS = config['epochs']
criterion = config['loss']
optimizer = torch.optim.SGD(model.parameters(),
                            lr=config['learning_rate'],
                            momentum=config['momentum'],
                            weight_decay=config['weight_decay'])

# create log directory
if not os.path.exists(config['log_dir']):
    os.makedirs(config['log_dir'])
    print(f"Directory '{config['log_dir']}' created")
else:
    print(f"Directory '{config['log_dir']}' already exists")


metrics_path = os.path.join(config['log_dir'], 'training_metrics.csv')
with open(metrics_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    # write the header row
    writer.writerow(['Epoch', 'Train Loss', 'Train Accuracy', 'Eval Loss', 'Eval Accuracy', 'Top-1 Accuracy', 'Top-5 Accuracy', 'Epoch Time (seconds)'])
    training_start_time = timeit.default_timer()
    for epoch in range(EPOCHS):
        start_time = timeit.default_timer()

        # ---- training pass ----
        # Metrics are accumulated per sample. Averaging per-batch means would
        # over-weight the last batch, which is smaller than the others.
        train_loss_sum, train_correct, train_seen = 0.0, 0, 0
        model.train()
        for images, labels in config['dataloader'].train_data_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            n_batch = labels.size(0)
            # criterion returns the batch mean (default reduction), so scale back to a sum
            train_loss_sum += loss.item() * n_batch
            train_correct += (outputs.argmax(dim=1) == labels).sum().item()
            train_seen += n_batch

        # ---- evaluation pass ----
        eval_loss_sum, top1_correct, top5_correct, eval_seen = 0.0, 0, 0, 0
        model.eval()
        with torch.no_grad():
            for images, labels in config['dataloader'].eval_data_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)

                n_batch = labels.size(0)
                eval_loss_sum += criterion(outputs, labels).item() * n_batch

                # top-1: the arg-max class equals the label
                top1_correct += (outputs.argmax(dim=1) == labels).sum().item()

                # top-5: the label appears among the five highest-scoring classes
                _, top5_preds = outputs.topk(5, dim=1)
                top5_correct += (top5_preds == labels.unsqueeze(1)).any(dim=1).sum().item()

                eval_seen += n_batch

        end_time = timeit.default_timer()  # end timing the epoch
        epoch_duration = end_time - start_time

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError
        train_loss = train_loss_sum / max(train_seen, 1)
        train_accuracy = 100.0 * train_correct / max(train_seen, 1)
        eval_loss = eval_loss_sum / max(eval_seen, 1)
        top1_accuracy = 100.0 * top1_correct / max(eval_seen, 1)
        top5_accuracy = 100.0 * top5_correct / max(eval_seen, 1)
        # 'Eval Accuracy' has always been the top-1 accuracy; the column is kept
        # so the CSV layout stays unchanged.
        eval_accuracy = top1_accuracy

        # write model training info to csv
        writer.writerow([
            epoch + 1,
            train_loss,
            train_accuracy,
            eval_loss,
            eval_accuracy,
            top1_accuracy,
            top5_accuracy,
            epoch_duration
        ])
        # flush after every epoch so finished rows survive an interrupted run
        file.flush()

        print(f"Epoch {epoch+1}/{config['epochs']}")
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")
        print(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.2f}%, Top-1 Accuracy: {top1_accuracy:.2f}%, Top-5 Accuracy: {top5_accuracy:.2f}%")
    training_end_time = timeit.default_timer()  # end timing the whole run
    training_duration = training_end_time - training_start_time
    config['run_time'] = training_duration

config_path = os.path.join(config['log_dir'], 'config.txt')
# output config file to directory (values are written via their str() form)
with open(config_path, 'w') as f:
    for key, value in config.items():
        f.write(f'{key}: {value}\n')


# The state dict is the portable artefact. The second file pickles the whole
# module object, so loading it needs the same class definitions to be importable.
torch.save(model.state_dict(), os.path.join(config['log_dir'], 'model_state_dict.pth'))
torch.save(model, os.path.join(config['log_dir'], 'complete_model.pth'))


Directory 'drive/MyDrive/DS5220: Final Project/logs/densenet/20240425_214007_logs' created


  self.pid = os.fork()
  self.pid = os.fork()


Epoch 1/40
Train Loss: 3.9412, Train Accuracy: 11.38%
Eval Loss: 3.4527, Eval Accuracy: 18.37%, Top-1 Accuracy: 18.37%, Top-5 Accuracy: 44.80%
Epoch 2/40
Train Loss: 3.2386, Train Accuracy: 21.20%
Eval Loss: 3.0688, Eval Accuracy: 24.39%, Top-1 Accuracy: 24.39%, Top-5 Accuracy: 54.61%
Epoch 3/40
Train Loss: 2.8771, Train Accuracy: 27.61%
Eval Loss: 2.7533, Eval Accuracy: 30.56%, Top-1 Accuracy: 30.56%, Top-5 Accuracy: 61.42%
Epoch 4/40
Train Loss: 2.6211, Train Accuracy: 32.52%
Eval Loss: 2.5471, Eval Accuracy: 35.75%, Top-1 Accuracy: 35.75%, Top-5 Accuracy: 66.61%
Epoch 5/40
Train Loss: 2.4038, Train Accuracy: 36.75%
Eval Loss: 2.4662, Eval Accuracy: 36.13%, Top-1 Accuracy: 36.13%, Top-5 Accuracy: 69.18%
Epoch 6/40
Train Loss: 2.2349, Train Accuracy: 40.46%
Eval Loss: 2.3619, Eval Accuracy: 39.30%, Top-1 Accuracy: 39.30%, Top-5 Accuracy: 70.55%
Epoch 7/40
Train Loss: 2.0849, Train Accuracy: 44.16%
Eval Loss: 2.2720, Eval Accuracy: 41.21%, Top-1 Accuracy: 41.21%, Top-5 Accuracy: 72.50%

In [None]:
class CustomDenseNet161(nn.Module):
    """DenseNet-161 feature extractor with a dropout + linear classification head.

    Args:
        dropout_rate: dropout probability applied to the pooled features before
            the final linear layer.
        num_classes: number of output logits.
    """

    def __init__(self, dropout_rate=0.5, num_classes=100):
        super().__init__()
        # Randomly initialised backbone. Calling the builder without arguments
        # loads no pre-trained weights and avoids the `pretrained=` keyword,
        # which torchvision deprecated in 0.13 in favour of `weights=`.
        densenet = models.densenet161()

        # Keep the convolutional trunk; swap the stock classifier for one that
        # applies dropout first.
        self.features = densenet.features
        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(densenet.classifier.in_features, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        features = self.features(x)
        out = nn.functional.relu(features, inplace=True)
        # global average pooling down to 1x1, then flatten to (batch, channels)
        out = nn.functional.adaptive_avg_pool2d(out, (1, 1))
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out

# Experiment: CustomDenseNet161 (dropout head) trained from scratch on augmented CIFAR-100.
config = {
    'dataloader' : cf100_w_aug,
    'model': CustomDenseNet161(dropout_rate=0.5, num_classes=100),
    'epochs': 40,
    'optimizer': 'SGD',
    'learning_rate': 0.01,
    'momentum': 0.9,
    'weight_decay': 5e-4,
    'loss': nn.CrossEntropyLoss(),
    'log_dir': os.path.join(BASE_LOGS_PATH, time.strftime('%Y%m%d_%H%M%S') + '_logs'),
    'device' : device,
    'run_time' : None
}

model = config['model']
model.to(config['device'])
EPOCHS = config['epochs']
criterion = config['loss']
optimizer = torch.optim.SGD(model.parameters(),
                            lr=config['learning_rate'],
                            momentum=config['momentum'],
                            weight_decay=config['weight_decay'])

# create log directory
if not os.path.exists(config['log_dir']):
    os.makedirs(config['log_dir'])
    print(f"Directory '{config['log_dir']}' created")
else:
    print(f"Directory '{config['log_dir']}' already exists")


metrics_path = os.path.join(config['log_dir'], 'training_metrics.csv')
with open(metrics_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    # write the header row
    writer.writerow(['Epoch', 'Train Loss', 'Train Accuracy', 'Eval Loss', 'Eval Accuracy', 'Top-1 Accuracy', 'Top-5 Accuracy', 'Epoch Time (seconds)'])
    training_start_time = timeit.default_timer()
    for epoch in range(EPOCHS):
        start_time = timeit.default_timer()

        # ---- training pass ----
        # Metrics are accumulated per sample. Averaging per-batch means would
        # over-weight the last batch, which is smaller than the others.
        train_loss_sum, train_correct, train_seen = 0.0, 0, 0
        model.train()
        for images, labels in config['dataloader'].train_data_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            n_batch = labels.size(0)
            # criterion returns the batch mean (default reduction), so scale back to a sum
            train_loss_sum += loss.item() * n_batch
            train_correct += (outputs.argmax(dim=1) == labels).sum().item()
            train_seen += n_batch

        # ---- evaluation pass ----
        eval_loss_sum, top1_correct, top5_correct, eval_seen = 0.0, 0, 0, 0
        model.eval()
        with torch.no_grad():
            for images, labels in config['dataloader'].eval_data_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)

                n_batch = labels.size(0)
                eval_loss_sum += criterion(outputs, labels).item() * n_batch

                # top-1: the arg-max class equals the label
                top1_correct += (outputs.argmax(dim=1) == labels).sum().item()

                # top-5: the label appears among the five highest-scoring classes
                _, top5_preds = outputs.topk(5, dim=1)
                top5_correct += (top5_preds == labels.unsqueeze(1)).any(dim=1).sum().item()

                eval_seen += n_batch

        end_time = timeit.default_timer()  # end timing the epoch
        epoch_duration = end_time - start_time

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError
        train_loss = train_loss_sum / max(train_seen, 1)
        train_accuracy = 100.0 * train_correct / max(train_seen, 1)
        eval_loss = eval_loss_sum / max(eval_seen, 1)
        top1_accuracy = 100.0 * top1_correct / max(eval_seen, 1)
        top5_accuracy = 100.0 * top5_correct / max(eval_seen, 1)
        # 'Eval Accuracy' has always been the top-1 accuracy; the column is kept
        # so the CSV layout stays unchanged.
        eval_accuracy = top1_accuracy

        # write model training info to csv
        writer.writerow([
            epoch + 1,
            train_loss,
            train_accuracy,
            eval_loss,
            eval_accuracy,
            top1_accuracy,
            top5_accuracy,
            epoch_duration
        ])
        # flush after every epoch so finished rows survive an interrupted run
        file.flush()

        print(f"Epoch {epoch+1}/{config['epochs']}")
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")
        print(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.2f}%, Top-1 Accuracy: {top1_accuracy:.2f}%, Top-5 Accuracy: {top5_accuracy:.2f}%")
    training_end_time = timeit.default_timer()  # end timing the whole run
    training_duration = training_end_time - training_start_time
    config['run_time'] = training_duration

config_path = os.path.join(config['log_dir'], 'config.txt')
# output config file to directory (values are written via their str() form)
with open(config_path, 'w') as f:
    for key, value in config.items():
        f.write(f'{key}: {value}\n')


# The state dict is the portable artefact. The second file pickles the whole
# module object, so loading it needs the CustomDenseNet161 class to be defined.
torch.save(model.state_dict(), os.path.join(config['log_dir'], 'model_state_dict.pth'))
torch.save(model, os.path.join(config['log_dir'], 'complete_model.pth'))


Directory 'drive/MyDrive/DS5220: Final Project/logs/densenet/20240426_194838_logs' created
Epoch 1/40
Train Loss: 4.1093, Train Accuracy: 8.73%
Eval Loss: 3.6482, Eval Accuracy: 14.87%, Top-1 Accuracy: 14.87%, Top-5 Accuracy: 39.81%
Epoch 2/40
Train Loss: 3.5408, Train Accuracy: 16.47%
Eval Loss: 3.2464, Eval Accuracy: 22.24%, Top-1 Accuracy: 22.24%, Top-5 Accuracy: 50.48%
Epoch 3/40
Train Loss: 3.1920, Train Accuracy: 22.40%
Eval Loss: 3.0125, Eval Accuracy: 26.62%, Top-1 Accuracy: 26.62%, Top-5 Accuracy: 57.02%
Epoch 4/40
Train Loss: 2.9266, Train Accuracy: 27.04%
Eval Loss: 2.7743, Eval Accuracy: 30.60%, Top-1 Accuracy: 30.60%, Top-5 Accuracy: 61.76%
Epoch 5/40
Train Loss: 2.7172, Train Accuracy: 30.82%
Eval Loss: 2.5427, Eval Accuracy: 34.56%, Top-1 Accuracy: 34.56%, Top-5 Accuracy: 66.71%
Epoch 6/40
Train Loss: 2.5413, Train Accuracy: 34.32%
Eval Loss: 2.4323, Eval Accuracy: 37.45%, Top-1 Accuracy: 37.45%, Top-5 Accuracy: 69.21%
Epoch 7/40
Train Loss: 2.4012, Train Accuracy: 37.30

In [None]:
# Experiment: torchvision DenseNet-121 trained from scratch on augmented CIFAR-100.
config = {
    'dataloader' : cf100_w_aug,
    # CIFAR-100 has 100 classes. The torchvision default head has 1000 outputs,
    # which wastes parameters and lets top-k contain labels that never occur.
    'model': models.densenet121(num_classes=100),
    'epochs': 40,
    'optimizer': 'SGD',
    'learning_rate': 0.01,
    'momentum': 0.9,
    'weight_decay': 5e-4,
    'loss': nn.CrossEntropyLoss(),
    'log_dir': os.path.join(BASE_LOGS_PATH, time.strftime('%Y%m%d_%H%M%S') + '_logs'),
    'device' : device,
    'run_time' : None
}

model = config['model']
model.to(config['device'])
EPOCHS = config['epochs']
criterion = config['loss']
optimizer = torch.optim.SGD(model.parameters(),
                            lr=config['learning_rate'],
                            momentum=config['momentum'],
                            weight_decay=config['weight_decay'])

# create log directory
if not os.path.exists(config['log_dir']):
    os.makedirs(config['log_dir'])
    print(f"Directory '{config['log_dir']}' created")
else:
    print(f"Directory '{config['log_dir']}' already exists")


metrics_path = os.path.join(config['log_dir'], 'training_metrics.csv')
with open(metrics_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    # write the header row
    writer.writerow(['Epoch', 'Train Loss', 'Train Accuracy', 'Eval Loss', 'Eval Accuracy', 'Top-1 Accuracy', 'Top-5 Accuracy', 'Epoch Time (seconds)'])
    training_start_time = timeit.default_timer()
    for epoch in range(EPOCHS):
        start_time = timeit.default_timer()

        # ---- training pass ----
        # Metrics are accumulated per sample. Averaging per-batch means would
        # over-weight the last batch, which is smaller than the others.
        train_loss_sum, train_correct, train_seen = 0.0, 0, 0
        model.train()
        for images, labels in config['dataloader'].train_data_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            n_batch = labels.size(0)
            # criterion returns the batch mean (default reduction), so scale back to a sum
            train_loss_sum += loss.item() * n_batch
            train_correct += (outputs.argmax(dim=1) == labels).sum().item()
            train_seen += n_batch

        # ---- evaluation pass ----
        eval_loss_sum, top1_correct, top5_correct, eval_seen = 0.0, 0, 0, 0
        model.eval()
        with torch.no_grad():
            for images, labels in config['dataloader'].eval_data_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)

                n_batch = labels.size(0)
                eval_loss_sum += criterion(outputs, labels).item() * n_batch

                # top-1: the arg-max class equals the label
                top1_correct += (outputs.argmax(dim=1) == labels).sum().item()

                # top-5: the label appears among the five highest-scoring classes
                _, top5_preds = outputs.topk(5, dim=1)
                top5_correct += (top5_preds == labels.unsqueeze(1)).any(dim=1).sum().item()

                eval_seen += n_batch

        end_time = timeit.default_timer()  # end timing the epoch
        epoch_duration = end_time - start_time

        # max(..., 1) keeps an empty loader from raising ZeroDivisionError
        train_loss = train_loss_sum / max(train_seen, 1)
        train_accuracy = 100.0 * train_correct / max(train_seen, 1)
        eval_loss = eval_loss_sum / max(eval_seen, 1)
        top1_accuracy = 100.0 * top1_correct / max(eval_seen, 1)
        top5_accuracy = 100.0 * top5_correct / max(eval_seen, 1)
        # 'Eval Accuracy' has always been the top-1 accuracy; the column is kept
        # so the CSV layout stays unchanged.
        eval_accuracy = top1_accuracy

        # write model training info to csv
        writer.writerow([
            epoch + 1,
            train_loss,
            train_accuracy,
            eval_loss,
            eval_accuracy,
            top1_accuracy,
            top5_accuracy,
            epoch_duration
        ])
        # flush after every epoch so finished rows survive an interrupted run
        file.flush()

        print(f"Epoch {epoch+1}/{config['epochs']}")
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")
        print(f"Eval Loss: {eval_loss:.4f}, Eval Accuracy: {eval_accuracy:.2f}%, Top-1 Accuracy: {top1_accuracy:.2f}%, Top-5 Accuracy: {top5_accuracy:.2f}%")
    training_end_time = timeit.default_timer()  # end timing the whole run
    training_duration = training_end_time - training_start_time
    config['run_time'] = training_duration

config_path = os.path.join(config['log_dir'], 'config.txt')
# output config file to directory (values are written via their str() form)
with open(config_path, 'w') as f:
    for key, value in config.items():
        f.write(f'{key}: {value}\n')


# The state dict is the portable artefact. The second file pickles the whole
# module object, so loading it needs the same class definitions to be importable.
torch.save(model.state_dict(), os.path.join(config['log_dir'], 'model_state_dict.pth'))
torch.save(model, os.path.join(config['log_dir'], 'complete_model.pth'))


Directory 'drive/MyDrive/DS5220: Final Project/logs/densenet/20240425_222653_logs' created
Epoch 1/40
Train Loss: 4.0236, Train Accuracy: 10.65%
Eval Loss: 3.4594, Eval Accuracy: 17.34%, Top-1 Accuracy: 17.34%, Top-5 Accuracy: 44.47%
Epoch 2/40
Train Loss: 3.2625, Train Accuracy: 20.41%
Eval Loss: 3.0544, Eval Accuracy: 25.04%, Top-1 Accuracy: 25.04%, Top-5 Accuracy: 53.51%
Epoch 3/40
Train Loss: 2.9417, Train Accuracy: 26.40%
Eval Loss: 2.8547, Eval Accuracy: 28.59%, Top-1 Accuracy: 28.59%, Top-5 Accuracy: 59.69%
Epoch 4/40
Train Loss: 2.7183, Train Accuracy: 30.60%
Eval Loss: 2.6921, Eval Accuracy: 31.94%, Top-1 Accuracy: 31.94%, Top-5 Accuracy: 62.95%
Epoch 5/40
Train Loss: 2.5374, Train Accuracy: 34.32%
Eval Loss: 2.5198, Eval Accuracy: 34.83%, Top-1 Accuracy: 34.83%, Top-5 Accuracy: 66.84%
Epoch 6/40
Train Loss: 2.3929, Train Accuracy: 37.21%
Eval Loss: 2.4004, Eval Accuracy: 37.74%, Top-1 Accuracy: 37.74%, Top-5 Accuracy: 69.00%
Epoch 7/40
Train Loss: 2.2565, Train Accuracy: 39.9