In [2]:
import numpy as np 
import pandas as pd 
import torch # for models
from torch.nn import Module, Linear, ReLU, LogSoftmax, Conv2d, CrossEntropyLoss
import torchvision
from torchvision import transforms, datasets
import pathlib
from torchvision.datasets import ImageFolder
import torch.nn.functional as F
import torchmetrics
from torch.utils.data import DataLoader, SubsetRandomSampler, Subset
from sklearn.model_selection import train_test_split

  warn(f"Failed to load image Python extension: {e}")


In [3]:
# Sanity check: report whether PyTorch can see a CUDA-capable GPU in this kernel
torch.cuda.is_available()

True

In [4]:
# Run on the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [5]:
class CNN(Module):
    """Minimal convolutional image classifier.

    Architecture: 3x3 convolution (3 -> 16 channels) -> ReLU -> global average
    pooling -> linear layer -> log-softmax over classes.

    Args:
        nClasses: number of target classes; sets the width of the final linear layer.

    The forward pass returns log-probabilities of shape (batch, nClasses).
    Because the output is already log-softmaxed, pair it with ``NLLLoss``;
    ``cross_entropy`` would apply log-softmax a second time.
    """

    def __init__(self, nClasses):
        super(CNN, self).__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=16, kernel_size=3)
        self.relu1 = ReLU()
        # Collapse every feature map to a single value so the head does not
        # depend on the input resolution.
        self.pool = torch.nn.AdaptiveAvgPool2d((1, 1))
        # Classification head: without it the network never produced
        # per-class scores and `nClasses` went unused.
        self.fc = Linear(in_features=16, out_features=nClasses)
        self.logSoftmax = LogSoftmax(dim=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pool(x)
        # (batch, 16, 1, 1) -> (batch, 16)
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)
        output = self.logSoftmax(x)
        return output

In [6]:
# preprocessing
# Centre-crop to 224x224, convert to a float tensor, then normalise each
# channel with mean 0.5 / std 0.5.
preprocess = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

path = pathlib.Path("Rice_Image_Dataset")
dataset = datasets.ImageFolder(path, transform=preprocess)

# dataset loader
BATCH_SIZE = 256

# Number of classes
NUM_CLASSES = 5
assert len(dataset.classes) == NUM_CLASSES, (
    f"expected {NUM_CLASSES} class folders under {path}, found {len(dataset.classes)}"
)

# Fixed seed so train/val/test membership is identical after every kernel restart
SPLIT_SEED = 42

# Get the target values (labels) from the dataset
targets = np.array(dataset.targets)

# Target proportions of the FULL dataset: 70% train / 20% validation / 10% test
val_prop = 0.2
test_prop = 0.1

# Split the dataset into train+val and test sets (stratified by class)
train_val_indices, test_indices, train_val_targets, test_targets = train_test_split(
    np.arange(len(dataset)),
    targets,
    test_size=test_prop,
    stratify=targets,
    random_state=SPLIT_SEED,
)

# Split the remainder into train and validation sets. The second split only
# sees (1 - test_prop) of the data, so rescale val_prop to keep validation at
# val_prop of the full dataset rather than val_prop of the remainder.
train_indices, val_indices, train_targets, val_targets = train_test_split(
    train_val_indices,
    train_val_targets,
    test_size=val_prop / (1 - test_prop),
    stratify=train_val_targets,
    random_state=SPLIT_SEED,
)

# Create custom PyTorch datasets for the train, validation, and test sets using the original dataset and the indices of the split data
train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)
assert len(train_dataset) + len(val_dataset) + len(test_dataset) == len(dataset)

# Create custom dataloaders for the train, validation, and test sets
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [8]:
def _swap_classifier_head(model, num_classes):
    """Replace the model's final linear layer with a fresh ``num_classes``-way one and return it."""
    if hasattr(model, 'fc'):
        # ResNet-style layout: the head is a single layer stored as `fc`.
        head = torch.nn.Linear(model.fc.in_features, num_classes)
        model.fc = head
        return head

    classifier = getattr(model, 'classifier', None)
    if isinstance(classifier, torch.nn.Linear):
        head = torch.nn.Linear(classifier.in_features, num_classes)
        model.classifier = head
        return head
    if (
        isinstance(classifier, torch.nn.Sequential)
        and len(classifier) > 0
        and isinstance(classifier[len(classifier) - 1], torch.nn.Linear)
    ):
        # AlexNet/VGG-style layout: the head is the last layer of `classifier`.
        last = len(classifier) - 1
        head = torch.nn.Linear(classifier[last].in_features, num_classes)
        classifier[last] = head
        return head

    raise AttributeError(
        f"{type(model).__name__} has neither an 'fc' layer nor a 'classifier' ending in a Linear layer"
    )


def train_model(model, train_dataloader, device, lr=1e-3, momentum=0.9, num_classes=5, epochs=1):
    """Fine-tune only a new classification head on top of a frozen model.

    The model is modified in place: all existing parameters are frozen, the
    final linear layer (``fc``, or the last layer of ``classifier``) is
    replaced by a new ``num_classes``-way layer, and the whole model is moved
    to ``device``.

    Args:
        model: network exposing its final linear layer as ``fc`` or ``classifier``.
        train_dataloader: iterable yielding ``(inputs, targets)`` batches.
        device: device on which to run training.
        lr: SGD learning rate.
        momentum: SGD momentum.
        num_classes: output width of the new head.
        epochs: number of passes over ``train_dataloader``.

    Returns:
        The same ``model`` object, left in training mode.

    Raises:
        AttributeError: if no replaceable classification head can be found.
    """
    # Freeze the weights of the model; the head created below is the only
    # part that will receive gradients.
    for param in model.parameters():
        param.requires_grad = False
    head = _swap_classifier_head(model, num_classes)

    # Move everything, not just the new head, so the backbone and the head
    # always live on the same device as the batches.
    model.to(device)

    optimizer = torch.optim.SGD(head.parameters(), lr=lr, momentum=momentum)

    # NOTE(review): train() also puts any frozen BatchNorm/Dropout layers into
    # training mode (BatchNorm running statistics keep updating) - confirm
    # this is intended for the frozen backbone.
    model.train()
    for i in range(epochs):
        print(f'running epoch {i+1}')
        for inputs, targets in train_dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Zero the gradients
            optimizer.zero_grad()
            # Forward pass
            outputs = model(inputs)
            loss = torch.nn.functional.cross_entropy(outputs, targets)
            # Backward pass
            loss.backward()
            optimizer.step()
    return model

def get_model_specs(model):
    """Return the total number of scalar parameters held by ``model``.

    Every tensor yielded by ``model.parameters()`` is counted, whether or not
    it requires gradients.
    """
    n_params = 0
    for tensor in model.parameters():
        n_params += tensor.numel()
    return n_params

def evaluate_model(model, train_dataloader, val_dataloader, test_dataloader, num_classes=5):
    """Summarise a model as a pandas Series of size and accuracy figures.

    Args:
        model: the network to evaluate.
        train_dataloader, val_dataloader, test_dataloader: loaders for the three splits.
        num_classes: number of classes, forwarded to ``get_acc``.

    Returns:
        ``pd.Series`` indexed by ``total_params``, ``train_acc``, ``val_acc``
        and ``test_acc`` (in that order).
    """
    print('collecting param count')
    report = {'total_params': get_model_specs(model)}

    # (result key, progress message, loader) for each split, in reporting order
    splits = (
        ('train_acc', 'collecting train accuracy', train_dataloader),
        ('val_acc', 'collecting validation accuracy', val_dataloader),
        ('test_acc', 'collecting test accuracy', test_dataloader),
    )
    for key, message, loader in splits:
        print(message)
        report[key] = get_acc(model=model, dataloader=loader, num_classes=num_classes)

    return pd.Series(data=report, index=list(report.keys()))

def get_acc(model, dataloader, num_classes, device=None):
    """Compute multiclass accuracy of ``model`` over every batch in ``dataloader``.

    Side effect: the model is switched to evaluation mode and left there.

    Args:
        model: network producing per-class scores along dimension 1.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        num_classes: number of classes, forwarded to torchmetrics.
        device: device to run inference on. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters), so
            the function no longer depends on a notebook-global ``device``.

    Returns:
        The accuracy as returned by ``torchmetrics.functional.accuracy``.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    batch_probs = []
    batch_labels = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            # Only the inputs need to be on the model's device; labels are
            # compared on the CPU after the loop.
            inputs = inputs.to(device)

            # Forward pass
            outputs = model(inputs)
            preds = F.softmax(outputs, dim=1)

            # Keep whole-batch tensors: building a tensor from a Python list
            # of per-sample ndarrays is slow and triggers a PyTorch warning.
            batch_probs.append(preds.cpu())
            batch_labels.append(labels.cpu())

    if not batch_probs:
        raise ValueError('dataloader yielded no batches; cannot compute accuracy')

    predictions = torch.cat(batch_probs)
    targets = torch.cat(batch_labels)
    accuracy = torchmetrics.functional.accuracy(predictions, targets, num_classes=num_classes, task='multiclass')
    return accuracy

In [9]:
# model loading
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in favour
# of the `weights=` argument - switch once the minimum torchvision version is confirmed.
resnet_model = torchvision.models.resnet50(pretrained=True).to(device)
# alexnet_model = torchvision.models.alexnet(pretrained=True).to(device)
# vgg_model = torchvision.models.vgg16(pretrained=True).to(device)

# Number of fine-tuning passes over the training set
EPOCHS = 8

resnet_model_trained = train_model(resnet_model, train_dataloader=train_dataloader, device=device, num_classes=NUM_CLASSES, epochs=EPOCHS)
# Evaluate with NUM_CLASSES rather than a literal so it always matches the head trained above
metrics = evaluate_model(resnet_model_trained, train_dataloader, val_dataloader, test_dataloader, NUM_CLASSES)



running epoch 1
running epoch 2
running epoch 3
running epoch 4
running epoch 5
running epoch 6
running epoch 7
running epoch 8
collecting param count
collecting train accuracy


In [1]:
# Display the metrics Series returned by evaluate_model. `metrics` only exists
# after the training/evaluation cell has finished in the current kernel session.
metrics

NameError: name 'metrics' is not defined

In [None]:
|