In [1]:
import os
import requests
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, model_selection

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader

import torchvision
from torchvision import transforms
from torchvision.utils import make_grid
from torchvision.models import resnet18
from copy import deepcopy
import torch.nn.functional as F
from sklearn import linear_model, model_selection
from sklearn.metrics import make_scorer, accuracy_score
from tqdm.notebook import tqdm

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Running on device:", DEVICE.upper())

from typing import Callable

# manual random seed is used for dataset partitioning
# to ensure reproducible results across runs
RNG = torch.Generator().manual_seed(42)

Running on device: CPU


In [2]:
base_dir = "./Alzheimer_data/"
root_dir = "./"
test_dir = base_dir + "test/"
train_dir = base_dir + "train/"
work_dir = root_dir + "dataset/"

In [3]:
CLASSES = [ 'NonDemented',
            'VeryMildDemented',
            'MildDemented',
            'ModerateDemented']

ZOOM = [0.99, 1.01]
BRIGHT_RANGE = [0.8, 1.2]
HORZ_FLIP = True
FILL_MODE = "constant"
DATA_FORMAT = "channels_last"
DIM = (224, 224)  # Replace with your desired target size
BATCH_SIZE = 32  # Adjust as needed

In [4]:
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder

# Assuming you have predefined values for DIM, HORZ_FLIP, ZOOM, BRIGHT_RANGE, BATCH_SIZE, and train_dir

# PyTorch transforms equivalent to Keras ImageDataGenerator
transform = transforms.Compose([
    transforms.Resize(DIM),
    transforms.RandomHorizontalFlip(p=0.5) if HORZ_FLIP else transforms.RandomHorizontalFlip(p=0),
    transforms.RandomAffine(degrees=0, scale=ZOOM, shear=0),
    transforms.ColorJitter(brightness=BRIGHT_RANGE, contrast=(1, 1), saturation=(1, 1), hue=0),
    transforms.ToTensor(),
])

class CustomImageFolderDataset(Dataset):
    def __init__(self, root, transform=None):
        self.image_folder = ImageFolder(root, transform)
        self.num_classes = len(self.image_folder.classes)

    def __len__(self):
        return len(self.image_folder)

    def __getitem__(self, idx):
        img, label = self.image_folder[idx]

        # Convert label to one-hot vector
        one_hot_label = torch.zeros(self.num_classes)
        one_hot_label[label] = 1.0

        return img, one_hot_label

# Dataset and DataLoader
train_dataset = CustomImageFolderDataset(root=train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

In [27]:
len(train_dataset)

5121

In [29]:
train_dataset[0][0].shape

torch.Size([3, 224, 224])

In [6]:
test_dataset = CustomImageFolderDataset(root=test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

In [7]:
import torchvision.models as models
model_pytorch = models.resnet18(pretrained=True)

# Replace the last fully connected layer
num_ftrs = model_pytorch.fc.in_features
model_pytorch.fc = nn.Linear(num_ftrs, 4)  # Assuming 4 output classes



In [8]:
model_pytorch

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [9]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for classification problems
optimizer = torch.optim.SGD(model_pytorch.parameters(), lr=0.001, momentum=0.9)

# Train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_pytorch.to(device)
num_epochs = 10

for epoch in range(num_epochs):
    print("Running epoch ", epoch)

    # Variables to track training accuracy
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model_pytorch(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Track training accuracy
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == torch.argmax(labels, dim=1)).sum().item()

    # Calculate training accuracy and print it
    training_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Training Accuracy: {100 * training_accuracy:.2f}%')

Running epoch  0
Epoch [1/10], Loss: 0.6748, Training Accuracy: 59.60%
Running epoch  1
Epoch [2/10], Loss: 0.7597, Training Accuracy: 75.96%
Running epoch  2
Epoch [3/10], Loss: 1.0584, Training Accuracy: 85.74%
Running epoch  3
Epoch [4/10], Loss: 0.2967, Training Accuracy: 93.05%
Running epoch  4
Epoch [5/10], Loss: 1.0428, Training Accuracy: 96.82%
Running epoch  5
Epoch [6/10], Loss: 4.0739, Training Accuracy: 97.48%
Running epoch  6
Epoch [7/10], Loss: 0.8348, Training Accuracy: 97.95%
Running epoch  7
Epoch [8/10], Loss: 0.3699, Training Accuracy: 98.89%
Running epoch  8
Epoch [9/10], Loss: 0.4108, Training Accuracy: 99.12%
Running epoch  9
Epoch [10/10], Loss: 0.4352, Training Accuracy: 98.89%


In [10]:
torch.save(model_pytorch.state_dict(), 'origional_model.pth')

In [12]:
model_pytorch.eval()

correct_test = 0
total_test = 0

with torch.no_grad():  # No need to compute gradients during evaluation
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model_pytorch(inputs)

        # Track test accuracy
        _, predicted_test = torch.max(outputs.data, 1)
        total_test += labels.size(0)
        correct_test += (predicted_test == torch.argmax(labels, dim=1)).sum().item()

# Calculate test accuracy
test_accuracy = correct_test / total_test
print(f'Test Accuracy: {100 * test_accuracy:.2f}%')

Test Accuracy: 62.63%


In [13]:
len(train_loader)

161

In [14]:
rt_model_pytorch = models.resnet18(pretrained=True)

# Replace the last fully connected layer
num_ftrs = rt_model_pytorch.fc.in_features
rt_model_pytorch.fc = nn.Linear(num_ftrs, 4) 

In [15]:
rt_model_pytorch

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [16]:
from torch.utils.data import DataLoader, random_split

# Assuming you have a dataset named 'train_dataset' and BATCH_SIZE defined

# Calculate the sizes for retain and forget loaders
retain_size = int(0.8 * len(train_dataset))  # 80% for retain_loader
forget_size = len(train_dataset) - retain_size  # 20% for forget_loader

# Split the dataset into retain and forget using random_split
retain_dataset, forget_dataset = random_split(train_dataset, [retain_size, forget_size])

# Create DataLoader instances for retain and forget
retain_loader = DataLoader(retain_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
forget_loader = DataLoader(forget_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

# Get the indices of the samples in forget_loader
forget_indices = list(forget_dataset.indices)

# Print the indices for verification
print("Indices in forget_loader:", forget_indices)

Indices in forget_loader: [2287, 2572, 3384, 494, 3545, 4957, 2293, 827, 149, 2165, 4422, 1257, 633, 4789, 4882, 2900, 1429, 3049, 4100, 2588, 2801, 1684, 1551, 3113, 4824, 1196, 4854, 145, 2068, 1167, 1191, 3690, 962, 708, 4324, 258, 543, 1178, 4891, 421, 2778, 37, 2108, 4190, 1766, 2703, 259, 801, 1924, 4463, 3118, 2642, 3562, 3809, 1559, 570, 386, 4284, 0, 186, 1567, 5085, 397, 2931, 2853, 215, 1630, 2975, 4312, 2887, 2008, 4944, 4079, 2747, 802, 3091, 3974, 974, 4144, 3536, 3665, 70, 368, 5080, 1562, 4269, 2081, 2787, 724, 1401, 2732, 4906, 3950, 2936, 830, 992, 5086, 3793, 3022, 4414, 595, 1201, 884, 1962, 1995, 182, 2023, 3651, 1344, 2080, 3997, 717, 1198, 5036, 2211, 3940, 382, 1856, 1041, 5011, 4083, 562, 2539, 1336, 3906, 563, 3995, 4632, 4365, 2847, 157, 3838, 2623, 3938, 4169, 4268, 3799, 1174, 805, 3892, 220, 625, 970, 3175, 4137, 1086, 117, 385, 2278, 4069, 5066, 3466, 457, 3493, 2209, 671, 2630, 4478, 4255, 2004, 780, 1938, 713, 2345, 4628, 4822, 3356, 2138, 1734, 3413, 1

In [24]:
with open('forget_indices.txt', 'w') as file:
    for index in forget_indices:
        file.write(f"{index}\n")

In [17]:
len(forget_indices)

1025

In [19]:
retain_size

4096

In [21]:
len(train_dataset)

5121

In [25]:
num_epochs = 10

for epoch in range(num_epochs):
    print("Running epoch ", epoch)

    # Variables to track training accuracy
    correct = 0
    total = 0

    for inputs, labels in retain_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = rt_model_pytorch(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Track training accuracy
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == torch.argmax(labels, dim=1)).sum().item()

    # Calculate training accuracy and print it
    training_accuracy = correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Training Accuracy: {100 * training_accuracy:.2f}%')

Running epoch  0
Epoch [1/10], Loss: 1.3656, Training Accuracy: 27.25%
Running epoch  1
Epoch [2/10], Loss: 1.4694, Training Accuracy: 25.90%
Running epoch  2
Epoch [3/10], Loss: 1.4194, Training Accuracy: 25.95%
Running epoch  3
Epoch [4/10], Loss: 1.3694, Training Accuracy: 25.07%
Running epoch  4
Epoch [5/10], Loss: 1.4712, Training Accuracy: 26.12%
Running epoch  5
Epoch [6/10], Loss: 1.4628, Training Accuracy: 26.25%
Running epoch  6
Epoch [7/10], Loss: 1.3692, Training Accuracy: 24.90%
Running epoch  7
Epoch [8/10], Loss: 1.3773, Training Accuracy: 27.32%
Running epoch  8
Epoch [9/10], Loss: 1.3866, Training Accuracy: 26.32%
Running epoch  9
Epoch [10/10], Loss: 1.4128, Training Accuracy: 25.88%


In [26]:
torch.save(rt_model_pytorch.state_dict(), 'retrain_model.pth')