In [1]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm import trange
from tensorboardX import SummaryWriter

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms

import medmnist
from medmnist import INFO

In [2]:
# Look up the RetinaMNIST entry in MedMNIST's registry and resolve its dataset class.
info = INFO["retinamnist"]
DataClass = getattr(medmnist, info['python_class'])

# Per-sample preprocessing: convert to a tensor, then normalise with mean 0.5 / std 0.5.
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5], std=[.5]),
])

# Training split, requested as RGB, served in shuffled mini-batches of 64.
train_dataset = DataClass(
    split='train',
    transform=data_transform,
    download=True,
    as_rgb=True,
)
train_loader = data.DataLoader(train_dataset, batch_size=64, shuffle=True)

Using downloaded and verified file: C:\Users\tjall\.medmnist\retinamnist.npz


In [3]:

class MyModel(nn.Module):
    """Two-layer fully connected classifier with a softmax output.

    Architecture: ``Linear(input_size, 512) -> ReLU -> Linear(512, 5) -> Softmax``.

    Args:
        input_size: Number of features per sample once the input has been
            flattened (everything except the leading batch dimension).
    """

    def __init__(self, input_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(512, 5)
        # The output of fc2 is always (batch, 5); normalise over the class axis.
        # Passing dim explicitly avoids the "implicit dimension" deprecation
        # warning and gives the same values the implicit choice produced.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Flatten each sample, run both linear layers and return class probabilities.

        Args:
            x: Batch tensor whose first dimension is the batch size; all
                remaining dimensions are flattened into ``input_size`` features.

        Returns:
            Tensor of shape ``(batch, 5)`` whose rows sum to 1.
        """
        # Keep the batch axis and collapse everything else, whatever the rank.
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        # NOTE(review): this notebook trains the model with nn.CrossEntropyLoss,
        # which applies log-softmax itself and expects raw logits. The softmax
        # is kept so outputs and gradients stay unchanged; consider returning
        # the logits instead if that double normalisation is not intended.
        x = self.softmax(x)
        return x
    

def train(model, train_loader, criterion, optimizer, device, writer, num_batches):
    """Run one (optionally truncated) training pass and collect layer gradients.

    Args:
        model: Network exposing ``fc1`` and ``fc2`` sub-layers with a ``weight``
            parameter. It is not moved to ``device`` here.
        train_loader: Iterable yielding ``(inputs, targets)`` batches. Targets
            are squeezed along dimension 1 and cast to ``long``.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer stepping the model's parameters.
        device: Device that inputs and targets are moved to.
        writer: Object with ``add_scalar(tag, value, step)``; receives one
            ``'train_loss_logs'`` entry per processed batch.
        num_batches: Maximum number of batches to process. ``None`` processes
            every batch the loader yields.

    Returns:
        ``(epoch_loss, g_layer1, g_layer2)``: the mean batch loss (``nan`` when
        no batch was processed) and, per processed batch, a snapshot of the
        weight gradient of ``fc1`` and of ``fc2``.

    Side effects:
        Increments the module-level ``iteration`` counter once per batch; it
        must be defined before this function is called.
    """
    global iteration

    model.train()
    total_loss = []
    g_layer1 = []
    g_layer2 = []
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        # ">=" so that exactly num_batches batches are processed (0 .. num_batches-1).
        if num_batches is not None and batch_idx >= num_batches:
            break
        optimizer.zero_grad()
        outputs = model(inputs.to(device))

        targets = torch.squeeze(targets, 1).long().to(device)
        loss = criterion(outputs, targets)

        loss_value = loss.item()
        total_loss.append(loss_value)
        writer.add_scalar('train_loss_logs', loss_value, iteration)
        iteration += 1

        loss.backward()
        optimizer.step()

        # Store copies: ``.grad`` may be the same buffer on every iteration
        # (zero_grad can zero it in place), in which case storing the reference
        # would leave every list entry pointing at the last batch's gradient.
        g_layer1.append(model.fc1.weight.grad.detach().clone())
        g_layer2.append(model.fc2.weight.grad.detach().clone())

    # Guard against an empty loader or num_batches == 0.
    epoch_loss = sum(total_loss) / len(total_loss) if total_loss else float("nan")
    return epoch_loss, g_layer1, g_layer2


# Model and optimizer. The input size is written as channels * height * width:
# 3 * 28 * 28 = 2352 features per flattened sample (the dataset is loaded with
# as_rgb=True; 28x28 is assumed from the original constant 2352 — confirm).
model = MyModel(3 * 28 * 28)
optimizer = optim.Adam(model.parameters())

# Loss function and TensorBoard event writer (default log directory).
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter()

# `iteration` is the global step counter that train() increments per batch.
iteration = 0
num_epochs = 1
num_batches = 5

# gradients_layerN[epoch][batch] holds the weight gradient of layer N.
gradients_layer1 = []
gradients_layer2 = []
for epoch in trange(num_epochs):
    epoch_loss, G_layer1, G_layer2 = train(model, train_loader, criterion, optimizer, "cpu", writer, num_batches)
    gradients_layer1.append(G_layer1)
    gradients_layer2.append(G_layer2)

# Push buffered loss events to disk now; the writer stays open so later cells
# can keep logging to it.
writer.flush()


  x = self.softmax(x)
100%|██████████| 1/1 [00:00<00:00, 12.94it/s]


In [4]:
def negative_indices(arr):
    """Mark every negative entry of ``arr`` with its own position.

    Args:
        arr: Array-like of real numbers, of any shape.

    Returns:
        Array with the shape and dtype of ``arr`` that holds, at each position
        where ``arr < 0``, that position's index in flattened (C) order, and 0
        everywhere else. For 1-D input the flattened index is the ordinary
        index.

    Note:
        A negative value at position 0 is recorded as 0 and therefore cannot
        be told apart from a non-negative entry.
    """
    arr = np.asarray(arr)
    # Flat position of every element, laid out in the same shape as the input.
    positions = np.arange(arr.size).reshape(arr.shape)
    # Select by mask instead of assigning the tuple returned by np.where, which
    # only worked for 1-D input.
    return np.where(arr < 0, positions, 0).astype(arr.dtype)

def replace_non_negative(arr):
    """Return a copy of ``arr`` with all entries ``>= 0`` set to its first element.

    The first element is taken in flattened order from the copy before any
    replacement happens. Entries that are not ``>= 0`` are left as they are,
    and the input itself is not modified.
    """
    out = np.array(arr, copy=True)
    anchor = out.ravel()[0]
    non_negative = out >= 0
    out[non_negative] = anchor
    return out

In [6]:
# Compare, for every recorded batch, the fc2 weight-gradient row of each
# output unit c >= 1 against the row of output unit 0.
for i in range(num_epochs):
    for j in range(num_batches):
        # Row 0 is the reference; it is the same for every c of this batch.
        G1 = gradients_layer2[i][j][0, :]
        for c in range(1, 5):
            Gc = gradients_layer2[i][j][c, :]

            # Element-wise ratio to the reference row, and the positions where
            # two consecutive ratios are exactly equal.
            r = Gc / G1
            r_diff = np.diff(r)
            indices = np.argwhere(r_diff == 0)

            # Positions of the negative entries of this row, and the row with
            # its non-negative entries replaced by its first element.
            Y = negative_indices(Gc)
            GY = replace_non_negative(Gc)

            delta = G1 / GY


  delta = G1/GY
  delta = G1/GY
