In [None]:
import torch
import torch.nn as nn

In [None]:
from torchvision import datasets

In [None]:
# Open (downloading on first use) both CIFAR-10 splits without any transform,
# so indexing yields raw (PIL image, label) pairs.
# The training split is produced first, the held-out split second.
cifar10, cifar10_val = (
    datasets.CIFAR10(root='./misc_data', train=is_train, download=True)
    for is_train in (True, False)
)


In [None]:
# Show the class hierarchy (method resolution order) of the dataset object.
type(cifar10).mro()

In [None]:
# Number of samples in the training split.
len(cifar10)

In [None]:
# Mapping exposed by the dataset; presumably class name -> integer label.
cifar10.class_to_idx

In [None]:
# Indexing returns a pair (unpacked later as `img, label`).
cifar10[2]

In [None]:
# First element of the pair: the image itself.
cifar10[2][0]

In [None]:
import matplotlib.pyplot as plt
# Render the untransformed image with matplotlib.
plt.imshow(cifar10[2][0])

In [None]:
from torchvision import transforms

In [None]:
# Reusable PIL-image -> tensor converter; later cells reuse `to_tensor`.
to_tensor = transforms.ToTensor()

# Grab one raw sample and convert its image into a tensor.
img, label = cifar10[2]
img_t = to_tensor(img)


In [None]:
# Shape and dtype of the converted image tensor.
img_t.shape, img_t.dtype

In [None]:
# Value range after ToTensor.
img_t.min(), img_t.max()

In [None]:
# Move axis 0 to the end before plotting.
# assumes img_t is channels-first (C, H, W) and imshow wants (H, W, C) — TODO confirm
plt.imshow(img_t.permute(1, 2, 0))

In [None]:
# Convert every training image to a tensor and stack them along a new
# trailing axis, so the sample index ends up as the last dimension.
X_train = torch.stack(
    tuple(to_tensor(sample[0]) for sample in cifar10),
    dim=-1,
)


In [None]:
# The images were stacked along a new last axis (dim=3), so the sample index is the final dimension.
X_train.shape

In [None]:
# Collapse everything except the leading axis so each row holds all values
# of one channel across the whole training set, then take per-row statistics.
pixels_per_channel = X_train.view(3, -1)
X_train_mean = pixels_per_channel.mean(dim=1)
X_train_std = pixels_per_channel.std(dim=1)

In [None]:
# Standardize each channel by hand: subtract the channel mean and divide by
# the channel std. The statistics get a trailing axis so they broadcast
# across the flattened pixel dimension.
X_train_normalized = (
    X_train.view(3, -1)
    .sub(X_train_mean[:, None])
    .div(X_train_std[:, None])
)

In [None]:
# The normalized data is still in the flattened (3, -1) layout.
X_train_normalized.shape

In [None]:
# Restore the (3, 32, 32, N) layout, then move the sample axis to the front.
X_train_normalized.view(3,32,32,-1).permute(3,0,1,2).shape

In [None]:
# Wrap the per-channel training statistics in a torchvision Normalize
# transform (arguments are mean first, then std).
normalize_transform = transforms.Normalize(X_train_mean, X_train_std)
normalize_transform

In [None]:
# Full preprocessing pipeline: PIL image -> tensor -> per-channel normalization.
preprocessing_steps = [to_tensor, normalize_transform]
transform = transforms.Compose(preprocessing_steps)
transform

In [None]:
# Preview sample 99 after the full pipeline; permute moves axis 0 to the end for imshow.
# NOTE(review): after Normalize the values are no longer confined to [0, 1], so matplotlib
# will presumably clip them — confirm the distorted colours are expected here.
plt.imshow(transform(cifar10[99][0]).permute(1, 2, 0))

In [None]:
# Re-open both splits, this time attaching the preprocessing pipeline so
# indexing yields (normalized tensor, label) pairs.
# The training split is produced first, the held-out split second.
cifar10, cifar10_val = (
    datasets.CIFAR10(root='./misc_data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

In [None]:
# Original dataset label -> new binary label (0 = airplane, 1 = bird).
label_map = {0: 0, 2: 1}
label_classes = ['airplane', 'bird']


def _select_two_classes(dataset):
    """Keep only samples whose label is a key of `label_map`, remapping the label."""
    return [
        (img, label_map[label])
        for img, label in dataset
        if label in label_map
    ]


cifar2 = _select_two_classes(cifar10)
cifar2_val = _select_two_classes(cifar10_val)

In [None]:
# Sizes of the two-class train / validation lists, and the flattened length of a 32x32x3 image.
len(cifar2), len(cifar2_val), 32*32*3

In [None]:
# Small fully connected classifier: 3072 flattened inputs -> 512 hidden units
# (tanh) -> 2 outputs. LogSoftmax (rather than Softmax) is used so the output
# can be fed directly to nn.NLLLoss.
layers = [
    nn.Linear(in_features=3072, out_features=512),
    nn.Tanh(),
    nn.Linear(in_features=512, out_features=2),
    nn.LogSoftmax(dim=1),
]
model = nn.Sequential(*layers)

In [None]:
# nn.Softmax(1)(torch.Tensor([1,2,3.]).unsqueeze(0))

In [None]:
# Flatten the first sample and add a leading batch axis; check the resulting shape.
cifar2[0][0].view(-1,).unsqueeze(0).shape

In [None]:
# Forward pass on a single flattened sample.
# Note: the model ends in LogSoftmax, so `probs` holds log-probabilities, not probabilities.
probs = model(cifar2[0][0].view(1,-1))
probs

In [None]:
# Largest value and its index along dim 1 (the index is the predicted class).
torch.max(probs, 1)

In [None]:
# Negative log-likelihood loss; pairs with the LogSoftmax output of the model.
nll_loss = nn.NLLLoss()

In [None]:
# Loss of the single prediction against that sample's label.
nll_loss(probs, torch.tensor([cifar2[0][1]]))

In [None]:
# Show the module structure.
model

In [None]:
import torch.optim as optim

In [None]:
# Shape of one preprocessed sample flattened into a single row.
# The sample is taken from `cifar2` explicitly: on a fresh top-to-bottom run the
# module-level `img` is still the PIL image unpacked from the untransformed
# dataset (comprehension variables do not leak), and PIL images have no `.view`.
cifar2[0][0].view(1,-1).shape

In [None]:
# Pre-allocate the feature matrix (one flattened 3072-value row per image)
# and the label vector, then fill them sample by sample.
n_samples = len(cifar2)
X_train = torch.zeros((n_samples, 3072), dtype=torch.float32)
y_train = torch.zeros((n_samples,), dtype=torch.int64)

for i, (img, label) in enumerate(cifar2):
    img_t = img.view(1, -1)  # flatten the image into a single row
    X_train[i], y_train[i] = img_t, label
    
    

In [None]:
# Check the shapes of the feature matrix and the label vector.
X_train.shape, y_train.shape

In [None]:
import torch

# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU,
# and report which one was picked.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
print("GPU is available" if use_gpu else "GPU not available, using CPU")


In [None]:
# Hyper-parameters for the manual mini-batch loop.
n_epochs, bs = 100, 1500

# Move the model and the whole training set onto the selected device
# (author's note: about 8.3 times faster on GPU).
model = model.to(device)
X_train, y_train = X_train.to(device), y_train.to(device)

# SGD with momentum over the current model's parameters, plus the NLL criterion.
optimizer = optim.SGD(params=model.parameters(), lr=1e-1, momentum=0.9)
nll_loss = nn.NLLLoss()


In [None]:
# Manual mini-batch gradient descent on a freshly initialized network.
model = nn.Sequential(
    nn.Linear(3072, 512),
    nn.Tanh(),
    nn.Linear(512, 2),
    nn.LogSoftmax(dim=1)  # log-probabilities, as expected by nn.NLLLoss
).to(device)

# The optimizer has to be built from THIS model's parameters. An optimizer
# created before the model was re-instantiated would keep updating the old
# network and leave the new one untrained.
optimizer = optim.SGD(model.parameters(), lr=1e-1, momentum=0.9)

for epoch in range(1, n_epochs+1):
    # Reshuffle the full training set once per epoch.
    shuffled_idx = torch.randperm(len(X_train))
    X_train = X_train[shuffled_idx]
    y_train = y_train[shuffled_idx]
    for batch in range(0, len(X_train), bs):
        # Slice with the loop's own start offset. The previous code used a stale
        # `i` left over from an earlier cell, so every step saw the same slice.
        X_batch = X_train[batch:batch+bs]
        y_batch = y_train[batch:batch+bs]
        y_pred = model(X_batch)
        loss = nll_loss(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f'Epoch {epoch} -- batch {batch}, Loss {loss}')

In [None]:
# Copy every validation sample onto the selected device: the image tensor is
# moved, and the integer label is wrapped in a tensor created on that device.
cifar2_val_gpu = list(
    (image.to(device), torch.tensor(target, device=device))
    for image, target in cifar2_val
)

In [None]:
import torch.utils.data as data

# Training loader: pair each feature row with its label, reshuffled every epoch.
train_pairs = list(zip(X_train, y_train))
train_loader = data.DataLoader(train_pairs, batch_size=1000, shuffle=True)

# Validation loader: fixed order, smaller batches.
val_loader = data.DataLoader(dataset=cifar2_val_gpu, batch_size=100, shuffle=False)


In [None]:
# x,y = next(iter(train_loader))
# x.shape,y.shape

In [None]:

from torch import relu


# Deeper MLP that emits raw logits: there is no final (Log)Softmax because
# nn.CrossEntropyLoss applies log-softmax internally.
model = nn.Sequential(
    nn.Linear(3072, 512),
    nn.Tanh(),
    nn.Linear(512, 690),
    nn.ReLU(),
    nn.Linear(690, 690),
    nn.ReLU(),
    nn.Linear(690, 2),
).to(device)

n_epochs = 100
X_train = X_train.to(device)
y_train = y_train.to(device)
optimizer = optim.SGD(model.parameters(), lr=1e-2, momentum=.9)
# The name is kept for continuity with earlier cells, but this is cross-entropy on logits.
nll_loss = nn.CrossEntropyLoss()


for epoch in range(1, n_epochs+1):
    # ---- training pass over every training batch ----
    model.train()
    train_loss_sum = 0.0
    n_train = 0
    for X_batch, y_batch in train_loader:
        y_pred = model(X_batch)
        batch_loss = nll_loss(y_pred, y_batch)
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch mean is exact even if the last batch is short.
        train_loss_sum += batch_loss.item() * y_batch.shape[0]
        n_train += y_batch.shape[0]

    # ---- validation pass over the WHOLE validation loader ----
    # Zipping the two loaders stopped at the shorter one and scored each
    # validation batch with different weights. Evaluating after the training
    # pass gives one consistent accuracy figure per epoch.
    model.eval()
    correct = 0
    total = 0
    val_loss_sum = 0.0
    with torch.no_grad():
        for X_val, y_val in val_loader:
            batch_size = X_val.shape[0]
            # Validation images are still image-shaped; flatten each to one row.
            y_pred = model(X_val.view(batch_size, -1))
            val_loss_sum += nll_loss(y_pred, y_val).item() * batch_size
            _, pred_class = torch.max(y_pred, dim=1)
            total += batch_size
            correct += (pred_class == y_val).sum().item()

    # Epoch-level means instead of whatever the last batch happened to produce.
    loss = train_loss_sum / max(n_train, 1)
    val_loss = val_loss_sum / max(total, 1)
    print(f'Epoch {epoch}, Loss {loss}, val_loss {val_loss}, Accuracy {correct/total}')

In [None]:
# Element count of every trainable parameter tensor, followed by the grand total.
num = []
for param in model.parameters():
    if param.requires_grad:
        num.append(param.numel())
num, sum(num)

In [None]:
# Show the layer-by-layer structure of the current `model`.
model

In [None]:
# Bias shape of the layer at index 2 of the Sequential container
# (the first entry yielded by `modules()` is the container itself, so
# indexing `model` directly reaches the same layer).
model[2].bias.shape