# Classification with Segment Network
* We flatten a black-and-white image row-wise or column-wise, with values as positions, and see whether a Segment Net can classify it


# Initialization

In [2]:
import math
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

import time
from tqdm import tqdm

from segment import Segment

In [3]:
# Pick the compute backend: CUDA first, then the MPS backend, otherwise CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using mps device


In [4]:
# Seed every random number generator the notebook touches, in a fixed order,
# so that repeated runs draw the same random numbers.
RANDOM_SEED = 42
for seed_rng in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_rng(RANDOM_SEED)

# Ask cuDNN for deterministic kernels and switch off its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# MNIST dataset

In [5]:
from torchvision import datasets, transforms

def _flatten_rows(x):
    """Flatten the tensor in its existing element order."""
    return x.flatten()


def _flatten_cols(x):
    """Swap the last two axes of a 3-D tensor, then flatten it."""
    return x.permute(0, 2, 1).flatten()


def _binarize(x):
    """Threshold at the tensor's own mean: values below it become 0., the rest 1."""
    return torch.where(x < x.mean(), 0., 1.)


def _scale_by_position(x):
    """Multiply each element by its 1-based index divided by the length of axis 0."""
    length = x.shape[0]
    return x * torch.arange(1, length + 1.) / length


flatten_by_row_transform = transforms.Lambda(_flatten_rows)
flatten_by_col_transform = transforms.Lambda(_flatten_cols)
bw_array_transform = transforms.Lambda(_binarize)
position_transform = transforms.Lambda(_scale_by_position)

# Training pipeline: tensor conversion, random rotation of up to 15 degrees,
# binarization, then row-wise flattening.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(15),
    bw_array_transform,
    flatten_by_row_transform,
    # position_transform (currently disabled)
])

# Test pipeline: identical to training except that no rotation is applied.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    bw_array_transform,
    flatten_by_row_transform,
    # position_transform (currently disabled)
])

mnist_root = 'data'
train_set = datasets.MNIST(mnist_root, train=True, download=False, transform=train_transform)
test_set = datasets.MNIST(mnist_root, train=False, download=False, transform=test_transform)
print(len(train_set), len(test_set))
print(train_set[0][0].shape, test_set[0][0].shape)



60000 10000
torch.Size([784]) torch.Size([784])


# Model Definition
* Start with single layer Segment with output = number of labels
* Then increase number of layers to see if loss reduces 
* Then try transposing input x and batch size and see if it trains better

# Model Training

In [6]:
class SimpleModel(nn.Module):
    """A single Segment layer mapping `input_size` features to `output_size` outputs."""

    def __init__(self, input_size, output_size, segment_size):
        super().__init__()
        self.seg1 = Segment(input_size, output_size, segment_size)
        # Flipped to True once the layer's custom initialisation has run.
        self.init = False

    def forward(self, x):
        """Run the Segment layer; the first training-mode call initialises it first."""
        needs_init = self.training and not self.init
        if needs_init:
            # Only the shape/dtype of the per-feature reductions is used:
            # the bounds handed to the layer are all-zeros and all-ones.
            lower_bound = torch.zeros_like(x.min(dim=0).values)
            upper_bound = torch.ones_like(x.max(dim=0).values)
            self.seg1.custom_init(lower_bound, upper_bound)
            self.init = True
        return self.seg1(x)

In [32]:
#Permute input to seg1.
#Seg2 input should be the parameters from seg1 (not output)
class MyModel(nn.Module):
    """Classifier built from four Segment layers with auxiliary training losses.

    * ``seg1`` takes a single position feature and has ``batch_size`` outputs;
      in training it is fitted (MSE) to the transposed input batch.
    * ``seg2x`` / ``seg2y`` map the input to ``segment1_size + 1`` values each;
      in training they are fitted (MSE) to ``seg1.x`` / ``seg1.y``.
    * ``seg3`` maps the concatenated seg2 outputs to ``output_dim`` scores.

    Args:
        input_size: number of features per sample (second axis of ``x``).
        batch_size: number of samples per training batch; ``seg1`` has this
            many outputs, so training batches must have exactly this size.
        segment1_size, segment2_size, segment3_size: segment counts passed to
            the respective Segment layers.
        output_dim: number of outputs of ``seg3``.
    """

    def __init__(self, input_size, batch_size, segment1_size, segment2_size, segment3_size, output_dim):
        super().__init__()
        # Flipped to True once custom_init() has run.
        self.init = False
        self.seg1 = Segment(1, batch_size, segment1_size)
        seg2_output_size = segment1_size+1
        # Position grid fed to seg1; rebuilt on every training-mode forward pass.
        self.x_in = None
        #split seg2 into x,y parts to reduce unnecessary parameters
        self.seg2x = Segment(input_size, seg2_output_size, segment2_size)
        self.seg2y = Segment(input_size, seg2_output_size, segment2_size)
        self.seg3 = Segment(2*seg2_output_size, output_dim, segment3_size)

    def custom_init(self, x):
        """Initialise every Segment layer with per-feature bounds of 0 and 1.

        ``x`` is used only for its dtype and device, so the bound tensors live
        on the same device as the data the model is being run on.
        """
        def unit_bounds(num_features):
            lower = torch.zeros(num_features, dtype=x.dtype, device=x.device)
            upper = torch.ones(num_features, dtype=x.dtype, device=x.device)
            return lower, upper

        self.seg1.custom_init(*unit_bounds(self.seg1.in_features))

        # seg2x and seg2y have the same input width and share one pair of bounds.
        x_min, x_max = unit_bounds(self.seg2x.in_features)
        self.seg2x.custom_init(x_min, x_max)
        self.seg2y.custom_init(x_min, x_max)

        self.seg3.custom_init(*unit_bounds(self.seg3.in_features))

        self.init = True

    def forward(self, x):
        """Compute class scores and, in training mode, the auxiliary losses.

        Args:
            x: 2-D input batch (it is transposed with ``permute(1, 0)``).

        Returns:
            Training mode: ``([loss1, loss2x, loss2y], ypred)``.
            Eval mode: ``ypred`` only.
        """
        if self.training:
            if not self.init:
                self.custom_init(x)

            #Train a model1 that predicts x,y for the input x_in so output is x
            # Positions 1/N .. 1 as a column vector, created on the input's
            # device so the model also works when it is moved off the CPU.
            num_positions = x.shape[1]
            self.x_in = (
                torch.arange(1, num_positions+1., device=x.device) / num_positions
            ).unsqueeze(-1)
            y1 = self.seg1(self.x_in)
            loss1 = F.mse_loss(y1, x.permute(1,0))

        self.xembeddings = self.seg2x(x)
        self.yembeddings = self.seg2y(x)

        if self.training:
            # NOTE(review): `.view` with swapped sizes reinterprets the storage
            # order; it does not transpose. If a transpose of seg1's parameters
            # is intended, confirm against Segment's parameter layout.
            seg1_xparams = self.seg1.x.view(self.seg1.x.shape[2], self.seg1.x.shape[1])
            seg1_yparams = self.seg1.y.view(self.seg1.y.shape[2], self.seg1.y.shape[1])
            loss2x = F.mse_loss(self.xembeddings, seg1_xparams)
            loss2y = F.mse_loss(self.yembeddings, seg1_yparams)

        self.embeddings = torch.cat((self.xembeddings, self.yembeddings), dim=1)
        ypred = self.seg3(self.embeddings)

        if self.training:
            return [loss1, loss2x, loss2y], ypred
        else:
            return ypred
    

In [72]:
# Define the model, loss and optimizer used by the training cell below.
BATCH_SIZE = 64

#model = SimpleModel(784,10,10)
model = MyModel(
    input_size=784,
    batch_size=BATCH_SIZE,
    segment1_size=14,
    segment2_size=3,
    segment3_size=10,
    output_dim=10,
)
total_params = sum(param.numel() for param in model.parameters())
print(f"Total parameters: {total_params}")

criterion = nn.CrossEntropyLoss()

lr = 1e-4
# The optimizer only updates the parameters it is given here.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=lr,
    betas=(0.9, 0.999),
    eps=1e-8,
)

Total parameters: 196680


In [65]:
# Show the module's repr (its registered Segment submodules) as the cell output.
model

MyModel(
  (seg1): Segment(in_features=1, out_features=64, segment_features=28)
  (seg2x): Segment(in_features=784, out_features=29, segment_features=3)
  (seg2y): Segment(in_features=784, out_features=29, segment_features=3)
  (seg3): Segment(in_features=58, out_features=10, segment_features=10)
)

In [33]:
# TESTING MODEL
# Smoke test: push a single batch through a freshly built MyModel and look at
# the auxiliary losses. The throwaway network is bound to `smoke_model` rather
# than `model`, so running the notebook top to bottom does not replace the
# model whose parameters the optimizer was created with.
train_dataloader = DataLoader(train_set, batch_size=64, shuffle=True)
data = next(iter(train_dataloader))
img, label = data
smoke_model = MyModel(784, 64, 28, 3, 10, 10)
total_params = sum(p.numel() for p in smoke_model.parameters())
print(f"Total parameters: {total_params}")
losses, ypred = smoke_model(img)
losses
#loss = criterion(y_pred, label) + loss1 + loss2

Total parameters: 380248


[tensor(0.1646, grad_fn=<MseLossBackward0>),
 tensor(0.3393, grad_fn=<MseLossBackward0>),
 tensor(0., grad_fn=<MseLossBackward0>)]

In [69]:
num_epochs = 40

train_dataloader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation metrics do not depend on sample order, so the test set is not shuffled.
test_dataloader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

start = time.time()
# One row per epoch: mean of [loss1, loss2x, loss2y, classification loss].
training_losses = np.empty((0, 4))
val_losses = np.empty((0))
val_accuracies = np.empty((0))
for epoch in range(1, num_epochs+1):
    model.train()
    step = 0              # number of batches actually trained on this epoch
    nan_detected = False
    epoc_losses = np.zeros((4))
    for data in tqdm(train_dataloader, position=0, leave=True):
        img, label = data

        #don't train for wrong batch size for now
        if img.shape[0] != BATCH_SIZE:
            continue

        # ===================forward=====================
        losses, y_pred = model(img)
        class_loss = criterion(y_pred, label)
        losses.append(class_loss)
        if math.isnan(class_loss.item()):
            print(f"nan class_loss at step {step}")
            nan_detected = True
            break
        # ===================backward====================
        optimizer.zero_grad()
        loss = losses[0] + losses[1] + losses[2] + losses[3]
        loss.backward()
        optimizer.step()
        for i, component in enumerate(losses):
            epoc_losses[i] += component.item()

        step += 1
    # ===================log========================
    # Average over the batches that were trained on; skipped partial batches
    # contribute nothing to the sums and must not count in the denominator.
    mean_epoch_losses = epoc_losses / max(step, 1)
    training_losses = np.append(training_losses, [mean_epoch_losses], axis=0)

    if nan_detected:
        print(f"nan class_loss ")
        break

    model.eval()
    val_labels = []
    val_preds = []
    val_running_loss = 0
    with torch.no_grad():
        for data in tqdm(test_dataloader, position=0, leave=True):
            img, label = data
            y_pred = model(img)
            y_pred_label = torch.argmax(y_pred, dim=1)

            # Plain Python ints keep the accuracy computation below cheap.
            val_labels.extend(label.cpu().tolist())
            val_preds.extend(y_pred_label.cpu().tolist())

            # Separate name so `loss` keeps the last training loss and its graph.
            val_batch_loss = criterion(y_pred, label)
            val_running_loss += val_batch_loss.item()
    val_loss = val_running_loss/len(test_dataloader)
    val_losses = np.append(val_losses, val_loss)
    num_correct = sum(pred == target for pred, target in zip(val_preds, val_labels))
    val_accuracy = num_correct / len(val_labels)
    val_accuracies = np.append(val_accuracies, val_accuracy)

    print("-"*30)
    print(f"Train Loss EPOCH {epoch}: {mean_epoch_losses}")
    print(f"Valid Loss EPOCH {epoch}: {val_loss:.4f}")
    print(f"Valid Accuracy EPOCH {epoch}: {val_accuracy:.4f}")
    print("-"*30)

stop = time.time()
print(f"Training Time: {stop-start:.2f}s")

100%|██████████| 938/938 [00:45<00:00, 20.59it/s]
100%|██████████| 157/157 [00:02<00:00, 53.81it/s]


------------------------------


TypeError: unsupported format string passed to numpy.ndarray.__format__

In [71]:
# Per-component training losses of the last epoch, divided by the number of batches in the loader.
per_batch_losses = epoc_losses / len(train_dataloader)
print(f"Train Loss EPOCH {epoch}: {per_batch_losses}")

Train Loss EPOCH 1: [0.15944983 0.10714777 0.05381234 1.54371139]


In [48]:
# Scratch cell: view the list of per-component losses as a single tensor.
with torch.no_grad():
    print(torch.Tensor(losses))

    # Empty tensor placeholder. The unfinished attribute access that followed
    # it (`train_running_loss.app`) raised AttributeError and has been removed.
    train_running_loss = torch.Tensor()

tensor([0.1658, 0.3393, 0.0000, 2.3026])


In [25]:
# Inspect the shape of the batch most recently bound to `img`.
img.shape

torch.Size([16, 784])

In [10]:
# Save graph to a file
#!pip install torchviz
from torchviz import make_dot

# Name -> parameter mapping handed to make_dot alongside the loss tensor.
named_params = dict(model.named_parameters())

# Build a Graphviz object from the computation graph behind `loss`.
graph = make_dot(loss, params=named_params)

# Write the rendered graph to disk; the call's return value is the cell output.
graph.render("model_Classification_Segment_v1_graph")

'model_Classification_Segment_v1_graph.pdf'

# Experiment Observations
* model=SimpleModel(784,10,8), model_params=141120, batch_size=64, lr=1e-5, epochs=40, test_acc=0.9118
* model=SimpleModel(784,10,10), model_params=172480, batch_size=64, lr=1e-4, epochs=40, test_acc=0.9239

### Turned off bw_transform and position transform; feeding flattened grayscale to the above model
* model=SimpleModel(784,10,10), model_params=172480, batch_size=64, lr=1e-4, epochs=40, test_acc=0.9310 (max 0.935)
* so we are not really learning from shape

### Trained a new model that uses multiple segment nets and tries to predict x,y
*  MyModel(784, 64, 14, 10, 10), model_params=384840, batch_size=64, lr=1e-4, epochs=20, test_acc=0.9231 (max 0.9355)

### Turned ON bw_transform (no position transform)
*  MyModel(784, 64, 14, 10, 10), model_params=384840, batch_size=64, lr=1e-4, epochs=40, test_acc=0.9546 (max 0.9572)
### Turned ON bw_transform, rotation_15 (no position transform) - ramps up faster
*  MyModel(784, 64, 14, 10, 10), model_params=384840, batch_size=64, lr=1e-4, epochs=26, test_acc=0.9585 (max 0.9585)

### Split seg2 in x,y to reduce params by 2x
*  MyModel(784, 64, 14, 10, 10), model_params=196680, batch_size=64, lr=1e-4, epochs=26, test_acc=0.9598 (max 0.9633)

### Experiment: changing the seg2 models to take transposed input failed spectacularly — it trained slowly and only reached 22% accuracy

### Changed seg2 segments (doubled them) - not much help
*  MyModel(784, 64, 14, 10, 10), model_params=384840, batch_size=64, lr=1e-4, epochs=40, test_acc=0.9634 (max 0.9634)


## Try to not sum on seg1. output =1 and use input_shape as the dimension with sum_on_x=False