In [1]:

import os
import cv2

import pandas as pd

import numpy as np
import torch


from CustomDataset import CustomTensorDataset
from torch.utils.data import DataLoader


from transformers import AutoFeatureExtractor, AutoModelForImageClassification
from caxton_model.network_module import ParametersClassifier


import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

from lightning_model import LightningModel

from tqdm import tqdm

from PIL import Image, ImageFilter


# from utils import get_images_and_targets

import torch.multiprocessing
torch.multiprocessing.set_sharing_strategy('file_system')


In [2]:
import numpy as np
import torch
import torchmetrics
import torch.nn.functional as F

        
class ConvNet(pl.LightningModule):
    
    def __init__(self, n_inputs, n_hiddens_per_conv_layer, n_hiddens_per_fc_layer, n_outputs, 
                 patch_size_per_conv_layer, stride_per_conv_layer, activation_function='tanh', learning_rate=0.01, optimizer='adam'):
        
        super().__init__()

        self.n_class = n_outputs
        self.lr = learning_rate

        self.opt = optimizer
        self.criterion = torch.nn.CrossEntropyLoss()
        self.softmax = torch.nn.Softmax(dim=1)

        self.batch_norm1 = torch.nn.BatchNorm2d(64)
        self.batch_norm2 = torch.nn.BatchNorm2d(192)
        self.batch_norm3 = torch.nn.BatchNorm2d(384)
        self.batch_norm4 = torch.nn.BatchNorm2d(256)
        self.conv1 = torch.nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2)
        self.relu = torch.nn.ReLU(inplace=True)
        self.maxpool = torch.nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = torch.nn.Conv2d(64, 192, kernel_size=5, padding=2)
        self.conv3 = torch.nn.Conv2d(192, 384, kernel_size=3, padding=1)
        self.conv4 = torch.nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.conv5 = torch.nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.avgpool = torch.nn.AdaptiveAvgPool2d((6, 6))
        self.dropout1 = torch.nn.Dropout(0.3)
        self.fc1 = torch.nn.Linear(256 * 6 * 6, 4096)
        self.dropout2 = torch.nn.Dropout(0.3)
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.dropout3 = torch.nn.Dropout(0.3)
        self.fc3 = torch.nn.Linear(4096, self.n_class)
        
        # self.to(self.device)


    def forward_all_outputs(self, X):
        n_samples = X.shape[0]
        Ys = [X]
        for i, conv_layer in enumerate(self.conv_layers):
            print("here: ", i)
            x = conv_layer(Ys[-1])
            print('conv output', x.shape)
            x = self.activation_function(x)
            print('act out', x.shape)
            x = self.pool(x)
            print('pool out', x.shape)
            Ys.append( x )

        for layeri, fc_layer in enumerate(self.fc_layers[:-1]):
            if layeri == 0:
                Ys.append( self.activation_function(fc_layer(Ys[-1].reshape(n_samples, -1))) )
            else:
                Ys.append( self.activation_function(fc_layer(Ys[-1])) )

        Ys.append(self.fc_layers[-1](Ys[-1]))
        return Ys


    def forward(self, X):
        # Ys = self.forward_all_outputs(X)
        # return Ys[-1]

        x = self.relu(self.conv1(X))
        x = self.maxpool(x)
        x = self.batch_norm1(x)
        x = self.relu(self.conv2(x))
        x = self.maxpool(x)
        x = self.batch_norm2(x)
        x = self.relu(self.conv3(x))
        x = self.batch_norm3(x)
        x = self.relu(self.conv4(x))
        x = self.batch_norm4(x)
        x = self.relu(self.conv5(x))
        # x = self.batch_norm()
        x = self.maxpool(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.dropout1(x)
        x = self.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.relu(self.fc2(x))
        x = self.dropout3(x)
        return self.fc3(x)
    
    def training_step(self, batch, batch_idx):
        img_seq, label = batch
        gt_label = F.one_hot(label.view(1, -1)[0].long(), num_classes=self.n_class).float()
        

        logits = self.forward(img_seq) # output size: batch_size x 4
        probs = self.softmax(logits)
        preds = torch.max(probs, 1, keepdim=True)[1].int()
        
        loss = self.criterion(logits, gt_label)

        f1 = torchmetrics.functional.f1_score(preds, label, task='binary', average='weighted')
        self.log('train_f1_batch', f1, prog_bar=True)

        return {'loss': loss, 'f1': f1}

    def training_epoch_end(self, outputs):
        # log epoch metric

        loss = sum(output['loss'] for output in outputs) / len(outputs)
        self.logger.experiment.add_scalar("Loss/Train", loss, self.current_epoch)
        self.log('train_loss', loss, prog_bar=True)

        top1 = sum(output['f1'] for output in outputs) / len(outputs)
        self.logger.experiment.add_scalar("F1/Train", top1, self.current_epoch)
        self.log('train_f1', top1, prog_bar=True)

        

    def validation_step(self, batch, batch_idx):
        img_seq, label = batch
        gt_label = F.one_hot(label.view(1, -1)[0].long(), num_classes=self.n_class).float()

        
        logits = self.forward(img_seq) # output size: batch_size x 49
        probs = self.softmax(logits)
        preds = torch.max(probs, 1, keepdim=True)[1].int()

        loss = self.criterion(logits, gt_label)
        
        f1 = torchmetrics.functional.f1_score(preds, label, task='binary', average='weighted')
        return {'loss':loss, 'f1': f1}



    
    def validation_epoch_end(self, outputs):

        loss = sum(output['loss'] for output in outputs) / len(outputs)
        self.logger.experiment.add_scalar("Loss/Validation", loss, self.current_epoch)
        self.log('val_loss', loss, prog_bar=True)

        top1 = sum(output['f1'] for output in outputs) / len(outputs)
        self.logger.experiment.add_scalar("F1/Validation", top1, self.current_epoch)
        self.log('val_f1', top1, prog_bar=True)

    

    def predict_step(self, batch, batch_idx):
        img_seq, label = batch
        logits = self.forward(img_seq)
        probs = self.softmax(logits)

        preds = torch.max(probs, 1, keepdim=True)[1].int()
        return probs, preds

    def configure_optimizers(self):
        
        return torch.optim.SGD(self.parameters(), lr=self.lr) if self.opt =='sgd' else torch.optim.Adam(self.parameters(), lr=self.lr)
    


    def use(self, X):
        # Set input matrix to torch.tensors if not already.
        if not isinstance(X, torch.Tensor):
            X = torch.from_numpy(X).float()
        
        X = torch.permute(X, (0, 3, 1, 2))
        Y = self.forward(X)
        probs = self.softmax(Y)
        classes = self.classes[torch.argmax(probs, axis=1).cpu().numpy()]
        return classes.cpu().numpy(), probs.detach().cpu().numpy()



In [3]:
np.random.seed(42)

ROOT_DATA_PATH = '/s/babbage/e/nobackup/nkrishna/m3x/others/data/'
SAVING_OUTPUTS = './'
train_set = ROOT_DATA_PATH + 'train.csv'
test_set = ROOT_DATA_PATH + 'test.csv'
images = ROOT_DATA_PATH + 'images/'
MODEL_NAME = 'microsoft/resnet-50'


# HYPERPARAMETERS
LEARNING_RATE = 1e-05
EPOCHS = 10
BATCH_SIZE = 100

In [37]:

def get_images_and_targets(labels_df: pd.DataFrame, images_path: str, image_processor=None, test=False, train_fraction=0.7, val=False, lesser=True):
    
    
    # PRINTS_TO_USE = []
    # for i in labels_df['print_id'].unique():
    #     count = len(labels_df[labels_df['print_id'] == i ])
    #     if lesser and count <= CUTOFF_PRINTS:
    #         PRINTS_TO_USE.append(i)
    #     elif not lesser and count > CUTOFF_PRINTS:
    #         PRINTS_TO_USE.append(i)

    # raw_labels = labels_df.values
    # print_filter = labels_df['print_id'].isin(PRINTS_TO_USE)
    # if test == False:
    #     filter = (labels_df['printer_id'].isin(PRINTERS_FOR_VAL_ONLY) | labels_df['print_id'].isin(PRINT_IDS_FOR_VAL_ONLY))
        

    #     if val == True:
    #         raw_labels = labels_df[filter].values
    #     else:
    #         raw_labels = labels_df[~filter & print_filter].values
    # else:
    #     raw_labels = labels_df[print_filter].values

    # if test==False:
    #     fraction = int(len(raw_labels) - (len(raw_labels)*train_fraction))
    #     r_indexes = np.arange(len(raw_labels))
    #     np.random.shuffle(r_indexes)
    #     raw_labels = raw_labels[r_indexes]
    #     if val:
    #         raw_labels = raw_labels[:fraction , :]
    #     else:
    #         raw_labels = raw_labels[fraction: , :]

    
    raw_labels = labels_df.values
    X, Y = [], []
    # print(len(raw_labels)//10000)
    for i in tqdm(range(len(raw_labels))):
        image = Image.open(images_path + raw_labels[i][0])
        image = image.resize((224,224), resample=Image.Resampling.LANCZOS)
        # print(image)
        if image_processor != None:
            image = image_processor(image, return_tensors='pt').pixel_values
        else:
            image = np.array(image)
            # print(image.shape)
            image = np.moveaxis(image, -1, 0)
            # print(image.shape)
            image = torch.from_numpy(image)[None, :, :, :]
            # print(image.shape)
            # image = torch.from_numpy(np.moveaxis(np.array(image), -1, 0))



        X.append(image)
        if test==False:
            Y.append(raw_labels[i][3])
        else:
            Y.append(raw_labels[i][0])
        if i == 3:
            break
    # print(X)
    
    X, Y = torch.vstack(X), torch.from_numpy(np.array(Y)).reshape(-1,1) if test == False else np.array(Y)
    return X, Y

In [38]:
train_labels = pd.read_csv(train_set)

# image_processor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
image_processor = None



X, Y= get_images_and_targets(train_labels, images, image_processor)
# valX, valY = get_images_and_targets(train_labels, images, image_processor, test=False, val=True, train_fraction=0.8)

# print("loaded train and val data")

# # dataset_train = CustomTensorDataset(trainX, trainY)
# dataset_val = CustomTensorDataset(valX, valY)


# # trainloader = DataLoader(dataset=dataset_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=6)
# valloader = DataLoader(dataset=dataset_val, batch_size=BATCH_SIZE, shuffle=False, num_workers=6)    

# print("data ready for training")
# image[0][0]
X.shape
  

  0%|          | 3/81060 [00:00<15:12, 88.80it/s]

(3, 224, 224)
torch.Size([1, 3, 224, 224])
(3, 224, 224)
torch.Size([1, 3, 224, 224])
(3, 224, 224)
torch.Size([1, 3, 224, 224])
(3, 224, 224)
torch.Size([1, 3, 224, 224])





torch.Size([4, 3, 224, 224])

In [28]:
# X, Y = torch.ones((10, 3, 224, 224)), torch.ones((10, 1))
dataset = CustomTensorDataset(X, Y)
loader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True)
# print(X, Y)

In [29]:
# HYPERPARAMETERS
LEARNING_RATE = 1e-03
EPOCHS = 100
BATCH_SIZE = 100

n_hiddens_per_conv_layer = [128]
n_hiddens_per_fc_layer = [128]
patch_size_per_conv_layer = [64]
stride_per_conv_layer = [1]



model = ConvNet(224*224, n_hiddens_per_conv_layer, n_hiddens_per_fc_layer, 2, patch_size_per_conv_layer, stride_per_conv_layer, learning_rate=LEARNING_RATE)



In [31]:
idx, data = enumerate(loader).__next__()
# data
img_seq, label = data
print(img_seq.shape)
model(img_seq)

torch.Size([4, 150528])


RuntimeError: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [4, 150528]

In [26]:
# checkpoint_callback = ModelCheckpoint(
#         monitor='train_f1',
#         dirpath='{}{}/'.format(SAVING_OUTPUTS, 'output/model'),
#         filename='{}-{}-{}-{}'.format(MODEL_NAME, '3dprint_convnet', LEARNING_RATE, BATCH_SIZE)+'-{epoch:02d}-{train_f1:.4f}_test',
#         save_top_k=5,
#         mode='max',
#     )
# early_stopping = EarlyStopping(monitor="train_f1", min_delta=0.00, patience=10, verbose=False, mode="max")


# logger = TensorBoardLogger('lightning_logs', name=f'{MODEL_NAME}_convnet_lr_{LEARNING_RATE}_test_epoch_{EPOCHS}')


trainer = pl.Trainer(
    max_epochs=EPOCHS,
    precision=16,
    accelerator='gpu', devices=[0],
    num_sanity_val_steps=0,
    # check_val_every_n_epoch=5,
    # callbacks=[checkpoint_callback, early_stopping],
    # logger=logger,
    # strategy='ddp'
)


Using 16bit None Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [15]:
trainer.fit(model, loader)


  rank_zero_warn(
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name        | Type             | Params
-------------------------------------------------
0 | pools       | ModuleList       | 0     
1 | conv_layers | ModuleList       | 10.5 M
2 | fc_layers   | ModuleList       | 23.9 M
3 | criterion   | CrossEntropyLoss | 0     
4 | softmax     | Softmax          | 0     
-------------------------------------------------
34.4 M    Trainable params
0         Non-trainable params
34.4 M    Total params
68.835    Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

RuntimeError: Calculated padded input size per channel: (1 x 1). Kernel size: (32 x 32). Kernel size can't be greater than actual input size