In [1]:
import csv
import os
import cv2
from PIL import Image
import pandas as pd
from tqdm import tqdm
import numpy as np
import torch
from convnet import ConvNet
from sklearn.metrics import classification_report

from lightning_model import LightningModel

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger


# np.random.seed(42)

In [2]:
ROOT_DATA_PATH = './'
train_set = ROOT_DATA_PATH + 'train.csv'
test_set = ROOT_DATA_PATH + 'test.csv'
images = ROOT_DATA_PATH + 'images/'

In [3]:
from transformers import SwinForImageClassification, AutoFeatureExtractor
from transformers import AutoProcessor
from datasets import load_dataset

# from  datasets import Data

In [4]:


dataset = load_dataset("huggingface/cats-image")




No config specified, defaulting to: cats-image/image
Found cached dataset cats-image (/s/chopin/a/grad/sam97/.cache/huggingface/datasets/huggingface___cats-image/image/1.9.0/68fbc793fb10cd165e490867f5d61fa366086ea40c73e549a020103dcb4f597e)


  0%|          | 0/1 [00:00<?, ?it/s]

In [5]:

image = dataset["test"]["image"]



In [6]:
image_processor = AutoFeatureExtractor.from_pretrained("microsoft/swin-tiny-patch4-window7-224" )


In [7]:
inputs = image_processor(image, return_tensors="pt")


In [8]:
inputs.pixel_values.shape

torch.Size([1, 3, 224, 224])

In [9]:
def get_images_and_targets(labels_df, images_path, image_processor, test=False,):
    # img_height, img_width = 224, 224
    raw_labels = labels_df.values
    
    if test==False:
        r_indexes = np.arange(len(raw_labels))
        raw_labels = raw_labels[r_indexes]
    
    X, Y = [], []
    # print(len(raw_labels)//10000)
    for i in tqdm(range(len(raw_labels)//100)):
        image = Image.open(images_path + raw_labels[i][0])
        image = image_processor(image, return_tensors='pt')
        # image = image.resize((img_height, img_width))
#         print(np.array(image).shape)
#         image= np.resize(image,(img_height,img_width,3))
#         image = image.astype('float32')
#         image = (image -127.5) / 127.5
        X.append(image.pixel_values)
        if test==False:
            Y.append(raw_labels[i][3])
        else:
            Y.append(raw_labels[i][0])
    # print(X)

    X, Y = torch.vstack(X), torch.from_numpy(np.array(Y)).reshape(-1,1) if test == False else np.array(Y)
    return X, Y

In [10]:
train_labels = pd.read_csv(train_set)
test_labels = pd.read_csv(test_set)


In [11]:
trainX, trainY = get_images_and_targets(train_labels, images, image_processor)
testX, imgpathsY = get_images_and_targets(test_labels, images, image_processor, test=True)

100%|██████████| 810/810 [00:09<00:00, 86.89it/s] 
100%|██████████| 252/252 [00:02<00:00, 95.82it/s] 


In [12]:
# import torch
from torch.utils.data import Dataset

class CustomTensorDataset(Dataset):
  def __init__(self, X, Y, transform_list=None):
    # # [data_X, data_y] = dataset
    # X_tensor, y_tensor = torch.tensor(data_X), torch.tensor(data_y)
    #X_tensor, y_tensor = Tensor(data_X), Tensor(data_y)
    tensors = (X, Y)
    assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors)
    self.tensors = tensors
    self.transforms = transform_list

  def __getitem__(self, index):
    x = self.tensors[0][index]

    if self.transforms:
      #for transform in self.transforms: 
      #  x = transform(x)
      x = self.transforms(x)

    y = self.tensors[1][index]

    return x, y

  def __len__(self):
    return self.tensors[0].size(0)

In [13]:
from torch.utils.data import DataLoader

dataset_train = CustomTensorDataset(trainX, trainY)
trainloader = DataLoader(dataset=dataset_train, batch_size=2, shuffle=True, num_workers=1)

In [14]:
from transformers import SwinForImageClassification, Trainer, TrainingArguments


2023-03-26 01:56:58.263891: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [15]:
model = SwinForImageClassification.from_pretrained(
    'microsoft/swin-tiny-patch4-window7-224',
    num_labels=2,
    id2label={str(i): c for i, c in enumerate(range(2))},
    label2id={c: str(i) for i, c in enumerate(range(2))},
    ignore_mismatched_sizes = True,
)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Some weights of SwinForImageClassification were not initialized from the model checkpoint at microsoft/swin-tiny-patch4-window7-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
# def collate_fn(batch):
#   #data collator
#     images, label = batch
#     return {
#         'pixel_values': torch.stack([x for x in images]),
#         'labels': torch.tensor([x for x in label])
#     }

In [17]:
# batch_size=8

# # Defining training arguments (set push_to_hub to false if you don't want to upload it to HuggingFace's model hub)
# training_args = TrainingArguments(
#     f"swin-finetuned-3dprinting",
#     remove_unused_columns=False,
#     evaluation_strategy = "epoch",
#     save_strategy = "epoch",
#     learning_rate=5e-5,
#     per_device_train_batch_size=batch_size,
#     gradient_accumulation_steps=4,
#     per_device_eval_batch_size=batch_size,
#     num_train_epochs=3,
#     warmup_ratio=0.1,
#     logging_steps=10,
#     load_best_model_at_end=True,
#     metric_for_best_model="accuracy",
#     push_to_hub=False,
# )

# # Instantiate the Trainer object
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     data_collator=collate_fn,
#     # compute_metrics=compute_metrics,
#     train_dataset=trainloader,
#     # eval_dataset=prepared_ds["validation"],
#     tokenizer=image_processor,
# )

In [18]:
light_model = LightningModel(model, num_classes=2,reshape_input=False)


In [20]:
if not os.path.exists('{}{}/'.format(ROOT_DATA_PATH, 'output/model')):
        os.makedirs('{}{}/'.format(ROOT_DATA_PATH, 'output/model'))
    

checkpoint_callback = ModelCheckpoint(
    monitor='val_accuracy',
    dirpath='{}{}/'.format(ROOT_DATA_PATH, 'output/model'),
    filename='{}-{}-{}-{}-{}'.format('swin', 'swin', '3dprint', 0.0001, 1)+'-{epoch:02d}-{train_accuracy:.4f}',
    save_top_k=3,
    mode='max',
)
early_stopping = EarlyStopping(monitor="train_accuracy", min_delta=0.00, patience=10, verbose=False, mode="max")

print('DATA LOADED...')

logger = TensorBoardLogger('lightning_logs', name='swin')


trainer = pl.Trainer(
    max_epochs=5,
    precision=16,
    accelerator='gpu', devices=[0],
    num_sanity_val_steps=0,
    # check_val_every_n_epoch=5,
    callbacks=[checkpoint_callback, early_stopping],
    logger=logger,
    # strategy='ddp'
)

print('Start Training...')
trainer.fit(light_model, trainloader)



Using 16bit None Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA RTX A6000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name      | Type                       | Params
---------------------------------------------------------
0 | model     | SwinForImageClassification | 27.5 M
1 | criterion | CrossEntropyLoss           | 0     
2 | softmax   | Softmax                    | 0     
---------------------------------------------------------
27.5 M    Trainable params
0         Non-trainable params
27.5 M    Total

DATA LOADED...
Start Training...


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


In [21]:
device = torch.device('cuda:0')
model = model.to(device)




In [23]:
# from torch import softmax
softmax = torch.nn.Softmax(dim=1)


gt_max, pred_max, probs_all = [], [], []
with torch.no_grad():

    for idx, data in tqdm(enumerate(trainloader)):

        img_seq, label = data
        # if args.model != 'videomae':
        #     img_seq = torch.permute(img_seq, (0,2,1,3,4))
        
        img_seq = img_seq.cuda(device)
        
        logits = model(img_seq).logits


        probs = softmax(logits)
        preds = torch.max(probs, 1, keepdim=True)[1].int().cpu()
        
        
        gt_max.append(label)
        pred_max.append( preds)
        probs_all.append(probs)


gt_max = torch.vstack(gt_max).cpu()
pred_max = torch.vstack(pred_max).cpu()
probs_all = torch.vstack(probs_all).cpu()




405it [00:07, 54.22it/s]


In [24]:
print(classification_report(gt_max, pred_max, zero_division=0, digits=7))

              precision    recall  f1-score   support

           1  1.0000000 1.0000000 1.0000000       810

    accuracy                      1.0000000       810
   macro avg  1.0000000 1.0000000 1.0000000       810
weighted avg  1.0000000 1.0000000 1.0000000       810



In [25]:
with torch.no_grad():
    
    logits = model(testX).logits
    probs = softmax(logits)
    preds = torch.max(probs, 1, keepdim=True)[1].int().cpu()

testPreds = preds

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor