In [3]:
import math
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, UnidentifiedImageError
from pathlib import Path
import torch
import glob
import pytorch_lightning as pl
from huggingface_hub import HfApi, Repository
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchmetrics import Accuracy
from transformers import ViTFeatureExtractor, ViTForImageClassification
from pytorch_lightning.callbacks import ModelCheckpoint

In [4]:
data_dir = Path("/home/orelit/Projects -Sathsara/Planigo/data/Sri Lankan_Final")

# Load every image under `data_dir`; `ds.classes` provides the class names used below.
ds = ImageFolder(data_dir)

# Shuffle with a dedicated, seeded generator so the train/validation split is
# identical on every run (the global seed is only set later, right before training).
split_rng = torch.Generator().manual_seed(42)
indices = torch.randperm(len(ds), generator=split_rng).tolist()

# Hold out 15% of the samples for validation. Slice from an explicit boundary:
# `indices[:-n_val]` would give an EMPTY training set whenever n_val == 0.
n_val = math.floor(len(indices) * .15)
n_train = len(indices) - n_val
train_ds = torch.utils.data.Subset(ds, indices[:n_train])
val_ds = torch.utils.data.Subset(ds, indices[n_train:])


# Class-name <-> class-index lookup tables; indices are stored as strings.
label2id = {}
id2label = {}
for i, class_name in enumerate(ds.classes):
  label2id[class_name] = str(i)
  id2label[str(i)] = class_name

In [5]:
# Build the class-name -> index and index -> class-name tables in one pass each.
# Indices are kept as strings on both sides.
label2id = {name: str(position) for position, name in enumerate(ds.classes)}
id2label = {str(position): name for position, name in enumerate(ds.classes)}

In [4]:
import json

# Write both label maps to their JSON files.
_label_map_files = (
    ('/home/orelit/Projects -Sathsara/Planigo/Models/VIT_SL_new/label2id.json', label2id),
    ('/home/orelit/Projects -Sathsara/Planigo/Models/VIT_SL_new/id2label.json', id2label),
)
for _json_path, _mapping in _label_map_files:
    with open(_json_path, 'w') as fp:
        json.dump(_mapping, fp)

In [10]:
class ImageClassificationCollator:
    """Collate `(image, label)` samples into one model-ready batch.

    The images are handed to the stored feature extractor (asking for PyTorch
    tensors) and the labels are attached to its result under the `labels` key.
    """

    def __init__(self, feature_extractor):
        self.feature_extractor = feature_extractor

    def __call__(self, batch):
        # Position 0 of each sample is the image, position 1 its integer label.
        images = [sample[0] for sample in batch]
        targets = [sample[1] for sample in batch]

        batch_encoding = self.feature_extractor(images, return_tensors='pt')
        batch_encoding['labels'] = torch.tensor(targets, dtype=torch.long)
        return batch_encoding

# The feature extractor and the model are loaded from the same checkpoint name.
_checkpoint = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(_checkpoint)
collator = ImageClassificationCollator(feature_extractor)

# Options shared by both loaders; only the training loader shuffles.
_loader_options = dict(batch_size=32, collate_fn=collator, num_workers=2)
train_loader = DataLoader(train_ds, shuffle=True, **_loader_options)
val_loader = DataLoader(val_ds, **_loader_options)

# Classification head sized to the number of classes in the label maps.
model = ViTForImageClassification.from_pretrained(
    _checkpoint,
    num_labels=len(label2id),
    label2id=label2id,
    id2label=id2label,
)

Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
class Classifier(pl.LightningModule):
    """LightningModule that trains and validates a wrapped model.

    Each batch is unpacked into the wrapped model as keyword arguments; the
    returned object is expected to expose `.loss` and `.logits`.
    """

    def __init__(self, model, lr: float = 2e-5, **kwargs):
        super().__init__()
        # Record `lr` and the names of any extra keyword arguments as hyperparameters.
        self.save_hyperparameters('lr', *kwargs.keys())
        self.model = model
        # Calling this module dispatches straight to the wrapped model's forward.
        self.forward = self.model.forward
        self.val_acc = Accuracy()

    def training_step(self, batch, batch_idx):
        """Run one training batch, log its loss and return it."""
        loss = self(**batch).loss
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Run one validation batch, logging its loss and accuracy."""
        result = self(**batch)
        loss = result.loss
        self.log("val_loss", loss)

        # Predicted class = index of the largest logit along dimension 1.
        predicted = result.logits.argmax(1)
        batch_accuracy = self.val_acc(predicted, batch['labels'])
        self.log("val_acc", batch_accuracy, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Adam over all parameters, using the stored learning rate."""
        optimizer_settings = {'lr': self.hparams.lr, 'weight_decay': 0.00025}
        return torch.optim.Adam(self.parameters(), **optimizer_settings)

In [7]:
# Seed the RNGs before building the Lightning module and trainer.
pl.seed_everything(42)
classifier = Classifier(model, lr=2e-5)

# `gpus=` is deprecated in recent PyTorch Lightning 1.x releases and removed in
# 2.0; `accelerator="gpu", devices=1` is the equivalent supported spelling.
trainer = pl.Trainer(accelerator="gpu", devices=1, precision=16, max_epochs=10)
trainer.fit(classifier, train_loader, val_loader)

# `classifier` wraps this same `model` object, so this saves the fine-tuned weights.
model.save_pretrained("/home/orelit/Projects -Sathsara/Planigo/Models/VIT_SL_new/model_1")

Global seed set to 42
  rank_zero_deprecation(
Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type                      | Params
------------------------------------------------------
0 | model   | ViTForImageClassification | 85.8 M
1 | val_acc | Accuracy                  | 0     
------------------------------------------------------
85.8 M    Trainable params
0         Non-trainable params
85.8 M    Total params
171.605   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


In [6]:
# Reload the fine-tuned model from disk with the same label configuration.
model_path = '/home/orelit/Projects -Sathsara/Planigo/Models/VIT_SL_new/model_1'
_label_config = dict(
    num_labels=len(label2id),
    label2id=label2id,
    id2label=id2label,
)
SL_prod_model = ViTForImageClassification.from_pretrained(model_path, **_label_config)

In [7]:
def prediction(img_path):
   """Classify a single image file and return its predicted class name.

   Args:
      img_path: Path of the image file to classify.

   Returns:
      The `id2label` entry for the highest-scoring class.
   """
   # The context manager closes the file handle once the pixels are loaded.
   # Converting to RGB ensures grayscale / RGBA / palette files reach the
   # feature extractor as 3-channel images.
   with Image.open(img_path) as im:
      rgb_image = im.convert("RGB")

   encoding = feature_extractor(images=rgb_image, return_tensors="pt")
   pixel_values = encoding['pixel_values']

   # Inference only: skip autograd bookkeeping.
   with torch.no_grad():
      outputs = SL_prod_model(pixel_values)

   # Softmax is monotonic, so argmax over the raw logits selects the same class.
   # The batch holds exactly one image, hence index 0.
   predicted_index = int(outputs.logits.argmax(1)[0])
   return id2label[str(predicted_index)]

In [8]:
import os
test_data_path = '/home/orelit/Projects -Sathsara/Planigo/data/sri_lankan_test_images/All_barcodes'

# Full path of every entry in the test directory, in os.listdir order.
image_paths = [test_data_path + '/' + entry for entry in os.listdir(test_data_path)]

In [11]:
import re

# The true label is a prefix of the file name: names of at least
# LONG_NAME_MIN_LENGTH characters carry a 13-character label, shorter names a
# 5-character one.
LONG_NAME_MIN_LENGTH = 18
LONG_LABEL_LENGTH = 13
SHORT_LABEL_LENGTH = 5

count = 0
correct = 0
for image in os.listdir(test_data_path):
    img = test_data_path +'/'+image
    pred = prediction(img)
    print('Prediction : ',pred)

    # os.listdir returns bare file names, so the label is read from the start of `image`.
    if len(image) >= LONG_NAME_MIN_LENGTH:
        label_length = LONG_LABEL_LENGTH
    else:
        label_length = SHORT_LABEL_LENGTH
    real_class = image[:label_length]
    print('Real Label : ',real_class)

    if real_class == pred:
        correct += 1
        print('Correctly Classified')
    else:
        print('Misclassified')
    count += 1
    print('.....................................')

# An empty test directory would otherwise raise ZeroDivisionError.
acc = correct / count if count else 0.0
print("Number of images : ",count)
print("Number of correctly classified images : ",correct)
print("Accuray : ",acc)

Prediction :  4792229216206
Real Label :  4792229216206
Correctly Classified
.....................................
Prediction :  4796918130712
Real Label :  4796918130712
Correctly Classified
.....................................
Prediction :  8901030732911
Real Label :  8901030732911
Correctly Classified
.....................................
Prediction :  4792143282417
Real Label :  4792143282417
Correctly Classified
.....................................
Prediction :  4792037767266
Real Label :  4792037767266
Correctly Classified
.....................................
Prediction :  4792143280413
Real Label :  4792143280413
Correctly Classified
.....................................
Prediction :  4792229216206
Real Label :  4792229216206
Correctly Classified
.....................................
Prediction :  4792149097107
Real Label :  4792149097107
Correctly Classified
.....................................
Prediction :  4792068181130
Real Label :  4792068181130
Correctly Classified
...

In [18]:
import numpy as np
import shutil
import os

def imlist(path):
    """Return `path` joined with the name of every entry directly inside it."""
    listing = []
    for entry_name in os.listdir(path):
        listing.append(os.path.join(path, entry_name))
    return listing

def copy_images(imagePaths, folder):
    """Copy images into `folder`, grouped into one sub-folder per label.

    An image's label is the name of its parent directory, so
    `<src>/<label>/<name>` is copied to `<folder>/<label>/<name>`.

    Args:
        imagePaths: Iterable of image file paths.
        folder: Destination root; created if it does not exist.
    """
    os.makedirs(folder, exist_ok=True)

    for path in imagePaths:
        imageName = os.path.basename(path)
        label = os.path.basename(os.path.dirname(path))
        labelFolder = os.path.join(folder, label)

        # Only the directory creation is conditional. The copy must run for
        # EVERY image, not just the first one seen for each label.
        os.makedirs(labelFolder, exist_ok=True)

        destination = os.path.join(labelFolder, imageName)
        shutil.copy(path, destination)


path = "/home/orelit/Projects -Sathsara/Planigo/data/VIT_train_wine_low_variance_test_augmented"

items =os.listdir(path)

for item in items:
    # Derive each class directory from `path` rather than repeating the literal,
    # so the two cannot drift apart.
    item_dir = os.path.join(path, item)
    if not os.path.isdir(item_dir):
        # Stray files at the top level would make os.listdir fail inside imlist.
        continue

    imagePaths = imlist(item_dir)
    np.random.shuffle(imagePaths)

    # 20% of each class goes to validation, the remainder to training.
    valPathsLen = int(len(imagePaths) * 0.2)
    trainPathsLen = len(imagePaths) - valPathsLen

    # NOTE(review): these lists are overwritten on every iteration and are not
    # passed to copy_images in this cell — confirm whether a copy step is missing.
    trainPaths = imagePaths[:trainPathsLen]
    valPaths = imagePaths[trainPathsLen:]

In [20]:
# Display the directory entries collected in `items` as the cell output.
items

['7290008804189',
 '7290008670142',
 '7290000024264',
 '7290008670159',
 '7290015781015',
 '7290006256089',
 '7290014466609',
 '7290008802512',
 '7290015781114',
 '7290012576607',
 '7290015951227',
 '7290015781008',
 '7290008801843',
 '7290000023809',
 '7290015350150',
 '7290000023847']