In [1]:
import math
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, UnidentifiedImageError
from pathlib import Path
import torch
import glob
import pytorch_lightning as pl
from huggingface_hub import HfApi, Repository
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchmetrics import Accuracy
from transformers import ViTFeatureExtractor, ViTForImageClassification
from pytorch_lightning.callbacks import ModelCheckpoint

In [2]:
data_dir = Path("/home/orelit/Projects -Sathsara/Planigo/data/VIT_train_wine_low_variance_test_augmented")

ds=ImageFolder(data_dir)
indices = torch.randperm(len(ds)).tolist()
n_val = math.floor(len(indices) * .15)
train_ds = torch.utils.data.Subset(ds, indices[:-n_val])
val_ds = torch.utils.data.Subset(ds, indices[-n_val:])


label2id = {}
id2label = {}
for i, class_name in enumerate(ds.classes):
  label2id[class_name] = str(i)
  id2label[str(i)] = class_name

In [3]:
class ImageClassificationCollator:
   def __init__(self, feature_extractor): 
      self.feature_extractor = feature_extractor
   def __call__(self, batch):  
      encodings = self.feature_extractor([x[0] for x in batch],
      return_tensors='pt')   
      encodings['labels'] = torch.tensor([x[1] for x in batch],    
      dtype=torch.long)
      return encodings

feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
collator = ImageClassificationCollator(feature_extractor)
train_loader = DataLoader(train_ds, batch_size=32, 
   collate_fn=collator, num_workers=2, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=32, collate_fn=collator, 
   num_workers=2)
model = ViTForImageClassification.from_pretrained(
        'google/vit-base-patch16-224-in21k',
         num_labels=len(label2id),
         label2id=label2id,
         id2label=id2label)

Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
class Classifier(pl.LightningModule):
   def __init__(self, model, lr: float = 2e-5, **kwargs): 
       super().__init__()
       self.save_hyperparameters('lr', *list(kwargs))
       self.model = model
       self.forward = self.model.forward 
       self.val_acc = Accuracy()
   def training_step(self, batch, batch_idx):
       outputs = self(**batch)
       self.log(f"train_loss", outputs.loss)
       return outputs.loss
   def validation_step(self, batch, batch_idx):
       outputs = self(**batch)
       self.log(f"val_loss", outputs.loss)
       acc = self.val_acc(outputs.logits.argmax(1), batch['labels'])
       self.log(f"val_acc", acc, prog_bar=True)
       return outputs.loss
   def configure_optimizers(self):
       return torch.optim.Adam(self.parameters(), 
                        lr=self.hparams.lr,weight_decay = 0.00025)

In [5]:
pl.seed_everything(42)
classifier = Classifier(model, lr=2e-5)
trainer = pl.Trainer(gpus=1, precision=16, max_epochs=10)
trainer.fit(classifier, train_loader, val_loader)

model.save_pretrained("/home/orelit/Projects -Sathsara/Planigo/Models/VIT_WINE/low_variance_augmented_data_10_epochs")

Global seed set to 42
  rank_zero_deprecation(
Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type                      | Params
------------------------------------------------------
0 | model   | ViTForImageClassification | 85.8 M
1 | val_acc | Accuracy                  | 0     
------------------------------------------------------
85.8 M    Trainable params
0         Non-trainable params
85.8 M    Total params
171.622   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


# Hyper-parameter tuning for VIT with WandB Sweaps

In [5]:
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33msathsara_rasantha[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
sweep_config = {
    'method': 'random'
    }

In [7]:
metric = {
    'name': 'loss',
    'goal': 'minimize'   
    }

sweep_config['metric'] = metric

In [8]:
parameters_dict = {
    'optimizer': {
        'values': ['adam', 'sgd']
        },
    'fc_layer_size': {
        'values': [128, 256, 512]
        },
    'dropout': {
          'values': [0.3, 0.4, 0.5]
        },
    }

sweep_config['parameters'] = parameters_dict

In [9]:
parameters_dict.update({
    'epochs': {
        'value': 1}
    })

In [10]:
parameters_dict.update({
    'learning_rate': {
        # a flat distribution between 0 and 0.1
        'distribution': 'uniform',
        'min': 0,
        'max': 0.1
      },
    'batch_size': {
        # integers between 32 and 256
        # with evenly-distributed logarithms 
        'distribution': 'q_log_uniform_values',
        'q': 8,
        'min': 4,
        'max': 32,
      }
    })

In [11]:
import pprint

pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'batch_size': {'distribution': 'q_log_uniform_values',
                               'max': 32,
                               'min': 4,
                               'q': 8},
                'dropout': {'values': [0.3, 0.4, 0.5]},
                'epochs': {'value': 1},
                'fc_layer_size': {'values': [128, 256, 512]},
                'learning_rate': {'distribution': 'uniform',
                                  'max': 0.1,
                                  'min': 0},
                'optimizer': {'values': ['adam', 'sgd']}}}


In [12]:
sweep_id = wandb.sweep(sweep_config, project="VIT-Wine-Hyper-parameter-tuning")

Create sweep with ID: wytqkv0x
Sweep URL: https://wandb.ai/sathsara_rasantha/VIT-Wine-Hyper-parameter-tuning/sweeps/wytqkv0x


In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")         

In [14]:
pl.seed_everything(42)

Global seed set to 42


42

In [15]:
def train(config=None):
    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config
        
        feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
        collator = ImageClassificationCollator(feature_extractor)
        train_loader = DataLoader(train_ds, batch_size=config.batch_size, 
             collate_fn=collator, num_workers=2, shuffle=True)
        val_loader = DataLoader(val_ds, batch_size=config.batch_size, collate_fn=collator, 
             num_workers=2)
        model = ViTForImageClassification.from_pretrained(
                 'google/vit-base-patch16-224-in21k',
                  num_labels=len(label2id),
                  label2id=label2id,
                  id2label=id2label)

        classifier = Classifier(model, lr=config.learning_rate)
        trainer = pl.Trainer(gpus=1, precision=16, max_epochs=config.epochs)
        trainer.fit(classifier, train_loader, val_loader)
        

In [None]:
wandb.agent(sweep_id, train, count=10)

[34m[1mwandb[0m: Agent Starting Run: 93k8se2r with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	fc_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.08139950020932776
[34m[1mwandb[0m: 	optimizer: sgd


Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  rank_zero_deprecation(
Using 1

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: zk29gb06 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	fc_layer_size: 256
[34m[1mwandb[0m: 	learning_rate: 0.03293916214026169
[34m[1mwandb[0m: 	optimizer: adam


Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  rank_zero_deprecation(
Using 1

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: qq9mv3e6 with config:
[34m[1mwandb[0m: 	batch_size: 24
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	fc_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.09260322191459402
[34m[1mwandb[0m: 	optimizer: adam


Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using 16bit native Automatic Mix

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Agent Starting Run: 2ovhsts3 with config:
[34m[1mwandb[0m: 	batch_size: 24
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	fc_layer_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.05026258050569345
[34m[1mwandb[0m: 	optimizer: sgd


Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using 16bit native Automatic Mix

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

# Model training with WandB

In [20]:
import wandb
wandb.login()

True

In [21]:
from pytorch_lightning.callbacks import ModelCheckpoint

checkpoint_callback = ModelCheckpoint(monitor='val_acc', mode='max')

In [22]:
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning import Trainer

wandb_logger = WandbLogger(project='VIT_wine-trainig_with_PL_2022_09_27', # group runs in "MNIST" project
                           log_model='all') # log all new checkpoints during training

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333678245544434, max=1.0)…

In [23]:
classifier = Classifier(model, lr=2e-5)
trainer = pl.Trainer(
    gpus=1, 
    precision=16,
    logger=wandb_logger,                   
    callbacks=[checkpoint_callback],       
    max_epochs=5) 
trainer.fit(classifier, train_loader, val_loader)

wandb.finish()

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type                      | Params
------------------------------------------------------
0 | model   | ViTForImageClassification | 86.0 M
1 | val_acc | Accuracy                  | 0     
------------------------------------------------------
86.0 M    Trainable params
0         Non-trainable params
86.0 M    Total params
171.916   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


VBox(children=(Label(value='983.953 MB of 983.953 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0,…

0,1
epoch,▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▃▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆████████
train_loss,██▇▅▅▄▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_acc,██▁██
val_loss,█▃▂▁▁

0,1
epoch,4.0
train_loss,0.01198
trainer/global_step,6669.0
val_acc,1.0
val_loss,0.01208


# Model Inferencing - VIT


In [15]:
model_path = '/home/orelit/Projects -Sathsara/Planigo/Models/VIT_WINE/low_variance_augmented_data_5_epochs'
wine_model = ViTForImageClassification.from_pretrained(
         model_path,
         num_labels=len(label2id),
         label2id=label2id,
         id2label=id2label)

In [16]:
def prediction(img_path):
   im=Image.open(img_path)
   encoding = feature_extractor(images=im, return_tensors="pt")
   encoding.keys()
   pixel_values = encoding['pixel_values']
   outputs = wine_model(pixel_values)
   result = outputs.logits.softmax(1).argmax(1)
   tensor_result = outputs.logits.softmax(1)
   prob = torch.max(tensor_result)
   new_result = result.tolist() 
   for i in new_result:
     return(id2label[str(i)])

In [17]:
def getProbs(img_path):
   im=Image.open(img_path)
   encoding = feature_extractor(images=im, return_tensors="pt")
   encoding.keys()
   pixel_values = encoding['pixel_values']
   outputs = model_new(pixel_values)
   tensor_result = outputs.logits.softmax(1)
   prob = torch.max(tensor_result)
   return prob

In [18]:
def process_image(image_path):
   pil_image = Image.open(image_path)
   if pil_image.size[0] > pil_image.size[1]:
       pil_image.thumbnail((5000, 256))
   else:
       pil_image.thumbnail((256, 5000))
   left_margin = (pil_image.width-224)/2
   bottom_margin = (pil_image.height-224)/2
   right_margin = left_margin + 224
   top_margin = bottom_margin + 224
   pil_image = pil_image.crop((left_margin, bottom_margin, 
                               right_margin, top_margin))
   np_image = np.array(pil_image)/255
   mean = np.array([0.485, 0.456, 0.406])
   std = np.array([0.229, 0.224, 0.225])
   np_image = (np_image - mean) / std
   np_image = np_image.transpose((2, 0, 1))
   return np_image

In [19]:
def imshow(image, ax=None, title=None):
   if ax is None:
      fig, ax = plt.subplots()
   image = image.transpose((1, 2, 0))
   mean = np.array([0.485, 0.456, 0.406])
   std = np.array([0.229, 0.224, 0.225])
   image = std * image + mean
   if title is not None:
      ax.set_title(title)
   image = np.clip(image, 0, 1)
   ax.imshow(image)
   
   return ax


In [20]:
def display_image(image_dir):
   
   plt.figure(figsize = (6,10))
   plot_1 = plt.subplot(2,1,1)
   image = process_image(image_dir)
   asl_sign = image_dir[image_dir.rfind('/')+1:]
   pred= prediction(image_dir)
   plot_1.set_xlabel("The predicted sign: "+pred)
   imshow(image, plot_1, title=asl_sign);


In [7]:
# image_path1 = '/home/orelit/Projects -Sathsara/Planigo/data/VIT_test/4022025001905 (2).jpeg'
# display_image(image_path1)

In [8]:
# prediction(image_path1)

In [21]:
import os
test_data_path = '/home/orelit/Projects -Sathsara/Planigo/data/VIT_test_wine_low_variance_test/All_barcodes'

image_paths = []
for image in os.listdir(test_data_path):
    img = test_data_path +'/'+image
    image_paths.append(img)

In [1]:
# for i in image_paths:
#     display_image(i)

In [4]:
# test_label = image_path1[image_path1.rfind('/')+1:]
# test_label
# test_label = re.sub(r'\([^)]*\)', '', test_label)
# x = test_label.split(" ")

# if len(x)>1:
#     final_output = x[0]
# else:
#     y = test_label.split(".")
#     final_output = y[0]
    
# final_output
# test_label[0:13]

In [22]:
import re
count = 0
correct = 0
for image in os.listdir(test_data_path):
    img = test_data_path +'/'+image
    pred= prediction(img)
    print('Prediction : ',pred)
    test_label = image[image.rfind('/')+1:]
    real_class = test_label[0:13]
    print('Real Label : ',real_class)
    if real_class == pred:
        correct = correct +1
        print('Correctly Classified')
    else:
        print('Misclassified')
    count = count + 1
    print('.....................................')
    
acc = correct/count
print("Number of images : ",count)
print("Number of correctly classified images : ",correct)
print("Accuray : ",acc)

Prediction :  7290000023847
Real Label :  7290008670142
Misclassified
.....................................
Prediction :  7290008801843
Real Label :  7290008801843
Correctly Classified
.....................................
Prediction :  7290000024264
Real Label :  7290000024264
Correctly Classified
.....................................
Prediction :  7290015781008
Real Label :  7290015350150
Misclassified
.....................................
Prediction :  7290015781008
Real Label :  7290015350150
Misclassified
.....................................
Prediction :  7290000023847
Real Label :  7290000023847
Correctly Classified
.....................................
Prediction :  7290012576607
Real Label :  7290012576607
Correctly Classified
.....................................
Prediction :  7290000024264
Real Label :  7290000024264
Correctly Classified
.....................................
Prediction :  7290015781008
Real Label :  7290015350150
Misclassified
...............................

# Wine Models Inferencing with multiple models

In [9]:
import pandas as pd
model_lists = []
for i in range(12):
    my_file = open("/home/orelit/Projects -Sathsara/Planigo/Models/wine_sections_text/model{}.txt".format(i+1)
                   , "r")
    content = my_file.read()
    x = content.split('\n')
    new_list = []
    for item in x:
        try:
            new_item = int(item)
            new_str = str(new_item)
            new_list.append(new_str)
        except:
            pass
    model_lists.append(new_list)
    
model_lists[0]

model_dict = {'model1':model_lists[0],'model2':model_lists[1],'model3':model_lists[2],'model4':model_lists[3],
              'model5':model_lists[4],'model6':model_lists[5],'model7':model_lists[6],'model8':model_lists[7],
              'model9':model_lists[8],'model10':model_lists[9],'model11':model_lists[10],'model12':model_lists[11]}

df = pd.DataFrame.from_dict(model_dict, orient='index').T
df

Unnamed: 0,model1,model2,model3,model4,model5,model6,model7,model8,model9,model10,model11,model12
0,73490154317,3760125946870,4022025001905,4022025002100.0,4022025290408.0,5998623530644.0,7290000024202.0,7290000024264.0,4001432773230.0,4022025001929.0,4022025261002.0,608614309160.0
1,3760125946719,608614309269,608614309290,7290004494063.0,7290000023809.0,7290000023977.0,7290008836265.0,7290014256620.0,7290015781008.0,7290008804332.0,7290006696717.0,7290006256089.0
2,608614309184,7290000023847,7290000024219,7290008801157.0,7290000024530.0,7290000521022.0,7290015951227.0,7290015781145.0,7290016607772.0,7290014503137.0,7290014501232.0,7290010298273.0
3,7290000023816,7290000521008,7290005966088,7290008801850.0,7290004658953.0,7290004658946.0,7290017812847.0,7290017812618.0,7290017812588.0,7290101582397.0,7290018165027.0,7290017004457.0
4,7290000024554,7290006256102,7290008670159,7290008803014.0,7290008801843.0,7290008802291.0,7290108620214.0,7290103681630.0,8422443005213.0,8002450206003.0,8002450206508.0,7290108620153.0
5,7290000521404,7290006696595,7290008670678,7290008807777.0,7290008804462.0,7290008807029.0,7290004494353.0,7290004494049.0,7290000024516.0,7290000024240.0,4603400000043.0,608614309245.0
6,7290002363491,7290008670142,7290008801010,7290008836272.0,7290008805384.0,7290008836425.0,7290008836494.0,7290014910461.0,7290015781138.0,7290010656615.0,7290008804189.0,7290008801461.0
7,7290004494131,7290008670302,7290008802512,7290008921176.0,7290008836401.0,7290014501829.0,7290017647425.0,7290016717235.0,7290017589763.0,7290015781107.0,7290015781046.0,7290012576607.0
8,7290006256775,7290008801119,7290008802529,7290008921336.0,7290008921329.0,7290015350150.0,7290018165294.0,7290017812663.0,7290018165010.0,7290108620054.0,7290101582403.0,7290018165034.0
9,7290008801478,7290008801539,7290008803021,7290014466128.0,7290014466135.0,7290017589633.0,7290008802895.0,7290004494315.0,7290005966033.0,7290000484747.0,608614309276.0,7290000024523.0


In [10]:
import os
test_data_path = '/home/orelit/Projects -Sathsara/Planigo/data/test'

folder_names = []
for folder in os.listdir(test_data_path):
    folder_names.append(folder)

In [25]:
count = 0
correct = 0
for folder in folder_names:
    print('folder name :',folder)
    for column in df.columns:
        if folder  in df[column].unique():
            model  = column
    print('model name :',model)
    
    section_number = model[5:]
    
    data_dir = Path("/home/orelit/Projects -Sathsara/Planigo/data/WINE_SECTIONS_NEW/section{}".
                    format(section_number))
    ds=ImageFolder(data_dir)
    
    label2id = {}
    id2label = {}
    for i, class_name in enumerate(ds.classes):
        label2id[class_name] = str(i)
        id2label[str(i)] = class_name
        
    model_path = '/home/orelit/Projects -Sathsara/Planigo/Models/VIT_WINE/{}'.format(model)
    wine_model = ViTForImageClassification.from_pretrained(
         model_path,
         num_labels=len(label2id),
         label2id=label2id,
         id2label=id2label)
    for image in os.listdir(test_data_path+'/'+folder):
        print('image name : ',image)
        img = test_data_path+'/'+folder +'/'+image
        pred= prediction(img,wine_model)
        print("predicted label : ",pred)
        test_label = image[image.rfind('/')+1:]
        real_class = test_label[0:13]
        print('real label : ',real_class)
        if real_class == pred:
            correct = correct +1
            print("Correctly classified")
        count = count + 1
        print('..........................................')

acc = correct/count
print("Number of images : ",count)
print("Number of correctly classified images : ",correct)
print("Accuray : ",acc)

folder name : 7290017289106
model name : model1
image name :  7290017289106 (2).jpeg
predicted label :  7290017289106
real label :  7290017289106
Correctly classified
..........................................
image name :  7290017289106 (1).jpeg
predicted label :  7290017289106
real label :  7290017289106
Correctly classified
..........................................
folder name : 7290108620061
model name : model11
image name :  7290108620061.jpeg
predicted label :  7290004494919
real label :  7290108620061
..........................................
folder name : 7290017289199
model name : model3
image name :  7290017289199.jpeg
predicted label :  7290015350143
real label :  7290017289199
..........................................
folder name : 7290012576614
model name : model12
image name :  7290012576614 (1).jpeg
predicted label :  7290012576614
real label :  7290012576614
Correctly classified
..........................................
image name :  7290012576614 (3).jpeg
predicted

predicted label :  7290008670159
real label :  7290008670159
Correctly classified
..........................................
image name :  7290008670159 (3).jpeg
predicted label :  7290008670159
real label :  7290008670159
Correctly classified
..........................................
image name :  7290008670159 (2).jpeg
predicted label :  7290008670159
real label :  7290008670159
Correctly classified
..........................................
image name :  7290008670159 (4).jpeg
predicted label :  7290008670159
real label :  7290008670159
Correctly classified
..........................................
folder name : 7290015781015
model name : model8
image name :  7290015781015 (2).jpeg
predicted label :  7290000024264
real label :  7290015781015
..........................................
image name :  7290015781015 (3).jpeg
predicted label :  7290000024264
real label :  7290015781015
..........................................
image name :  7290015781015 (1).jpeg
predicted label :  729

image name :  7290108620115 (3).jpeg
predicted label :  7290015781114
real label :  7290108620115
..........................................
image name :  7290108620115 (2).jpeg
predicted label :  7290017589923
real label :  7290108620115
..........................................
image name :  7290108620115 (1).jpeg
predicted label :  7290015781114
real label :  7290108620115
..........................................
folder name : 7290015781121
model name : model5
image name :  7290015781121 (1).jpeg
predicted label :  7290015781121
real label :  7290015781121
Correctly classified
..........................................
image name :  7290015781121 (2).jpeg
predicted label :  7290015781121
real label :  7290015781121
Correctly classified
..........................................
image name :  7290015781121 (3).jpeg
predicted label :  7290015781121
real label :  7290015781121
Correctly classified
..........................................
image name :  7290015781121 (4).jpeg
predic

predicted label :  7290017289205
real label :  7290008805964
..........................................
folder name : 7290012576607
model name : model12
image name :  7290012576607 (3).jpeg
predicted label :  7290012576607
real label :  7290012576607
Correctly classified
..........................................
image name :  7290012576607 (1).jpeg
predicted label :  7290012576607
real label :  7290012576607
Correctly classified
..........................................
image name :  7290012576607 (2).jpeg
predicted label :  7290012576607
real label :  7290012576607
Correctly classified
..........................................
image name :  7290012576607 (4).jpeg
predicted label :  7290012576607
real label :  7290012576607
Correctly classified
..........................................
folder name : 7290015951227
model name : model7
image name :  7290015951227 (2).jpeg
predicted label :  7290008836494
real label :  7290015951227
..........................................
image name

image name :  7290016717099 (1).jpeg
predicted label :  7290008670159
real label :  7290016717099
..........................................
image name :  7290016717099 (3).jpeg
predicted label :  7290008670159
real label :  7290016717099
..........................................
image name :  7290016717099 (5).jpeg
predicted label :  7290008670159
real label :  7290016717099
..........................................
image name :  7290016717099 (2).jpeg
predicted label :  7290008670159
real label :  7290016717099
..........................................
image name :  7290016717099 (4).jpeg
predicted label :  7290008670159
real label :  7290016717099
..........................................
folder name : 7290017647722
model name : model4
image name :  7290017647722 (2).jpeg
predicted label :  7290103681210
real label :  7290017647722
..........................................
image name :  7290017647722 (1).jpeg
predicted label :  7290103681210
real label :  7290017647722
........

# VIT low variance data

In [2]:
from datasets import load_dataset

dataset = load_dataset("imagefolder", 
    data_dir="/home/orelit/Projects -Sathsara/Planigo/data/VIT_train_wine_low_variance_test_augmented_splitted")

Resolving data files:   0%|          | 0/18122 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/4523 [00:00<?, ?it/s]

Using custom data configuration default-41910383854bf899
Reusing dataset imagefolder (/home/orelit/.cache/huggingface/datasets/imagefolder/default-41910383854bf899/0.0.0/0fc50c79b681877cc46b23245a6ef5333d036f48db40d53765a68034bc48faff)


  0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
dataset.keys()

dict_keys(['train', 'validation'])

In [4]:
labels = dataset['train'].features['label']

In [5]:
labels.num_classes

16

In [7]:
from transformers import ViTFeatureExtractor


checkpoint = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(checkpoint)

In [8]:
from torchvision.transforms import (
    Compose,
    Normalize,
    Resize,
    RandomResizedCrop,
    RandomHorizontalFlip,
    RandomAdjustSharpness,
    ToTensor,
    ToPILImage
)


# train
train_aug_transforms = Compose([
    RandomResizedCrop(size=feature_extractor.size),
    RandomHorizontalFlip(p=0.5),
    RandomAdjustSharpness(sharpness_factor=5, p=0.5),
    ToTensor(),
    Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
])


# validation/test
valid_aug_transforms = Compose([
    Resize(size=(feature_extractor.size, feature_extractor.size)),
    ToTensor(),
    Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
])


In [9]:
def apply_train_aug_transforms(examples):
  examples['pixel_values'] = [train_aug_transforms(img.convert('RGB')) for img in examples['image']]
  return examples


def apply_valid_aug_transforms(examples):
  examples['pixel_values'] = [valid_aug_transforms(img.convert('RGB')) for img in examples['image']]
  return examples


dataset['train'].set_transform(apply_train_aug_transforms)
dataset['validation'].set_transform(apply_valid_aug_transforms)

datasets_processed = dataset.rename_column('label', 'labels')

In [10]:
from transformers import ViTForImageClassification


def model_init():
    vit_model = ViTForImageClassification.from_pretrained(
        checkpoint,
        num_labels=labels.num_classes,
        id2label={index: label for index, label in enumerate(labels.names)},
        label2id={label: index for index, label in enumerate(labels.names)}
    )
    return vit_model

In [11]:
from datasets import load_metric
import numpy as np


def compute_metrics_fn(eval_preds):
  metrics = dict()
  
  accuracy_metric = load_metric('accuracy')
  precision_metric = load_metric('precision')
  recall_metric = load_metric('recall')
  f1_metric = load_metric('f1')


  logits = eval_preds.predictions
  labels = eval_preds.label_ids
  preds = np.argmax(logits, axis=-1)  
  
  metrics.update(accuracy_metric.compute(predictions=preds, references=labels))
  metrics.update(precision_metric.compute(predictions=preds, references=labels, average='weighted'))
  metrics.update(recall_metric.compute(predictions=preds, references=labels, average='weighted'))
  metrics.update(f1_metric.compute(predictions=preds, references=labels, average='weighted'))


  return metrics

In [12]:
import torch


def collate_fn(examples):
  pixel_values = torch.stack([example['pixel_values'] for example in examples])
  labels = torch.tensor([example['labels'] for example in examples])
  return {'pixel_values': pixel_values, 'labels': labels}

In [20]:
from transformers import TrainingArguments, Trainer
pl.seed_everything(42)

training_args = TrainingArguments(
        output_dir='/home/orelit/Projects -Sathsara/Planigo/Models/wine_low_variance_augmented_data_logs',
        num_train_epochs=5,
        learning_rate=2e-5,
        weight_decay= 0.00025,
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        save_strategy='epoch',
        evaluation_strategy='epoch',
        load_best_model_at_end=True,
        remove_unused_columns=False,
        fp16=True
    )

trainer = Trainer(
        # model,
        model_init=model_init,
        args=training_args,
        data_collator=collate_fn,
        train_dataset=datasets_processed['train'],
        eval_dataset=datasets_processed['validation'],
        compute_metrics=compute_metrics_fn,
    )

trainer.train()

Global seed set to 42
loading configuration file config.json from cache at /home/orelit/.cache/huggingface/hub/models--google--vit-base-patch16-224-in21k/snapshots/1ba429d32753f33a0660b80ac6f43a3c80c18938/config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTModel"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "7290000023809",
    "1": "7290000023847",
    "2": "7290000024264",
    "3": "7290006256089",
    "4": "7290008670142",
    "5": "7290008670159",
    "6": "7290008801843",
    "7": "7290008802512",
    "8": "7290008804189",
    "9": "7290012576607",
    "10": "7290014466609",
    "11": "7290015350150",
    "12": "7290015781008",
    "13": "7290015781015",
    "14": "7290015781114",
    "15": "7290015951227"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
 

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.033336941401163736, max=1.0…

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.6067,0.643301,0.977228,0.98061,0.977228,0.976048
2,0.525,0.284437,0.991156,0.991624,0.991156,0.990883
3,0.2728,0.182262,0.989609,0.990632,0.989609,0.989269
4,0.1824,0.155605,0.989609,0.990633,0.989609,0.98921
5,0.1404,0.131496,0.990051,0.990992,0.990051,0.98969


***** Running Evaluation *****
  Num examples = 4523
  Batch size = 32
Saving model checkpoint to /home/orelit/Projects -Sathsara/Planigo/Models/wine_low_variance_augmented_data_logs/checkpoint-567
Configuration saved in /home/orelit/Projects -Sathsara/Planigo/Models/wine_low_variance_augmented_data_logs/checkpoint-567/config.json
Model weights saved in /home/orelit/Projects -Sathsara/Planigo/Models/wine_low_variance_augmented_data_logs/checkpoint-567/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4523
  Batch size = 32
Saving model checkpoint to /home/orelit/Projects -Sathsara/Planigo/Models/wine_low_variance_augmented_data_logs/checkpoint-1134
Configuration saved in /home/orelit/Projects -Sathsara/Planigo/Models/wine_low_variance_augmented_data_logs/checkpoint-1134/config.json
Model weights saved in /home/orelit/Projects -Sathsara/Planigo/Models/wine_low_variance_augmented_data_logs/checkpoint-1134/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 4

TrainOutput(global_step=2835, training_loss=0.4959030272468688, metrics={'train_runtime': 1978.8196, 'train_samples_per_second': 45.79, 'train_steps_per_second': 1.433, 'total_flos': 7.022430225919181e+18, 'train_loss': 0.4959030272468688, 'epoch': 5.0})

In [52]:
trained_checkpoint = '/home/orelit/Projects -Sathsara/Planigo/Models/wine_low_variance_augmented_data_logs/checkpoint-2835'

In [53]:
vit_model = ViTForImageClassification.from_pretrained(
        trained_checkpoint,
        num_labels=labels.num_classes,
        id2label={index: label for index, label in enumerate(labels.names)},
        label2id={label: index for index, label in enumerate(labels.names)}
    )

loading configuration file /home/orelit/Projects -Sathsara/Planigo/Models/wine_low_variance_augmented_data_logs/checkpoint-2835/config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224-in21k",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "7290000023809",
    "1": "7290000023847",
    "2": "7290000024264",
    "3": "7290006256089",
    "4": "7290008670142",
    "5": "7290008670159",
    "6": "7290008801843",
    "7": "7290008802512",
    "8": "7290008804189",
    "9": "7290012576607",
    "10": "7290014466609",
    "11": "7290015350150",
    "12": "7290015781008",
    "13": "7290015781015",
    "14": "7290015781114",
    "15": "7290015951227"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "7290000023809": 0,
    "7290000023847": 

In [54]:
id2label={index: label for index, label in enumerate(labels.names)}
label2id={label: index for index, label in enumerate(labels.names)}

In [37]:
# data_dir = Path("/home/orelit/Projects -Sathsara/Planigo/data/VIT_train_wine_low_variance_test")

# ds=ImageFolder(data_dir)
# indices = torch.randperm(len(ds)).tolist()
# n_val = math.floor(len(indices) * .15)
# train_ds = torch.utils.data.Subset(ds, indices[:-n_val])
# val_ds = torch.utils.data.Subset(ds, indices[-n_val:])


# label2id = {}
# id2label = {}
# for i, class_name in enumerate(ds.classes):
#   label2id[class_name] = str(i)
#   id2label[str(i)] = class_name

In [55]:
import os
test_data_path = '/home/orelit/Projects -Sathsara/Planigo/data/VIT_test_wine_low_variance_test/All_barcodes'

image_paths = []
for image in os.listdir(test_data_path):
    img = test_data_path +'/'+image
    image_paths.append(img)

In [56]:
def prediction(img_path):
   im=Image.open(img_path)
   encoding = feature_extractor(images=im, return_tensors="pt")
   encoding.keys()
   pixel_values = encoding['pixel_values']
   outputs = vit_model(pixel_values)
   result = outputs.logits.softmax(1).argmax(1)
   tensor_result = outputs.logits.softmax(1)
   prob = torch.max(tensor_result)
   new_result = result.tolist() 
   for i in new_result:
     return(id2label[i])

In [57]:
import re
count = 0
correct = 0
for image in os.listdir(test_data_path):
    img = test_data_path +'/'+image
    pred= prediction(img)
    print("prediction : ",pred)
    test_label = image[image.rfind('/')+1:]
    real_class = test_label[0:13]
    print("real label : ",real_class)
    if real_class == pred:
        correct = correct +1
        print('Correctly Classified')
    else:
        print('Not Classified')
    count = count + 1
    print('......................................................')
    
acc = correct/count
print("Number of images : ",count)
print("Number of correctly classified images : ",correct)
print("Accuray : ",acc)

prediction :  7290012576607
real label :  7290008670142
Not Classified
......................................................
prediction :  7290000024264
real label :  7290008801843
Not Classified
......................................................
prediction :  7290000024264
real label :  7290000024264
Correctly Classified
......................................................
prediction :  7290015781008
real label :  7290015350150
Not Classified
......................................................
prediction :  7290015781008
real label :  7290015350150
Not Classified
......................................................
prediction :  7290000023847
real label :  7290000023847
Correctly Classified
......................................................
prediction :  7290012576607
real label :  7290012576607
Correctly Classified
......................................................
prediction :  7290000024264
real label :  7290000024264
Correctly Classified
.......................

prediction :  7290015781008
real label :  7290015350150
Not Classified
......................................................
Number of images :  66
Number of correctly classified images :  30
Accuray :  0.45454545454545453
