In [1]:
import torch
import argparse
import json
import os
import tarfile
import lightning.pytorch as pl
from train import UDOPModel
from train import ClassificationDataset
from datasets import load_dataset
from transformers import AutoProcessor
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report



In [2]:
processor = AutoProcessor.from_pretrained(
    "nielsr/udop-large", apply_ocr=False
)

In [3]:
trainer = pl.Trainer(
    accelerator="gpu",
    logger=True
)

Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [4]:
val_dl = DataLoader(
    ClassificationDataset(processor, './rvl_cdip_jordyvl', split="validation"),
    batch_size=1, num_workers=4, shuffle=False, collate_fn=lambda x: x[0]
)

In [5]:
model = UDOPModel()

Some weights of the model checkpoint at nielsr/udop-test were not used when initializing UdopForConditionalGeneration: ['encoder.cell2dembedding.x_position_embeddings.weight', 'encoder.cell2dembedding.y_position_embeddings.weight']
- This IS expected if you are initializing UdopForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing UdopForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of UdopForConditionalGeneration were not initialized from the model checkpoint at nielsr/udop-test and are newly initialized: ['encoder.cell_2d_embedding.x_position_embeddings.weight', 'encoder.cell_2d_embedding.y_position_embeddings.weight']
You should probably T

In [6]:
predictions = trainer.predict(model, dataloaders=val_dl)

You are using a CUDA device ('NVIDIA A10G') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Predicting: |          | 0/? [00:00<?, ?it/s]

  return torch.tensor(value)
  return torch.tensor(value)
  return torch.tensor(value)
  return torch.tensor(value)


In [7]:
mapping = {
  0: "letter",
  1: "form",
  2: "email",
  3: "handwritten",
  4: "advertisement",
  5: "scientific report",
  6: "scientific publication",
  7: "specification",
  8: "file folder",
  9: "news article",
  10: "budget",
  11: "invoice",
  12: "presentation",
  13: "questionnaire",
  14: "resume",
  15: "memo"
}

targets = [mapping[dt['label']] for dt in load_dataset("jordyvl/rvl_cdip_100_examples_per_class", split='test')]

In [8]:
report = classification_report(predictions, targets, output_dict=True)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [9]:
report

{'': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0},
 '+': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0},
 'Documentwritten': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 1.0},
 'Public': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0},
 'The': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0},
 'advertisement': {'precision': 0.52,
  'recall': 1.0,
  'f1-score': 0.6842105263157895,
  'support': 13.0},
 'advertisement folder': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 1.0},
 'budget': {'precision': 0.68,
  'recall': 0.8095238095238095,
  'f1-score': 0.7391304347826086,
  'support': 21.0},
 'budget request': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 3.0},
 'budgets': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0},
 'email': {'precision': 0.84,
  'recall': 0.9130434782608695,
  'f1-score': 0.875,
  'support': 23.0},
 'e

In [10]:
# model_tar_path = "./model/model.tar.gz"

# extract_dir = "./model/extracted_model"
# os.makedirs(extract_dir, exist_ok=True)

# with tarfile.open(model_tar_path, "r:gz") as tar:
#     tar.extractall(path=extract_dir)

In [11]:
model = UDOPModel.load_from_checkpoint("./model/extracted_model/best_model.ckpt")

Some weights of the model checkpoint at nielsr/udop-test were not used when initializing UdopForConditionalGeneration: ['encoder.cell2dembedding.x_position_embeddings.weight', 'encoder.cell2dembedding.y_position_embeddings.weight']
- This IS expected if you are initializing UdopForConditionalGeneration from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing UdopForConditionalGeneration from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of UdopForConditionalGeneration were not initialized from the model checkpoint at nielsr/udop-test and are newly initialized: ['encoder.cell_2d_embedding.x_position_embeddings.weight', 'encoder.cell_2d_embedding.y_position_embeddings.weight']
You should probably T

In [12]:
ftpredictions = trainer.predict(model, dataloaders=val_dl)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Predicting: |          | 0/? [00:00<?, ?it/s]

  return torch.tensor(value)
  return torch.tensor(value)
  return torch.tensor(value)
  return torch.tensor(value)


In [13]:
ftreport = classification_report(ftpredictions, targets, output_dict=True)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
ftreport

{'': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0},
 'Documentwritten': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 1.0},
 'advertisement': {'precision': 0.6,
  'recall': 1.0,
  'f1-score': 0.75,
  'support': 15.0},
 'advertisement folder': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 1.0},
 'budget': {'precision': 0.72,
  'recall': 0.8571428571428571,
  'f1-score': 0.782608695652174,
  'support': 21.0},
 'budget budget': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 1.0},
 'budget request': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 2.0},
 'email': {'precision': 0.72,
  'recall': 0.8181818181818182,
  'f1-score': 0.7659574468085106,
  'support': 22.0},
 'email ': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1.0},
 'email address': {'precision': 0.0,
  'recall': 0.0,
  'f1-score': 0.0,
  'support': 3.0},
 'email article': {'precision': 0.0,
  'recall': 0.0