In [None]:
# Input data files are available in the read-only "../input/" directory
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#donut must use pytorch 2.1.0 and tochvision 0.16.0
from os import environ
pytorch_download_path = ('cpu', 'cu118')['CUDA_VERSION' in environ]
! pip install --quiet --upgrade --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$pytorch_download_path

In [None]:
# ! pip install gdown
# ! gdown --fuzzy https://drive.google.com/file/d/17_ZUsfub-A5T5pIG_T9Ru3hLqS6xsvHi/view?usp=sharing
# ! unzip -oq donut_dataset_elecciones.zip
# ! mv /kaggle/working/data/donut_dataset /kaggle/working/data/elecciones_generales

In [None]:
# Root of the writable Kaggle working directory; the other paths are derived from it.
BASE_PATH = '/kaggle/working'
# Directory that holds the working copy of the dataset.
DATASET_PATH = f'{BASE_PATH}/data/elecciones_generales'

In [None]:
# Start from a clean working copy of the dataset: delete any previous copy, make sure
# the parent directory exists, then copy the dataset from the Kaggle input directory
# into DATASET_PATH.
! rm -rf $DATASET_PATH
! mkdir -p $BASE_PATH/data
! cp -R /kaggle/input/elecciones-generales/data/donut_dataset $DATASET_PATH

In [None]:
! git clone --quiet https://github.com/clovaai/donut.git

In [None]:
! sed -i '101d' $BASE_PATH/donut/donut/model.py

In [None]:
# import torch
# print("Pytorch version:")
# print(torch.__version__)
# print("CUDA Version: ")
# print(torch.version.cuda)
# print("cuDNN version is :")
# print(torch.backends.cudnn.version())

In [None]:
# Install the remaining Python dependencies. transformers, datasets[vision], timm and
# pytorch-lightning are pinned to exact versions; sconf, zss, sentencepiece and
# tbparse are left unpinned.
# NOTE(review): presumably these are the versions Donut was validated with — confirm,
# and consider pinning the unpinned packages for reproducibility.
! pip install --quiet --no-cache-dir transformers==4.25.1 sconf zss datasets[vision]==2.14.6 sentencepiece timm==0.5.4 pytorch-lightning==2.0.7 tbparse

In [None]:
#convert tiff files to jpeg

from glob import glob
from tqdm.auto import tqdm
from PIL import Image as ImagePil
from os import path, remove

! du -sh $DATASET_PATH
train_images_counter = 0

for tiff_path in tqdm(glob(DATASET_PATH + '/**/*.tiff')):
    if 'train' in tiff_path: train_images_counter += 1
    tiff_path_dir, tiff_filename = path.split(path.abspath(tiff_path))
    tiff_filename_wo_ext = path.splitext(tiff_filename)[0]
    jpeg_path = path.join(tiff_path_dir, tiff_filename_wo_ext + '.jpg')
    ImagePil.open(tiff_path).convert('RGB').save(jpeg_path, quality=90, subsampling=0, optimize=False) #subsampling=1, optimize=True
    remove(tiff_path)

! du -sh $DATASET_PATH

In [None]:
# Training hyper-parameters that are injected into the generated Donut config.
MAX_EPOCHS = 10
TRAIN_BATCH_SIZES = 4

# Warm up during the first 10% of the total number of training steps.
steps_across_all_epochs = (train_images_counter / TRAIN_BATCH_SIZES) * MAX_EPOCHS
WARMUP_STEPS = int(steps_across_all_epochs * .10)

# Path of the YAML training config written by the next cell; shown as the cell output.
training_config_file_path = path.join(BASE_PATH, 'donut', 'config', 'train_generales.yaml')
training_config_file_path

In [None]:
%%bash -s "$train_images_counter" "$TRAIN_BATCH_SIZES" "$MAX_EPOCHS" "$WARMUP_STEPS" "$DATASET_PATH" "$training_config_file_path"
# Write the Donut training config (YAML) from the values passed in by the notebook.
# Positional arguments, in order: number of training images, batch size, max epochs,
# warm-up steps, dataset path, destination config file.
# -e: stop on the first failing command; -u: fail if an expected argument is missing.
set -eu

train_samples="$1"
batch_size="$2"
max_epochs="$3"
warmup_steps="$4"
dataset_path="$5"
config_file="$6"

# The here-document delimiter is unquoted on purpose so the variables below are
# expanded; the redirect target is quoted so a path containing spaces still works.
cat > "$config_file" <<EOL
resume_from_checkpoint_path: null # only used for resume_from_checkpoint option in PL
result_path: "./result"
pretrained_model_name_or_path: "naver-clova-ix/donut-base"
dataset_name_or_paths: ["${dataset_path}"]
sort_json_key: False
train_batch_sizes: [${batch_size}]
val_batch_sizes: [${batch_size}]
# input_size: [640, 320] # when the input resolution differs from the pre-training setting, some weights will be newly initialized (but the model training would be okay)
input_size: [640, 640]
max_length: 128
align_long_axis: False
num_nodes: 1
seed: 2022
lr: 3e-5
warmup_steps: ${warmup_steps} # 10% of total steps, equals to num_training_samples_per_epoch / train_batch_sizes * max_epochs / 10
num_training_samples_per_epoch: ${train_samples} # Set it to the number of training images you have
max_epochs: ${max_epochs} #30
max_steps: -1
num_workers: 4 #8???
val_check_interval: 1.0
check_val_every_n_epoch: 3 #10
gradient_clip_val: 1.0
verbose: True
EOL

In [None]:
# ! cd /kaggle/working/donut && python train.py --config /kaggle/working/donut/config/train_generales.yaml

In [None]:
# Launch Donut's train.py from inside the repository directory, using the config
# file whose path is stored in training_config_file_path. The config sets
# result_path to "./result", which is relative to the directory train.py runs in.
! cd $BASE_PATH/donut && python train.py --config $training_config_file_path

In [None]:
! cd $BASE_PATH/donut && python test.py --dataset_name_or_path $DATASET_PATH --pretrained_model_name_or_path /kaggle/working/donut/result/train_generales/20231116_082818 --save_path ./result/output.json

In [None]:
# ! cat /kaggle/working/donut/result/output.json

In [None]:
# A bare `raise` with no active exception fails with a RuntimeError, which stops a
# "Run All" execution at this cell.
# NOTE(review): presumably a deliberate barrier so the cells below are only run by hand — confirm.
raise

In [None]:
from glob import glob
from os import path

from tbparse import SummaryReader

# Read the scalars logged during training into a DataFrame.
# The log directory is looked up instead of hard-coding one run's timestamped folder,
# which does not exist after a fresh training run.
# NOTE(review): run folders look like YYYYMMDD_HHMMSS, so the lexicographically last
# directory is taken as the most recent run — confirm against train.py.
training_run_dirs = sorted(
    run_dir
    for run_dir in glob(path.join(BASE_PATH, 'donut', 'result', 'train_generales', '*'))
    if path.isdir(run_dir)
)
assert training_run_dirs, 'no training run found under donut/result/train_generales - run the training cell first'
log_dir = training_run_dirs[-1]
reader = SummaryReader(log_dir)
df = reader.scalars
df

In [None]:
# Distinct scalar tags present in the training logs.
df.loc[:, "tag"].unique()

In [None]:
# Rows of the scalar log that belong to the 'val_metric' tag.
df.loc[df["tag"].eq('val_metric')]

In [None]:
# Plot the logged 'val_metric' values. The same pattern works for other tags:
# df[df["tag"] == 'learning_rate']['value'].plot()
# df[df["tag"] == 'loss']['value'].plot()
val_metric_rows = df.loc[df["tag"].eq('val_metric')]
val_metric_rows['value'].plot()

In [None]:
# !ls -lh /kaggle/working/donut/result/train_generales/20231114_145512

In [None]:
# !zip -r -9 donut_elecciones_generales.zip /kaggle/working/donut/result/train_generales/20231116_082818

In [None]:
from glob import glob
from os import path

from donut.donut import DonutModel
import torch

# Load the fine-tuned model for inference.
# The model directory is looked up instead of hard-coding one run's timestamped
# folder, which does not exist after a fresh training run.
# NOTE(review): run folders look like YYYYMMDD_HHMMSS, so the lexicographically last
# directory is taken as the most recent run — confirm against train.py.
training_run_dirs = sorted(
    run_dir
    for run_dir in glob(path.join(BASE_PATH, 'donut', 'result', 'train_generales', '*'))
    if path.isdir(run_dir)
)
assert training_run_dirs, 'no training run found under donut/result/train_generales - run the training cell first'

model = DonutModel.from_pretrained(training_run_dirs[-1])
# model = DonutModel.from_pretrained("raulcarlomagno/donut_elecciones_generales_crop", revision="main")
if torch.cuda.is_available():
    # GPU: cast the whole model to half precision and move it to the CUDA device.
    model.half()
    device = torch.device("cuda")
    model.to(device)
else:
    # CPU: only the encoder is cast, to bfloat16.
    model.encoder.to(torch.bfloat16)

model.eval()
"loaded"

In [None]:
# from PIL import Image as ImagePil
# import torch

# Run the loaded model on one sample image and show the result as the cell output.
sample_image_path = "/kaggle/working/data/elecciones_generales/test/1600600320X.jpg"
# sample_image_path = "/kaggle/working/data/elecciones_generales/validation/0100301159X.jpg"
image = ImagePil.open(sample_image_path)

# Gradient tracking is disabled for the inference call.
with torch.no_grad():
    output = model.inference(image=image, prompt="<s_elecciones_generales>")

output

In [None]:
model.push_to_hub(repo_id="raulcarlomagno/donut_elecciones_generales_crop", revision="official")

In [None]:
from huggingface_hub import notebook_login
notebook_login()