In [None]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from PIL import Image, UnidentifiedImageError
import os
from torch.utils.data import Dataset, DataLoader
import requests
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision import transforms
from tqdm.notebook import tqdm_notebook as tqdm
from io import BytesIO
import re

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Unzip the model

!unzip -q /kaggle/input/quantez-finetune/my_finetuned_model.zip -d /kaggle/working/

# Load the fine-tuned model and processor
from transformers import VisionEncoderDecoderModel, TrOCRProcessor

# Load the model
model = VisionEncoderDecoderModel.from_pretrained("/kaggle/working/my_finetuned_model")

# Load the processor
processor = TrOCRProcessor.from_pretrained("/kaggle/working/my_finetuned_model")


In [None]:
# Root folder that the inference loop walks: it is scanned for sub-folders,
# and each sub-folder is scanned for .png images.
base_dir = '/kaggle/input/ai-of-god-3/Public_data/test_images'

# Submission template; later cells read its 'unique id' column and fill in
# its 'prediction' column.
submission_template_path = "/kaggle/input/ai-of-god-3/Public_data/submission.csv.xlsx"
submission_df = pd.read_excel(submission_template_path)

# Accumulator for {'unique id': ..., 'prediction': ...} records.
submission_data = list()

In [None]:
def natural_sort_key(s):
    """Build a sort key that orders embedded digit runs numerically.

    The string is split on runs of decimal digits; each digit run becomes an
    ``int`` and every other piece stays a ``str``, so that e.g. ``'line_2'``
    sorts before ``'line_10'``.

    Args:
        s: The string (typically a file name) to build a key for.

    Returns:
        A list alternating ``str`` and ``int`` items, always starting with a
        (possibly empty) ``str``.
    """
    chunks = re.split(r'(\d+)', s)
    # Because the pattern has a capturing group, re.split alternates
    # text, digits, text, ...: odd positions are exactly the \d+ matches.
    # Converting by position (rather than by str.isdigit()) avoids calling
    # int() on characters such as superscript digits, for which isdigit() is
    # true but which \d does not match and int() cannot parse.
    return [int(chunk) if position % 2 else chunk
            for position, chunk in enumerate(chunks)]

In [None]:
# Run the OCR model over every line image under base_dir and collect one
# {'unique id', 'prediction'} record per image.

# Start from an empty list so re-running this cell does not append duplicates.
submission_data = []

# `device` is selected earlier in the notebook; apply it here so generation
# runs on the GPU when one is available.
model.to(device)

for folder in sorted(os.listdir(base_dir)):
    folder_path = os.path.join(base_dir, folder)
    if not os.path.isdir(folder_path):
        continue

    # The page number is taken as the text after the last '_' in the folder name.
    page_number = folder.split('_')[-1]

    # Natural sort so that e.g. line 2 is processed before line 10.
    for image_file in sorted(os.listdir(folder_path), key=natural_sort_key):
        if not image_file.endswith('.png'):
            continue

        image_path = os.path.join(folder_path, image_file)
        # The line number is the text between the last '_' and the first
        # following '.' in the file name.
        line_number = image_file.split('_')[-1].split('.')[0]
        formatted_image_id = f'P_{page_number}_L_{line_number}'

        # The context manager closes the file handle promptly; convert()
        # returns an independent in-memory image that outlives the `with`.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        pixel_values = processor(images=image, return_tensors="pt").pixel_values

        # Inputs must live on the same device as the model's weights.
        generated_ids = model.generate(pixel_values.to(model.device))
        predicted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        submission_data.append({'unique id': formatted_image_id, 'prediction': predicted_text})
                

In [None]:
# Copy each prediction into the template row that has the same 'unique id'.
# Index the predictions by id once instead of scanning the whole list for
# every row. setdefault keeps the first record for an id, matching a
# first-match linear search.
prediction_by_id = {}
for pred in submission_data:
    prediction_by_id.setdefault(pred['unique id'], pred['prediction'])

for index, unique_id in submission_df['unique id'].items():
    # Rows without a prediction keep whatever the template already holds.
    if unique_id in prediction_by_id:
        submission_df.at[index, 'prediction'] = prediction_by_id[unique_id]


In [None]:
# Rename the id column header from 'unique id' to 'unique Id' before export.
column_renames = {'unique id': 'unique Id'}
submission_df.rename(columns=column_renames, inplace=True)

In [None]:
# Write the table to disk without the DataFrame index column, then confirm.
output_path = 'submission.csv'
submission_df.to_csv(output_path, index=False)
print("submission file created successfully!")

In [None]:
# Show the submission table as this cell's output for a visual sanity check.
submission_df