In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import os
from tqdm import tqdm
from src.constants import entity_unit_map
import ast

# Define constants
BATCH_SIZE = 32
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
TRAIN_IMAGES_DIR = 'train_images'
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Custom dataset
class ProductImageDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
        self.entity_names = sorted(self.data['entity_name'].unique())
        self.entity_name_to_idx = {name: idx for idx, name in enumerate(self.entity_names)}
        self.entity_units = {entity: sorted(units) for entity, units in entity_unit_map.items()}
        self.max_units = max(len(units) for units in self.entity_units.values())

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.basename(self.data.iloc[idx]['image_link'])
        img_path = os.path.join(self.img_dir, img_name)
        img = Image.open(img_path).convert('RGB')
        
        if self.transform:
            img = self.transform(img)
        
        entity_name = self.data.iloc[idx]['entity_name']
        entity_value, entity_unit = self.parse_entity_value(self.data.iloc[idx]['entity_value'])
        
        entity_name_tensor = torch.zeros(len(self.entity_names), dtype=torch.float32)
        entity_name_tensor[self.entity_name_to_idx[entity_name]] = 1
        
        entity_unit_tensor = torch.zeros(self.max_units, dtype=torch.float32)
        if entity_name in self.entity_units and entity_unit in self.entity_units[entity_name]:
            unit_idx = self.entity_units[entity_name].index(entity_unit)
            entity_unit_tensor[unit_idx] = 1
        
        return img, entity_name_tensor, torch.tensor(entity_value, dtype=torch.float32), entity_unit_tensor, entity_name

    def parse_entity_value(self, value_str):
        try:
            value, unit = value_str.rsplit(' ', 1)
            if unit == 'fluid':
                value, unit = value_str.rsplit(' ', 2)[0], ' '.join(value_str.rsplit(' ', 2)[1:])
            return float(value), unit
        except ValueError:
            try:
                value_list = ast.literal_eval(value_str)
                if isinstance(value_list, list) and len(value_list) == 2:
                    return sum(value_list) / 2, 'unknown'
            except:
                pass
        
        return 0.0, 'unknown'

# Data transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load data
train_dataset = ProductImageDataset('dataset/train.csv', TRAIN_IMAGES_DIR, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

# Custom model
class ProductImageModel(nn.Module):
    def __init__(self, num_entity_names, max_units):
        super(ProductImageModel, self).__init__()
        self.resnet = models.resnet152(weights=models.ResNet152_Weights.IMAGENET1K_V1)
        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Identity()
        
        self.fc_entity_name = nn.Linear(num_ftrs, num_entity_names).float()
        self.fc_entity_value = nn.Linear(num_ftrs, 1).float()
        self.fc_entity_units = nn.Linear(num_ftrs, max_units).float()

    def forward(self, x):
        features = self.resnet(x)
        entity_name_out = self.fc_entity_name(features)
        entity_value_out = self.fc_entity_value(features).squeeze(1)
        entity_units_out = self.fc_entity_units(features)
        return entity_name_out, entity_value_out, entity_units_out

# Initialize model
model = ProductImageModel(len(train_dataset.entity_names), train_dataset.max_units)
model.to(DEVICE)

# Loss and optimizer
criterion_classification = nn.BCEWithLogitsLoss()
criterion_regression = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)

# Training loop
for epoch in range(NUM_EPOCHS):
    model.train()
    total_loss = 0
    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{NUM_EPOCHS}')
    
    for images, entity_names, entity_values, entity_units, entity_name_str in progress_bar:
        images = images.to(DEVICE).float()
        entity_names = entity_names.to(DEVICE).float()
        entity_values = entity_values.to(DEVICE).float()
        entity_units = entity_units.to(DEVICE).float()
        
        # Forward pass
        entity_name_out, entity_value_out, entity_units_out = model(images)
        
        # Compute losses
        loss_entity_name = criterion_classification(entity_name_out, entity_names)
        loss_entity_value = criterion_regression(entity_value_out, entity_values)
        loss_entity_units = criterion_classification(entity_units_out, entity_units)
        
        loss = loss_entity_name + loss_entity_value + loss_entity_units
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        progress_bar.set_postfix({'loss': loss.item()})
    
    avg_loss = total_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Average Loss: {avg_loss:.4f}')
    
    # Update learning rate
    scheduler.step(avg_loss)

# Save the model
torch.save({
    'epoch': NUM_EPOCHS,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': avg_loss,
}, 'product_image_model.pth')

print("Training completed. Model saved.")

# Evaluation and prediction function
def predict(model, image):
    model.eval()
    with torch.no_grad():
        entity_name_out, entity_value_out, entity_units_out = model(image.unsqueeze(0).to(DEVICE).float())
        
        predicted_entity = train_dataset.entity_names[torch.argmax(entity_name_out).item()]
        predicted_value = entity_value_out.item()
        predicted_unit_idx = torch.argmax(entity_units_out).item()
        predicted_unit = train_dataset.entity_units.get(predicted_entity, ['unknown'])[predicted_unit_idx] if predicted_unit_idx < len(train_dataset.entity_units.get(predicted_entity, [])) else 'unknown'
        
        return predicted_entity, predicted_value, predicted_unit

# Evaluation
model.eval()
correct_predictions = 0
total_samples = 0

predictions = []
with torch.no_grad():
    for images, _, _, _, entity_name_str in tqdm(train_loader, desc="Evaluating"):
        images = images.to(DEVICE).float()
        
        for i, image in enumerate(images):
            predicted_entity, predicted_value, predicted_unit = predict(model, image)
            formatted_prediction = f"{predicted_value:.2f} {predicted_unit}"
            predictions.append(formatted_prediction)
            
            if predicted_entity == entity_name_str[i]:
                correct_predictions += 1
            total_samples += 1

accuracy = correct_predictions / total_samples
print(f"Evaluation Results:")
print(f"Classification Accuracy: {accuracy:.4f}")

# Save predictions
test_df = pd.read_csv('dataset/test.csv')
test_df['prediction'] = predictions[:len(test_df)]  # Ensure we have the correct number of predictions
test_df[['index', 'prediction']].to_csv('test_out.csv', index=False)

print("Predictions saved to test_out.csv")

---

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image, ImageFile
import pandas as pd
import os
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler
import ast

# Allow loading truncated images
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Define constants
BATCH_SIZE = 128
NUM_EPOCHS = 10
LEARNING_RATE = 0.001
TRAIN_IMAGES_DIR = 'train_images'
TEST_IMAGES_DIR = 'test_images'
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
ACCUMULATION_STEPS = 2
CHECKPOINT_PATH = 'product_image_model_checkpoint.pth'
MODEL_PATH = 'product_image_model_final.pth'
TRAIN_CSV_PATH = 'dataset/train.csv'
TEST_CSV_PATH = 'dataset/test.csv'
OUTPUT_CSV_PATH = 'dataset/test_out.csv'

# Load entity_unit_map
with open('src/constants.py', 'r') as f:
    exec(f.read())

# Preprocessing function
def preprocess_data(csv_file):
    data = pd.read_csv(csv_file)
    original_count = len(data)
    
    def is_valid_sample(row):
        try:
            value_str = row['entity_value']
            entity_name = row['entity_name']
            value, unit = value_str.rsplit(' ', 1)
            if unit == 'fluid':
                value, unit = value_str.rsplit(' ', 2)[0], ' '.join(value_str.rsplit(' ', 2)[1:])
            float(value)  # Check if value can be converted to float
            return unit in entity_unit_map.get(entity_name, [])
        except:
            return False

    filtered_data = data[data.apply(is_valid_sample, axis=1)]
    filtered_count = len(filtered_data)
    
    print(f"Original sample count: {original_count}")
    print(f"Filtered sample count: {filtered_count}")
    print(f"Removed {original_count - filtered_count} samples ({(original_count - filtered_count) / original_count:.2%})")
    
    return filtered_data

# Custom dataset
class ProductImageDataset(Dataset):
    def __init__(self, data, img_dir, transform=None):
        self.data = data
        self.img_dir = img_dir
        self.transform = transform
        self.entity_names = sorted(self.data['entity_name'].unique())
        self.entity_name_to_idx = {name: idx for idx, name in enumerate(self.entity_names)}
        self.entity_units = {entity: sorted(units) for entity, units in entity_unit_map.items()}
        self.max_units = max(len(units) for units in self.entity_units.values())

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_name = os.path.basename(self.data.iloc[idx]['image_link'])
        img_path = os.path.join(self.img_dir, img_name)
        try:
            img = Image.open(img_path).convert('RGB')
        except Exception as e:
            print(f"Error loading image {img_path}: {str(e)}")
            img = Image.new('RGB', (224, 224), color='black')
        
        if self.transform:
            img = self.transform(img)
        
        entity_name = self.data.iloc[idx]['entity_name']
        
        # Check if 'entity_value' column exists in the DataFrame
        if 'entity_value' in self.data.columns:
            entity_value, entity_unit = self.parse_entity_value(self.data.iloc[idx]['entity_value'])
        else:
            # If 'entity_value' doesn't exist, use placeholder values
            entity_value, entity_unit = 0.0, 'unknown'
        
        entity_name_tensor = torch.zeros(len(self.entity_names), dtype=torch.float32)
        entity_name_tensor[self.entity_name_to_idx[entity_name]] = 1
        
        entity_unit_tensor = torch.zeros(self.max_units, dtype=torch.float32)
        if entity_unit in self.entity_units.get(entity_name, []):
            unit_idx = self.entity_units[entity_name].index(entity_unit)
            entity_unit_tensor[unit_idx] = 1
        
        return img, entity_name_tensor, torch.tensor(entity_value, dtype=torch.float32), entity_unit_tensor, entity_name

    def parse_entity_value(self, value_str):
        value, unit = value_str.rsplit(' ', 1)
        if unit == 'fluid':
            value, unit = value_str.rsplit(' ', 2)[0], ' '.join(value_str.rsplit(' ', 2)[1:])
        return float(value), unit

# Custom model
class ProductImageModel(nn.Module):
    def __init__(self, num_entity_names, max_units):
        super(ProductImageModel, self).__init__()
        self.resnet = models.resnet50(pretrained=True)
        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Identity()
        
        self.fc_entity_name = nn.Linear(num_ftrs, num_entity_names).float()
        self.fc_entity_value = nn.Linear(num_ftrs, 1).float()
        self.fc_entity_units = nn.Linear(num_ftrs, max_units).float()

    def forward(self, x):
        features = self.resnet(x)
        entity_name_out = self.fc_entity_name(features)
        entity_value_out = self.fc_entity_value(features).squeeze(1)
        entity_units_out = self.fc_entity_units(features)
        return entity_name_out, entity_value_out, entity_units_out

# Training function
def train_model():
    # Data transforms
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Preprocess and load data
    filtered_data = preprocess_data(TRAIN_CSV_PATH)
    train_dataset = ProductImageDataset(filtered_data, TRAIN_IMAGES_DIR, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)

    # Initialize model
    model = ProductImageModel(len(train_dataset.entity_names), train_dataset.max_units)
    model.to(DEVICE)

    # Loss and optimizer
    criterion_classification = nn.BCEWithLogitsLoss()
    criterion_regression = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)

    # Initialize the GradScaler for mixed precision training
    scaler = GradScaler()

    # Load checkpoint if it exists
    start_epoch = 0
    if os.path.exists(CHECKPOINT_PATH):
        checkpoint = torch.load(CHECKPOINT_PATH)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        scaler.load_state_dict(checkpoint['scaler_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        print(f"Resuming training from epoch {start_epoch}")
    else:
        print("Starting training from scratch")

    # Training loop
    for epoch in range(start_epoch, NUM_EPOCHS):
        model.train()
        total_loss = 0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{NUM_EPOCHS}')
        
        for i, (images, entity_names, entity_values, entity_units, entity_name_str) in enumerate(progress_bar):
            images = images.to(DEVICE)
            entity_names = entity_names.to(DEVICE)
            entity_values = entity_values.to(DEVICE)
            entity_units = entity_units.to(DEVICE)
            
            # Mixed precision training
            with autocast():
                # Forward pass
                entity_name_out, entity_value_out, entity_units_out = model(images)
                
                # Compute losses
                loss_entity_name = criterion_classification(entity_name_out, entity_names)
                loss_entity_value = criterion_regression(entity_value_out, entity_values)
                loss_entity_units = criterion_classification(entity_units_out, entity_units)
                
                loss = loss_entity_name + loss_entity_value + loss_entity_units
                loss = loss / ACCUMULATION_STEPS  # Normalize the loss

            # Backward and optimize
            scaler.scale(loss).backward()
            
            if (i + 1) % ACCUMULATION_STEPS == 0:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
            
            total_loss += loss.item() * ACCUMULATION_STEPS
            progress_bar.set_postfix({'loss': loss.item() * ACCUMULATION_STEPS})
        
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Average Loss: {avg_loss:.4f}')
        
        # Update learning rate
        scheduler.step(avg_loss)

        # Save checkpoint
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'scaler_state_dict': scaler.state_dict(),
            'loss': avg_loss,
        }, CHECKPOINT_PATH)
        print(f"Checkpoint saved at epoch {epoch+1}")

    # Save the final model
    torch.save({
        'epoch': NUM_EPOCHS,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': avg_loss,  # This line is now safe as avg_loss is defined in the loop
    }, MODEL_PATH)

    print("Training completed. Final model saved.")

# Prediction function
@torch.no_grad()
def predict(model, image, train_dataset):
    model.eval()
    with autocast():
        entity_name_out, entity_value_out, entity_units_out = model(image.unsqueeze(0).to(DEVICE))
        
        predicted_entity = train_dataset.entity_names[torch.argmax(entity_name_out).item()]
        predicted_value = entity_value_out.item()
        predicted_unit_idx = torch.argmax(entity_units_out).item()
        predicted_unit = train_dataset.entity_units[predicted_entity][predicted_unit_idx] if predicted_unit_idx < len(train_dataset.entity_units[predicted_entity]) else 'unknown'
        
        return predicted_entity, predicted_value, predicted_unit

# Predictor function for the sample code
def predictor(image_link, category_id, entity_name, model, train_dataset, transform):
    try:
        img = Image.open(image_link).convert('RGB')
        img_tensor = transform(img).unsqueeze(0).to(DEVICE)

        predicted_entity, predicted_value, predicted_unit = predict(model, img_tensor, train_dataset)
        formatted_prediction = f"{predicted_value:.2f} {predicted_unit}"
        return formatted_prediction
    except Exception as e:
        print(f"Error processing image {image_link}: {str(e)}")
        return ""

# Main function
def main():
    # Train the model
    # train_model()

    # Load the trained model for prediction
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = ProductImageDataset(pd.read_csv(TRAIN_CSV_PATH), TRAIN_IMAGES_DIR, transform=transform)
    test_dataset = ProductImageDataset(pd.read_csv(TEST_CSV_PATH), TEST_IMAGES_DIR, transform=transform)
    
    model = ProductImageModel(len(train_dataset.entity_names), train_dataset.max_units)
    checkpoint = torch.load(MODEL_PATH, map_location=DEVICE)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(DEVICE)
    model.eval()

    # Make predictions on the test set
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
    predictions = []

    with torch.no_grad():
        for images, _, _, _, _ in tqdm(test_loader, desc="Predicting"):
            images = images.to(DEVICE)
            
            for image in images:
                predicted_entity, predicted_value, predicted_unit = predict(model, image, train_dataset)
                formatted_prediction = f"{predicted_value:.2f} {predicted_unit}"
                predictions.append(formatted_prediction)
                print(f"Predicted: {formatted_prediction}")  # Print each prediction

    # Save predictions
    test_df = pd.read_csv(TEST_CSV_PATH)
    test_df['prediction'] = predictions
    test_df[['index', 'prediction']].to_csv(OUTPUT_CSV_PATH, index=False)
    print(f"Predictions saved to {OUTPUT_CSV_PATH}")

if __name__ == "__main__":
    main()

In [None]:
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import pandas as pd
import random
import os

# Load the TrOCR model and processor
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-stage1')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-stage1')

# Load the train.csv file
train_df = pd.read_csv('dataset/train.csv')

# Set the path to your train images directory
TRAIN_IMAGES_DIR = 'train_images'

# Function to extract text from an image using TrOCR
def extract_text(image_path):
    image = Image.open(image_path).convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values

    generated_ids = model.generate(pixel_values)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
    
    return generated_text

# Function to get random samples from the train set
def get_random_samples(n=5):
    return train_df.sample(n)

# Main function to process random samples
def process_random_samples(num_samples=5):
    samples = get_random_samples(num_samples)
    
    for _, row in samples.iterrows():
        image_path = os.path.join(TRAIN_IMAGES_DIR, os.path.basename(row['image_link']))
        
        # Extract text using TrOCR
        extracted_text = extract_text(image_path)
        
        print(f"Image: {row['image_link']}")
        print(f"Entity Name: {row['entity_name']}")
        print(f"Actual Label: {row['entity_value']}")
        print(f"Extracted Text: {extracted_text}")
        print("-" * 50)

# Run the main function
if __name__ == "__main__":
    process_random_samples()

# Reference: https://huggingface.co/microsoft/trocr-base-stage1

In [None]:
import pytesseract
from PIL import Image
import pandas as pd
import random
import os
import logging

# Configure logging
logging.basicConfig(filename='ocr_errors.log', level=logging.ERROR)

# Load the train.csv file
train_df = pd.read_csv('dataset/train.csv')

# Set the path to your train images directory
TRAIN_IMAGES_DIR = 'train_images'

# Function to extract text from an image using Tesseract OCR
def extract_text(image_path):
    try:
        image = Image.open(image_path)
        text = pytesseract.image_to_string(image)
        return text.strip()
    except Exception as e:
        logging.error(f"Error processing image {image_path}: {str(e)}")
        return None

# Function to get random samples from the train set
def get_random_samples(n=5):
    return train_df.sample(n)

# Main function to process random samples
def process_random_samples(num_samples=5):
    samples = get_random_samples(num_samples)
    
    for _, row in samples.iterrows():
        image_path = os.path.join(TRAIN_IMAGES_DIR, os.path.basename(row['image_link']))
        
        extracted_text = extract_text(image_path)
        
        if extracted_text is not None:
            print(f"Image: {row['image_link']}")
            print(f"Entity Name: {row['entity_name']}")
            print(f"Actual Label: {row['entity_value']}")
            print(f"Extracted Text: {extracted_text}")
            print("-" * 50)
        else:
            print(f"Failed to process image: {row['image_link']}")

# Run the main function
if __name__ == "__main__":
    try:
        process_random_samples()
    except pytesseract.TesseractNotFoundError:
        print("Tesseract is not installed or not in your PATH.")
        print("Please install Tesseract using your package manager.")
        print("For Ubuntu/Debian: sudo apt-get install tesseract-ocr")
        print("For CentOS/RHEL: sudo yum install tesseract")
        print("For more information, visit: https://github.com/tesseract-ocr/tessdoc/blob/main/Installation.md")
    except Exception as e:
        print(f"An unexpected error occurred: {str(e)}")
        logging.error(f"Unexpected error in main execution: {str(e)}")

In [None]:
import easyocr
import pandas as pd
import random
import os

# Load the train.csv file
train_df = pd.read_csv('dataset/train.csv')

# Set the path to your train images directory
TRAIN_IMAGES_DIR = 'train_images'

# Initialize the EasyOCR reader
reader = easyocr.Reader(['en'])  # Adjust languages as needed

# Function to extract text from an image using EasyOCR
def extract_text(image_path):
    result = reader.readtext(image_path)
    return ' '.join([text for _, text, _ in result])

# Function to get random samples from the train set
def get_random_samples(n=5):
    return train_df.sample(n)

# Main function to process random samples
def process_random_samples(num_samples=5):
    samples = get_random_samples(num_samples)
    
    for _, row in samples.iterrows():
        image_path = os.path.join(TRAIN_IMAGES_DIR, os.path.basename(row['image_link']))
        
        # Extract text using EasyOCR
        extracted_text = extract_text(image_path)
        
        print(f"Image: {row['image_link']}")
        print(f"Entity Name: {row['entity_name']}")
        print(f"Actual Label: {row['entity_value']}")
        print(f"Extracted Text: {extracted_text}")
        print("-" * 50)

# Run the main function
if __name__ == "__main__":
    process_random_samples()

In [None]:
import torch
from transformers import DonutProcessor, VisionEncoderDecoderModel
from PIL import Image
import pandas as pd
import random
import os

# Load the Donut model and processor
processor = DonutProcessor.from_pretrained("jinhybr/OCR-Donut-CORD")
model = VisionEncoderDecoderModel.from_pretrained("jinhybr/OCR-Donut-CORD")

# Load the train.csv file
train_df = pd.read_csv('dataset/train.csv')

# Set the path to your train images directory
TRAIN_IMAGES_DIR = 'train_images'

# Function to extract text from an image using Donut
def extract_text(image_path, entity_name):
    image = Image.open(image_path).convert("RGB")
    pixel_values = processor(image, return_tensors="pt").pixel_values

    task_prompt = "<s_cord-v2>"
    # task_prompt = f"<s_docvqa><s_question>Extract all {entity_name} measurement quantities with units and numerical quantity of the unit, strictly numeircal measurement quantities with units only</s_question><s_answer>"
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids

    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    sequence = processor.token2json(sequence)

    return sequence

# Function to get random samples from the train set
def get_random_samples(n=5):
    return train_df.sample(n)

# Main function to process random samples
def process_random_samples(num_samples=15):
    samples = get_random_samples(num_samples)
    
    for _, row in samples.iterrows():
        image_path = os.path.join(TRAIN_IMAGES_DIR, os.path.basename(row['image_link']))
        
        # Extract text using Donut
        extracted_text = extract_text(image_path, row['entity_name'])
        
        print(f"Image: {row['image_link']}")
        print(f"Entity Name: {row['entity_name']}")
        print(f"Actual Label: {row['entity_value']}")
        print(f"Extracted Text: {extracted_text}")
        print("-" * 50)

# Run the main function
if __name__ == "__main__":
    process_random_samples()

In [None]:
import re
import torch
from transformers import DonutProcessor, VisionEncoderDecoderModel
from PIL import Image
import pandas as pd
import random
import os

# Load the Donut model and processor
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa", use_fast=False)
model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")


device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Load the train.csv file
train_df = pd.read_csv('dataset/train.csv')

# Set the path to your train images directory
TRAIN_IMAGES_DIR = 'train_images'

# Function to extract text from an image using Donut
def extract_text(image_path, entity_name):
    image = Image.open(image_path).convert("RGB")
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Prepare decoder inputs with a suitable prompt for our task
    task_prompt = "<s_rvlcdip>"
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids

    outputs = model.generate(
        pixel_values.to(device),
        decoder_input_ids=decoder_input_ids.to(device),
        max_length=model.decoder.config.max_position_embeddings,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()  # remove first task start token
    
    return sequence

# Function to get random samples from the train set
def get_random_samples(n=5):
    return train_df.sample(n)

# Main function to process random samples
def process_random_samples(num_samples=15):
    samples = get_random_samples(num_samples)
    
    for _, row in samples.iterrows():
        image_path = os.path.join(TRAIN_IMAGES_DIR, os.path.basename(row['image_link']))
        
        # Extract text using Donut
        extracted_text = extract_text(image_path, row['entity_name'])
        
        print(f"Image: {row['image_link']}")
        print(f"Entity Name: {row['entity_name']}")
        print(f"Actual Label: {row['entity_value']}")
        print(f"Extracted Text: {extracted_text}")
        print("-" * 50)

# Run the main function
if __name__ == "__main__":
    process_random_samples()

In [None]:
classes = """entity_unit_map = {
    'width': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'depth': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'height': {'centimetre', 'foot', 'inch', 'metre', 'millimetre', 'yard'},
    'item_weight': {'gram',
        'kilogram',
        'microgram',
        'milligram',
        'ounce',
        'pound',
        'ton'},
    'maximum_weight_recommendation': {'gram',
        'kilogram',
        'microgram',
        'milligram',
        'ounce',
        'pound',
        'ton'},
    'voltage': {'kilovolt', 'millivolt', 'volt'},
    'wattage': {'kilowatt', 'watt'},
    'item_volume': {'centilitre',
        'cubic foot',
        'cubic inch',
        'cup',
        'decilitre',
        'fluid ounce',
        'gallon',
        'imperial gallon',
        'litre',
        'microlitre',
        'millilitre',
        'pint',
        'quart'}
}"""

In [None]:
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import pandas as pd
import random
import os
from constants import entity_unit_map  # Import entity_unit_map from constants.py

# Load the LLaVA model and processor
model_name = "llava-hf/llava-1.5-7b-hf"
processor = AutoProcessor.from_pretrained(model_name)
model = LlavaForConditionalGeneration.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

# Add <image> token to tokenizer and resize model embeddings
processor.tokenizer.add_tokens(["<image>"], special_tokens=True)
model.resize_token_embeddings(len(processor.tokenizer))

# Set up multi-GPU processing if available
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the train.csv file
train_df = pd.read_csv('dataset/train.csv')

# Set the path to your train images directory
TRAIN_IMAGES_DIR = 'train_images'

# Function to extract text from an image using LLaVA
def extract_text(image_path, entity_name):
    image = Image.open(image_path).convert("RGB")
    
    # Get the specific units for the given entity_name
    entity_units = entity_unit_map.get(entity_name, [])
    units_str = ", ".join(entity_units) if entity_units else "unknown"
    
    prompt = f"""USER: <image>
Extract numerical quantities and their corresponding unit belonging to the class `{entity_name}` from the image. 

Output a JSON in the following format:
```
{{
    [
        {{
            "value": `Double Float`,
            "unit": `String` (one of: {units_str})
        }},
        <Repeat for other quantities>
    ]
}}
ASSISTANT:"""
    
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)

    with torch.cuda.amp.autocast():  # Use mixed precision
        if isinstance(model, torch.nn.DataParallel):
            outputs = model.module.generate(**inputs, max_new_tokens=300)
        else:
            outputs = model.generate(**inputs, max_new_tokens=300)
    generated_text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    
    return generated_text.strip()

# Function to get random samples from the train set
def get_random_samples(n=5):
    return train_df.sample(n)

# Main function to process random samples
def process_random_samples(num_samples=20):
    samples = get_random_samples(num_samples)
    
    for _, row in samples.iterrows():
        image_path = os.path.join(TRAIN_IMAGES_DIR, os.path.basename(row['image_link']))
        
        # Extract text using LLaVA
        extracted_text = extract_text(image_path, row['entity_name'])
        # Extract only the ASSISTANT message from the extracted text
        assistant_message = extracted_text.split("ASSISTANT:")[-1].strip()
        
        print(f"Image: {row['image_link']}")
        print(f"Entity Name: {row['entity_name']}")
        print(f"Actual Label: {row['entity_value']}")
        print(f"Extracted Text: {assistant_message}")
        print("-" * 50)

# Run the main function
if __name__ == "__main__":
    torch.cuda.empty_cache()  # Clear CUDA cache before running
    process_random_samples()

In [None]:
import torch
from transformers import DonutProcessor, VisionEncoderDecoderModel
from PIL import Image
import pandas as pd
import random
import os

# Load the Donut model and processor
# processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-rvlcdip")
# model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-rvlcdip")
# processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-cord-v2")
# model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-cord-v2")
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Load the train.csv file
train_df = pd.read_csv('dataset/train.csv')

# Set the path to your train images directory
TRAIN_IMAGES_DIR = 'train_images'

# Function to extract text from an image using Donut
def extract_text(image_path, entity_name):
    image = Image.open(image_path).convert("RGB")
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # task_prompt = "<s_rvlcdip>"
    # task_prompt = "<s_cord-v2>"
    task_prompt = f"<s_docvqa><s_question> Extract measurement quantities with units and numerical quantity of the unit corresponding to {entity_name} </s_question><s_answer>"
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids

    outputs = model.generate(
        pixel_values.to(device),
        decoder_input_ids=decoder_input_ids.to(device),
        max_length=model.decoder.config.max_position_embeddings,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )
    print(processor.batch_decode(outputs.sequences))
    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    sequence = processor.token2json(sequence)
    
    return sequence

# Function to get random samples from the train set
def get_random_samples(n=5):
    return train_df.sample(n)

# Main function to process random samples
def process_random_samples(num_samples=15):
    samples = get_random_samples(num_samples)
    
    for _, row in samples.iterrows():
        image_path = os.path.join(TRAIN_IMAGES_DIR, os.path.basename(row['image_link']))
        
        # Extract text using Donut
        extracted_text = extract_text(image_path, row['entity_name'])
        
        print(f"Image: {row['image_link']}")
        print(f"Entity Name: {row['entity_name']}")
        print(f"Actual Label: {row['entity_value']}")
        print(f"Extracted Text: {extracted_text}")
        print("-" * 50)

# Run the main function
if __name__ == "__main__":
    process_random_samples()

In [None]:
from transformers import MgpstrProcessor, MgpstrForSceneTextRecognition
import requests
from PIL import Image

# Load the MGP-STR processor and model
processor = MgpstrProcessor.from_pretrained('alibaba-damo/mgp-str-base')
model = MgpstrForSceneTextRecognition.from_pretrained('alibaba-damo/mgp-str-base')

# Function to extract text from an image using MGP-STR
def extract_text(image_path):
    # Load and preprocess the image
    image = Image.open(image_path).convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values

    # Perform inference
    outputs = model(pixel_values)

    # Decode the output
    result = processor.batch_decode(outputs.logits)
    return result['generated_text']

# Function to get random samples from the train set
def get_random_samples(train_df, n=5):
    return train_df.sample(n)

# Main function to process random samples
def process_random_samples(train_df, train_images_dir, num_samples=5):
    samples = get_random_samples(train_df, num_samples)
    
    for _, row in samples.iterrows():
        image_path = os.path.join(train_images_dir, os.path.basename(row['image_link']))
        
        # Extract text using MGP-STR
        extracted_text = extract_text(image_path)
        
        print(f"Image: {row['image_link']}")
        print(f"Entity Name: {row['entity_name']}")
        print(f"Actual Label: {row['entity_value']}")
        print(f"Extracted Text: {extracted_text}")
        print("-" * 50)

# Run the main function
if __name__ == "__main__":
    import pandas as pd
    import os

    # Load the train.csv file
    train_df = pd.read_csv('dataset/train.csv')

    # Set the path to your train images directory
    TRAIN_IMAGES_DIR = 'train_images'

    process_random_samples(train_df, TRAIN_IMAGES_DIR)

In [None]:

import torch
from transformers import pipeline
from PIL import Image
import pandas as pd
import os
import hjson
from tqdm import tqdm
from constants import entity_unit_map
from sanity import sanity_check
from src.utils import download_images
os.environ["TOKENIZERS_PARALLELISM"] = True
# Try to import bitsandbytes and set up quantization
try:
    import bitsandbytes
    from transformers import BitsAndBytesConfig
    
    # Set up quantization configuration
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16
    )
    
    # Set up the pipeline with quantization
    model_id = "llava-hf/llava-1.5-7b-hf"
    pipe = pipeline("image-to-text", model=model_id, model_kwargs={"quantization_config": quantization_config})
    print("Using 4-bit quantization.")
except ImportError:
    print("bitsandbytes is not installed. Falling back to default configuration.")
    
    # Set up the pipeline without quantization
    model_id = "llava-hf/llava-1.5-7b-hf"
    pipe = pipeline("image-to-text", model=model_id)
    print("Using default configuration without quantization.")

# Load the test.csv file
test_df = pd.read_csv('dataset/test.csv')

# Set the path to your test images directory
TEST_IMAGES_DIR = 'test_images'

# Download test images if not already downloaded
download_images(test_df, TEST_IMAGES_DIR)

def extract_text(image_path, entity_name):
    image = Image.open(image_path).convert("RGB")
    
    entity_units = entity_unit_map.get(entity_name, [])
    units_str = ", ".join(entity_units) if entity_units else "unknown"
    
    prompt = f"""USER: <image>
Extract numerical quantities and their corresponding unit belonging to the class `{entity_name}` from the image. 

Output a JSON in the following format:
```
{{
    "predictions": [
        {{
            "value": <Double Float>,
            "unit": <String> (one of: {units_str})
        }},
        <Repeat for other quantities> (max 3)
    ]
}}
```
If no relevant information is found, return an empty list.
ASSISTANT:"""

    outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 1024})
    return outputs[0]['generated_text']

def process_extracted_text(text):
    try:
        # Extract the JSON part from the response
        json_str = text.split("```")[1].strip()
        data = hjson.loads(json_str)
        predictions = data.get("predictions", [])
        
        if predictions:
            # Perform max voting
            value_unit_pairs = [(pred['value'], pred['unit']) for pred in predictions]
            if value_unit_pairs:
                most_common = max(set(value_unit_pairs), key=value_unit_pairs.count)
                return f"{most_common[0]:.2f} {most_common[1]}"
            else:
                # If no pairs found, return the first entry
                pred = predictions[0]
                return f"{pred['value']:.2f} {pred['unit']}"
        else:
            return ""
    except:
        return ""

def process_test_set():
    predictions = []
    
    for _, row in tqdm(test_df.iterrows(), total=len(test_df)):
        image_path = os.path.join(TEST_IMAGES_DIR, os.path.basename(row['image_link']))
        
        extracted_text = extract_text(image_path, row['entity_name'])
        prediction = process_extracted_text(extracted_text)
        predictions.append(prediction)
        print(f"Predicted: {prediction}")
        
    test_df['prediction'] = predictions
    output_df = test_df[['index', 'prediction']]
    output_df.to_csv('test_out.csv', index=False)
    print("Predictions saved to test_out.csv")

    # Run sanity check
    sanity_check('dataset/test.csv', 'test_out.csv')

if __name__ == "__main__":
    torch.cuda.empty_cache()
    process_test_set()


In [None]:
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration
from PIL import Image
import pandas as pd
import os
import json
from tqdm import tqdm
from constants import entity_unit_map
from sanity import sanity_check
from src.utils import download_images
from torch.utils.data import Dataset, DataLoader
os.environ["TOKENIZERS_PARALLELISM"] = 'true'
# Device selection
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the LLaVA model and processor
model_name = "llava-hf/llava-1.5-7b-hf"
processor = AutoProcessor.from_pretrained(model_name)
model = LlavaForConditionalGeneration.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")

processor.tokenizer.add_tokens(["<image>"], special_tokens=True)
model.resize_token_embeddings(len(processor.tokenizer))

# Custom dataset
class ImageDataset(Dataset):
    def __init__(self, df, img_dir, processor):
        self.df = df
        self.img_dir = img_dir
        self.processor = processor

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image_path = os.path.join(self.img_dir, os.path.basename(row['image_link']))
        image = Image.open(image_path).convert("RGB")
        entity_name = row['entity_name']
        
        entity_units = entity_unit_map.get(entity_name, [])
        units_str = ", ".join(entity_units) if entity_units else "unknown"
        
        prompt = f"""USER: <image>
Extract numerical quantities and their corresponding unit belonging to the class `{entity_name}` from the image. 

Output a JSON in the following format:
```
{{
    "predictions": [
        {{
            "value": <Double Float>,
            "unit": <String> (one of: {units_str})
        }},
        <Repeat for other quantities> (max 3)
    ]
}}
```
If no relevant information is found, return an empty list.
ASSISTANT:"""
        
        inputs = self.processor(text=prompt, images=image, return_tensors="pt")
        return {
            'pixel_values': inputs.pixel_values.squeeze(),
            'input_ids': inputs.input_ids.squeeze(),
            'attention_mask': inputs.attention_mask.squeeze(),
            'image_path': image_path,
            'entity_name': entity_name
        }

# Batch processing function
def process_batch(batch, model, processor):
    pixel_values = batch['pixel_values'].to(device)
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)

    with torch.cuda.amp.autocast():
        outputs = model.generate(
            pixel_values=pixel_values,
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=4096
        )
    
    generated_texts = processor.batch_decode(outputs, skip_special_tokens=True)
    return generated_texts

def process_extracted_text(text):
    try:
        json_start = text.find('ASSISTANT:') + len('ASSISTANT:')
        json_text = text[json_start:].strip()
        
        import hjson
        data = hjson.loads(json_text)
        predictions = data.get("predictions", [])
        
        if predictions:
            value_unit_pairs = [(pred['value'], pred['unit']) for pred in predictions]
            if value_unit_pairs:
                most_common = max(set(value_unit_pairs), key=value_unit_pairs.count)
                return f"{most_common[0]:.2f} {most_common[1]}"
            else:
                pred = predictions[0]
                return f"{pred['value']:.2f} {pred['unit']}"
        else:
            return ""
    except hjson.HjsonDecodeError:
        print(f"Failed to parse Hjson: {json_text}")
        return ""
    except Exception as e:
        print(f"Error processing text: {e}")
        return ""

def process_test_set():
    # Load the test.csv file
    test_df = pd.read_csv('dataset/test.csv')
    
    # Set the path to your test images directory
    TEST_IMAGES_DIR = 'test_images'
    
    # Download test images if not already downloaded
    download_images(test_df, TEST_IMAGES_DIR)
    
    # Create dataset and dataloader
    dataset = ImageDataset(test_df, TEST_IMAGES_DIR, processor)
    dataloader = DataLoader(dataset, batch_size=4, num_workers=4, pin_memory=True)
    
    predictions = []
    
    for batch in tqdm(dataloader, total=len(dataloader)):
        generated_texts = process_batch(batch, model, processor)
        
        for text in generated_texts:
            prediction = process_extracted_text(text)
            predictions.append(prediction)
            print(f"Predicted: {prediction}")
    
    test_df['prediction'] = predictions
    output_df = test_df[['index', 'prediction']]
    output_df.to_csv('test_out.csv', index=False)
    print("Predictions saved to test_out.csv")

    # Run sanity check
    sanity_check('dataset/test.csv', 'test_out.csv')

if __name__ == "__main__":
    torch.cuda.empty_cache()
    process_test_set()

----
----
----

In [None]:
import pandas as pd
import re
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from tqdm import tqdm
from constants import entity_unit_map

# Device selection
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize OCR model
model_ocr = ocr_predictor(pretrained=True, assume_straight_pages=False, export_as_straight_boxes=True)

# Load the data
train_df = pd.read_csv('dataset/train.csv')
test_df = pd.read_csv('dataset/test.csv')

# Set the path to your train/test images directory
TRAIN_IMAGES_DIR = 'train_images'
TEST_IMAGES_DIR = 'test_images'

# Function to extract text from the OCR result
def extract_text(ocr_result):
    extracted_text = []
    for page in ocr_result['pages']:
        for block in page['blocks']:
            for line in block['lines']:
                for word in line['words']:
                    extracted_text.append(word['value'])
    return ' '.join(extracted_text)

# Function to process a single image
def process_image(image_path):
    try:
        doc = DocumentFile.from_images(image_path)
        result = model_ocr(doc)
        extracted_text = extract_text(result.export())
        return extracted_text
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return ""

# Initialize the BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Filter the text for numerical values and context
def filter_text(text):
    sentences = re.findall(r'([^.]*?\d+[^.]*\.)', text)
    return ' '.join(sentences)

# Preprocess data for training
def preprocess_data(df, images_dir):
    texts = []
    labels = []

    for _, row in tqdm(df.iterrows(), total=len(df), desc="Preprocessing data"):
        image_path = os.path.join(images_dir, os.path.basename(row['image_link']))
        extracted_text = process_image(image_path)
        filtered_text = filter_text(extracted_text)
        entity_value = row['entity_value']
        
        if str(entity_value) in filtered_text:
            labels.append(1)
        else:
            labels.append(0)
        
        texts.append(filtered_text)

    return texts, labels

# Custom Dataset
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Prepare the dataset
print("Preparing training dataset...")
texts, labels = preprocess_data(train_df, TRAIN_IMAGES_DIR)

# Tokenize the text data
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
labels = torch.tensor(labels)

dataset = CustomDataset(inputs, labels)

# Split into training and evaluation sets
train_size = int(0.8 * len(dataset))
train_dataset, eval_dataset = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])

# Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
)

# Set up the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Train the model
print("Training the model...")
trainer.train()

# Prediction function using BERT
def predict_entity_value(entity_name, extracted_text):
    filtered_text = filter_text(extracted_text)
    inputs = tokenizer(filtered_text, return_tensors='pt', truncation=True, padding=True)
    outputs = model(**inputs)
    logits = outputs.logits
    prediction = torch.argmax(logits, dim=1).item()
    
    if prediction == 1:
        entity_units = entity_unit_map.get(entity_name, set())
        unit_pattern = '|'.join(entity_units)
        candidates = re.findall(rf'\d+(\.\d+)?\s*({unit_pattern})', filtered_text)
        if candidates:
            return f"{candidates[0][0]} {candidates[0][1]}"
    return ""

# Perform predictions on test set
print("Performing predictions on test set...")
predictions = []
for _, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Predicting"):
    image_path = os.path.join(TEST_IMAGES_DIR, os.path.basename(row['image_link']))
    extracted_text = process_image(image_path)
    entity_name = row['entity_name']
    
    predicted_value = predict_entity_value(entity_name, extracted_text)
    predictions.append(predicted_value)

# Create output DataFrame
output_df = pd.DataFrame({
    'index': test_df.index,
    'prediction': predictions
})
output_df.to_csv('test_out.csv', index=False)

print("Prediction process completed. Results saved to 'test_out.csv'.")

In [None]:
import pandas as pd
import re
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from tqdm import tqdm
from constants import entity_unit_map
import random

# Device selection
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize OCR model
model_ocr = ocr_predictor(pretrained=True, assume_straight_pages=False, export_as_straight_boxes=True)

# Load the data
train_df = pd.read_csv('dataset/train.csv')

# Set the path to your train images directory
TRAIN_IMAGES_DIR = 'train_images'

# Function to extract text from the OCR result
def extract_text(ocr_result):
    extracted_text = []
    for page in ocr_result['pages']:
        for block in page['blocks']:
            for line in block['lines']:
                for word in line['words']:
                    extracted_text.append(word['value'])
    return ' '.join(extracted_text)

# Function to process a single image
def process_image(image_path):
    try:
        doc = DocumentFile.from_images(image_path)
        result = model_ocr(doc)
        extracted_text = extract_text(result.export())
        return extracted_text
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return ""

# Initialize the BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Filter the text for numerical values and context
def filter_text(text):
    sentences = re.findall(r'([^.]*?\d+[^.]*\.)', text)
    return ' '.join(sentences)

# Preprocess text
def preprocess_text(text):
    import re
    text = text.lower()
    text = re.sub(r'[^a-z0-9.\s]', ' ', text)  # Remove special characters
    text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces with a single space
    return text.strip()

# Prediction function using BERT
def predict_entity_value(entity_name, extracted_text):
    filtered_text = filter_text(extracted_text)
    preprocessed_text = preprocess_text(filtered_text)
    inputs = tokenizer(preprocessed_text, return_tensors='pt', truncation=True, padding=True)
    outputs = model(**inputs)
    logits = outputs.logits
    prediction = torch.argmax(logits, dim=1).item()
    
    if prediction == 1:
        entity_units = entity_unit_map.get(entity_name, set())
        unit_pattern = '|'.join(entity_units)
        candidates = re.findall(rf'\d+(\.\d+)?\s*({unit_pattern})', preprocessed_text)
        if candidates:
            return f"{candidates[0][0]} {candidates[0][1]}"
    return ""

# Perform predictions on 5-10 random samples
num_samples = random.randint(5, 10)
sample_df = train_df.sample(n=num_samples)

print(f"Performing predictions on {num_samples} random samples...")
for _, row in tqdm(sample_df.iterrows(), total=num_samples, desc="Predicting"):
    image_path = os.path.join(TRAIN_IMAGES_DIR, os.path.basename(row['image_link']))
    extracted_text = process_image(image_path)
    entity_name = row['entity_name']
    actual_value = row['entity_value']
    
    predicted_value = predict_entity_value(entity_name, extracted_text)
    
    print(f"Input: Entity: {entity_name}, Extracted Text: {extracted_text}")
    print(f"Labelled Output: {actual_value}")
    print(f"Prediction: {predicted_value}")
    print("-" * 100)  # Separator for readability

print("Inference on random samples completed.")

In [13]:
import pandas as pd
import re
import os

from doctr.io import DocumentFile
from doctr.models import ocr_predictor
from tqdm import tqdm
from constants import entity_unit_map
import random

# Device selection
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize OCR model
model_ocr = ocr_predictor(pretrained=True, assume_straight_pages=False, export_as_straight_boxes=True)

# Load the data
train_df = pd.read_csv('dataset/train.csv')

# Set the path to your train images directory
TRAIN_IMAGES_DIR = 'train_images'

# Function to extract text from the OCR result
def extract_text(ocr_result):
    extracted_text = []
    for page in ocr_result['pages']:
        for block in page['blocks']:
            for line in block['lines']:
                for word in line['words']:
                    extracted_text.append(word['value'])
    return ' '.join(extracted_text)

# Function to process a single image
def process_image(image_path):
    try:
        doc = DocumentFile.from_images(image_path)
        result = model_ocr(doc)
        extracted_text = extract_text(result.export())
        return extracted_text
    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return ""

# Text preprocessing
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z0-9.\s\'"]', ' ', text)  # Remove special characters except ' and "
    text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces with a single space
    return text.strip()

# Unit standardization
unit_mappings = {
    # Length units
    'cm': 'centimetre', 'mm': 'millimetre', 'm': 'metre', 'in': 'inch', 'ft': 'foot', 'yd': 'yard',
    'centimetres': 'centimetre', 'millimetres': 'millimetre', 'metres': 'metre',
    'inches': 'inch', 'feet': 'foot', 'yards': 'yard',
    'cm.': 'centimetre', 'mm.': 'millimetre', 'm.': 'metre', 'in.': 'inch', 'ft.': 'foot', 'yd.': 'yard',
    '"': 'inch', "'": 'foot', 'candelas': 'candela', 'cd': 'candela', 'candela': 'candela', 'candelas': 'candela', 'candel': 'candela',

    # Weight units
    'mg': 'milligram', 'g': 'gram', 'kg': 'kilogram', 'oz': 'ounce', 'lb': 'pound', 't': 'ton',
    'μg': 'microgram', 'microg': 'microgram',
    'grams': 'gram', 'kilograms': 'kilogram', 'milligrams': 'milligram', 'ounces': 'ounce',
    'pounds': 'pound', 'tons': 'ton', 'lbs': 'pound',
    'mg.': 'milligram', 'g.': 'gram', 'kg.': 'kilogram', 'oz.': 'ounce', 'lb.': 'pound', 'lbs.': 'pound',
    'μg.': 'microgram',

    # Volume units
    'ml': 'millilitre', 'l': 'litre', 'cl': 'centilitre', 'dl': 'decilitre', 'fl oz': 'fluid ounce',
    'pt': 'pint', 'qt': 'quart', 'gal': 'gallon', 'imp gal': 'imperial gallon',
    'cu ft': 'cubic foot', 'cu in': 'cubic inch', 'cu m': 'cubic metre',
    'μl': 'microlitre', 'microl': 'microlitre', 'c': 'cup',
    'litres': 'litre', 'millilitres': 'millilitre', 'centilitres': 'centilitre', 'decilitres': 'decilitre',
    'pints': 'pint', 'quarts': 'quart', 'gallons': 'gallon', 'imperial gallons': 'imperial gallon',
    'cubic feet': 'cubic foot', 'cubic inches': 'cubic inch', 'cubic meters': 'cubic metre',
    'cubic metres': 'cubic metre', 'fluid ounces': 'fluid ounce', 'cups': 'cup',
    'ml.': 'millilitre', 'l.': 'litre', 'cl.': 'centilitre', 'dl.': 'decilitre',
    'pt.': 'pint', 'qt.': 'quart', 'gal.': 'gallon',
    'cu. ft.': 'cubic foot', 'cu. in.': 'cubic inch', 'cu. m': 'cubic metre',
    'fl. oz.': 'fluid ounce', 'fluid oz': 'fluid ounce', 'fluid oz.': 'fluid ounce',
    'us gal': 'gallon', 'us gallon': 'gallon', 'us gallons': 'gallon',
    'us fl oz': 'fluid ounce', 'us fluid ounce': 'fluid ounce', 'us fluid ounces': 'fluid ounce',
    'us pint': 'pint', 'us pints': 'pint', 'us quart': 'quart', 'us quarts': 'quart',
    'microlitres': 'microlitre',

    # Electrical units
    'mv': 'millivolt', 'kv': 'kilovolt', 'v': 'volt',
    'w': 'watt', 'kw': 'kilowatt',
    'volts': 'volt', 'millivolts': 'millivolt', 'kilovolts': 'kilovolt',
    'watts': 'watt', 'kilowatts': 'kilowatt',
    'mv.': 'millivolt', 'kv.': 'kilovolt', 'v.': 'volt',
    'w.': 'watt', 'kw.': 'kilowatt'
}

def standardize_unit(unit):
    unit = unit.lower()
    return unit_mappings.get(unit, unit)

# Entity-specific patterns
entity_patterns = {
    'item_weight': r'(\d+(\.\d+)?)\s*(milligram|kilogram|microgram|gram|ounce|ton|pound|mg|kg|g|oz|lb|lbs|μg|t|microg|grams|kilograms|milligrams|ounces|pounds|tons)',
    'maximum_weight_recommendation': r'(\d+(\.\d+)?)\s*(milligram|kilogram|microgram|gram|ounce|ton|pound|mg|kg|g|oz|lb|lbs|μg|t|microg|grams|kilograms|milligrams|ounces|pounds|tons)',
    'width': r'(\d+(\.\d+)?)\s*(centimetre|foot|millimetre|metre|inch|yard|cm|mm|m|in|ft|yd|centimetres|millimetres|metres|inches|feet|yards|"|\')',
    'height': r'(\d+(\.\d+)?)\s*(centimetre|foot|millimetre|metre|inch|yard|cm|mm|m|in|ft|yd|centimetres|millimetres|metres|inches|feet|yards|"|\')',
    'depth': r'(\d+(\.\d+)?)\s*(centimetre|foot|millimetre|metre|inch|yard|cm|mm|m|in|ft|yd|centimetres|millimetres|metres|inches|feet|yards|"|\')',
    'voltage': r'(\d+(\.\d+)?)\s*(millivolt|kilovolt|volt|mv|kv|v|volts|millivolts|kilovolts)',
    'wattage': r'(\d+(\.\d+)?)\s*(kilowatt|watt|kw|w|watts|kilowatts)',
    'item_volume': r'(\d+(\.\d+)?)\s*(cubic foot|cubic metre|microlitre|cup|fluid ounce|centilitre|imperial gallon|us gallon|pint|decilitre|litre|millilitre|quart|cubic inch|gallon|cu ft|cu m|cu in|ml|l|cl|dl|fl oz|pt|qt|gal|imp gal|us gal|μl|c|microl|cubic feet|cubic inches|cubic meters|cubic metres|fluid ounces|gallons|imperial gallons|us gallons|litres|millilitres|pints|quarts|us pints|us quarts|us fluid ounces)'
}

# Contextual keyword matching
entity_keywords = {
    'item_weight': ['weight', 'weighs', 'mass', 'heavy', 'light', 'lb', 'kg', 'grams', 'ounces', 'net weight', 'gross weight', 'tare weight', 'product weight', 'item weight', 'unit weight', 'shipping weight', 'payload', 'heft', 'load', 'bulk', 'density', 'avoirdupois', 'poundage', 'tonnage', 'weightiness', 'gravitas', 'ponderosity', 'substance', 'ballast', 'burden', 'encumbrance', 'gravity', 'heaviness', 'mass', 'pressure', 'tonnage', 'weight force', 'weightage', 'dead weight', 'live weight', 'curb weight', 'dry weight', 'unladen weight', 'laden weight', 'kerb weight', 'gross vehicle weight', 'candle'],
    'maximum_weight_recommendation': ['max weight', 'maximum weight', 'weight limit', 'weight capacity', 'load capacity', 'can hold up to', 'supports up to', 'max load', 'weight rating', 'safe working load', 'recommended max weight', 'weight restriction', 'not to exceed', 'maximum load', 'weight threshold', 'upper weight limit', 'peak weight', 'weight tolerance', 'maximum carrying capacity', 'weight bearing limit', 'load limit', 'weight allowance', 'maximum permissible weight', 'weight ceiling', 'weight boundary', 'weight cutoff', 'weight maximum', 'weight cap', 'weight constraint', 'weight barrier', 'weight ceiling', 'weight threshold', 'weight upper bound', 'weight top end', 'weight peak', 'weight apex', 'weight zenith', 'weight summit', 'weight pinnacle', 'weight acme'],
    'width': ['width', 'wide', 'across', 'breadth', 'span', 'horizontal', 'side to side', 'lateral', 'diameter', 'girth', 'W:', 'W.', 'width:', 'wide:', 'cross section', 'transverse dimension', 'broadness', 'wideness', 'beam', 'thickness', 'gauge', 'caliber', 'amplitude', 'expanse', 'spread', 'broadness', 'extent', 'measurement', 'size', 'dimension', 'proportion', 'magnitude', 'scope', 'range', 'compass', 'reach', 'extension', 'expansion', 'stretch', 'span', 'latitude', 'bore', 'calibre', 'thickness', 'cross-section', 'profile'],
    'height': ['height', 'tall', 'high', 'elevation', 'vertical length', 'stature', 'altitude', 'top to bottom', 'upright', 'rise', 'H:', 'H.', 'height:', 'tall:', 'vertical dimension', 'clearance', 'tallness', 'highness', 'loftiness', 'prominence', 'eminence', 'towering', 'vertical extent', 'headroom', 'ceiling height', 'vertical distance', 'vertical measurement', 'vertical span', 'vertical reach', 'vertical dimension', 'vertical size', 'vertical proportion', 'vertical magnitude', 'vertical scope', 'vertical range', 'vertical compass', 'vertical extension', 'vertical expansion', 'vertical stretch', 'vertical elevation', 'vertical rise', 'vertical lift', 'vertical climb', 'vertical ascent', 'vertical growth'],
    'depth': ['depth', 'deep', 'thickness', 'front to back', 'length', 'extent', 'distance', 'profundity', 'dimension', 'reach', 'D:', 'D.', 'depth:', 'deep:', 'longitudinal dimension', 'deepness', 'profoundness', 'penetration', 'recession', 'inwardness', 'immersion', 'submersion', 'sinking', 'hollowness', 'concavity', 'vertical distance', 'vertical extent', 'vertical dimension', 'vertical measurement', 'vertical reach', 'vertical penetration', 'vertical recession', 'vertical immersion', 'vertical submersion', 'vertical sinking', 'vertical depression', 'vertical cavity', 'vertical hollow', 'vertical recess', 'vertical indentation', 'vertical pit', 'vertical chasm', 'vertical abyss', 'vertical gorge', 'vertical ravine'],
    'voltage': ['voltage', 'volts', 'V', 'electrical potential', 'electromotive force', 'power supply', 'input voltage', 'output voltage', 'operating voltage', 'rated voltage', 'AC voltage', 'DC voltage', 'potential difference', 'electric pressure', 'tension', 'EMF', 'volt rating', 'voltage drop', 'voltage range', 'nominal voltage', 'supply voltage', 'line voltage', 'phase voltage', 'peak voltage', 'RMS voltage', 'breakdown voltage', 'threshold voltage', 'forward voltage', 'reverse voltage', 'standoff voltage', 'surge voltage', 'ripple voltage', 'voltage regulation', 'voltage stability', 'voltage tolerance', 'voltage fluctuation', 'voltage sag', 'voltage spike', 'voltage dip', 'voltage surge', 'voltage transient'],
    'wattage': ['wattage', 'watts', 'power', 'energy consumption', 'power output', 'W', 'power rating', 'power consumption', 'energy usage', 'power draw', 'electrical power', 'rated power', 'power capacity', 'energy demand', 'power requirement', 'power level', 'energy efficiency', 'power dissipation', 'power supply', 'power specification', 'power demand', 'power input', 'power output', 'power throughput', 'power handling', 'power delivery', 'power transfer', 'power conversion', 'power generation', 'power production', 'power yield', 'power expenditure', 'power utilization', 'power allocation', 'power budget', 'power threshold', 'power limit', 'power range', 'power margin', 'power reserve'],
    'item_volume': ['volume', 'capacity', 'contains', 'content', 'holds', 'storage', 'liquid capacity', 'fluid volume', 'internal volume', 'container volume', 'total volume', 'net volume', 'gross volume', 'fill capacity', 'cubic capacity', 'volumetric capacity', 'displacement', 'interior space', 'holding capacity', 'storage space', 'cubic volume', 'spatial volume', 'volumetric content', 'volumetric measurement', 'volumetric size', 'volumetric dimension', 'volumetric extent', 'volumetric magnitude', 'volumetric quantity', 'volumetric amount', 'volumetric proportion', 'volumetric ratio', 'volumetric fraction', 'volumetric part', 'volumetric segment', 'volumetric section', 'volumetric division', 'volumetric portion', 'volumetric share', 'volumetric allotment']
}

def extract_value_unit(text, pattern, allowed_units):
    if not isinstance(pattern, str):
        return []
    matches = re.findall(pattern, text)
    extractions = []
    for match in matches:
        value = float(match[0])
        unit = match[2]
        unit_standard = standardize_unit(unit)
        if unit_standard in allowed_units:
            extractions.append((value, unit_standard))
    return extractions

def find_value_with_context(text, entity, pattern, allowed_units):
    if not isinstance(pattern, str):
        return []
    keywords = entity_keywords.get(entity, [])
    extractions = []
    for keyword in keywords:
        keyword_positions = [m.start() for m in re.finditer(keyword, text)]
        for pos in keyword_positions:
            window = text[max(0, pos - 50): pos + 50]
            extractions.extend(extract_value_unit(window, pattern, allowed_units))
    if not extractions:
        extractions = extract_value_unit(text, pattern, allowed_units)
    return extractions

def extract_entity_value(entity, text):
    clean_text = preprocess_text(text)
    allowed_units = entity_unit_map.get(entity, set())
    pattern = entity_patterns.get(entity, '')
    extractions = find_value_with_context(clean_text, entity, pattern, allowed_units)
    
    if extractions:
        if entity in ['width', 'height', 'depth']:
            # Check if we have multiple extractions close together (potential dimensions)
            if len(extractions) >= 2:
                # Sort extractions based on their position in the text
                sorted_extractions = sorted(extractions, key=lambda x: clean_text.find(f"{x[0]} {x[1]}"))
                
                if entity == 'width':
                    return sorted_extractions[0]
                elif entity == 'depth':
                    return sorted_extractions[1] if len(sorted_extractions) > 1 else sorted_extractions[0]
                elif entity == 'height':
                    return sorted_extractions[-1]  # Return the last value for height
            else:
                # If we only have one extraction, return it
                return extractions[0]
        else:
            return extractions[0]
    
    return "", ""

# Perform predictions on 5-10 random samples
num_samples = 15
sample_df = train_df.sample(n=num_samples)

print(f"Performing predictions on {num_samples} random samples...")
for _, row in tqdm(sample_df.iterrows(), total=num_samples, desc="Predicting"):
    image_path = os.path.join(TRAIN_IMAGES_DIR, os.path.basename(row['image_link']))
    extracted_text = process_image(image_path)
    entity_name = row['entity_name']
    actual_value = row['entity_value']
    
    predicted_value, predicted_unit = extract_entity_value(entity_name, extracted_text)
    
    print(f"Input: Entity: {entity_name}, Extracted Text: {extracted_text}")
    print(f"Labelled Output: {actual_value}")
    print(f"Prediction: {predicted_value} {predicted_unit}")
    print("-" * 100)  # Separator for readability

print("Inference on random samples completed.")

  state_dict = torch.load(archive_path, map_location="cpu")


Performing predictions on 15 random samples...


Predicting:   7%|▋         | 1/15 [00:00<00:09,  1.50it/s]

Input: Entity: width, Extracted Text: Size ofPumpkins - N - - 6.5cm 8cm  à
Labelled Output: 8.0 centimetre
Prediction: 6.5 centimetre
----------------------------------------------------------------------------------------------------


Predicting:  13%|█▎        | 2/15 [00:01<00:12,  1.03it/s]

Input: Entity: wattage, Extracted Text: S FRINTING MOONLIGHT L un Haepas  NOON NIG Gc GIE tvn me  e 2AS EaN b / 1V a 0008 LEDWILB (0 5c 6 L & 5.9in G6 - - 3D User 16Colors Manual Moon Lamp 6  R d -
Labelled Output: 1.0 watt
Prediction:  
----------------------------------------------------------------------------------------------------


Predicting:  20%|██        | 3/15 [00:02<00:10,  1.16it/s]

Input: Entity: width, Extracted Text: Enlarged Letter Tray Organize More Files 13.7" 4 TV 3.3" 9.2"
Labelled Output: 9.2 inch
Prediction: 13.7 inch
----------------------------------------------------------------------------------------------------


Predicting:  27%|██▋       | 4/15 [00:04<00:14,  1.29s/it]

Input: Entity: item_weight, Extracted Text: ase auy CLASSIC NESCAFE a CI NE pa - NESC consumarsi A,7g aconn - CLASSIC NESCAFE rM CLASSIC WVEVTO entro pe à ou - Mptpees ouM AA . entroline NESCAFO CLASSIC NESCAFE CLASSIC NESCAFE  1,7g,DOECON CLASSIC 2 CLASSIC ESCAFE NESCAFE NESCAFE. BUSIW a086w9 MnP 810 NESCAFE, SASASAER 6NSAGOLRZO entrotine NNS Ç4ASS/C - 6W5 NESCAFE. MooI MEG ERUNATAZA 024 > - WASSAYSNN WAYATANNDNC Cumi 100% omnd NODNISER OH oXOnOoweRwATOkO Pavvolyent Gustointenso PURO CAFEE OMEVISQONEKOARZON FIIEMSS a CAFFE OIEZHSOOOLESORZON AOAASMOmA au a  - I &y A -  a
Labelled Output: 1.7 gram
Prediction: 7.0 gram
----------------------------------------------------------------------------------------------------


Predicting:  33%|███▎      | 5/15 [00:05<00:11,  1.19s/it]

Input: Entity: item_weight, Extracted Text: NATURE'S BOUNTY NOIIn1055I0 RAPIDE BIOTINE 2500 mcg  Aide à maintenir la santé des cheveux et de la peau Aide à soutenir la santé des ongles 100 Comprimés NPN 80043208
Labelled Output: 2500.0 microgram
Prediction:  
----------------------------------------------------------------------------------------------------


Predicting:  40%|████      | 6/15 [00:06<00:09,  1.03s/it]

Input: Entity: item_weight, Extracted Text: TGLCO. THE GOOD LIFE COMPANY MORNING MOTIVATION FREEZE-DRIED FROM 100% ARABICA BEANS 100g/35
Labelled Output: 100 gram
Prediction: 100.0 gram
----------------------------------------------------------------------------------------------------


Predicting:  47%|████▋     | 7/15 [00:07<00:07,  1.01it/s]

Input: Entity: width, Extracted Text: 0.87'caliber The 0.87'caliber is suitable for most kinds of bicycles. Universal, fine polished and non-slip, adjustable buckle. -
Labelled Output: 0.87 inch
Prediction: 0.87 foot
----------------------------------------------------------------------------------------------------


Predicting:  53%|█████▎    | 8/15 [00:09<00:09,  1.30s/it]

Input: Entity: item_weight, Extracted Text: VITAMIN D3 Supports Calcium / Metabolism, Bone PACK Health and Immune Function* Jarrow Formulase Vitamin D3 provides cholecalcifero: Jarrow which is the form produced FORMULAS by the (human) skin in Golecddifero response to UVB exposure Vitamin In Extra Virgin Olive Oil (sunlight) and may also 125 MCG improve vitamin D status NI000S dE better than equivalent Supports Caldum Metabolism* amounts of ergocalcifero Bone Health* puD Immune Function* (D2).* 100 SUPPLEMENT DIETARY SOFTGELS *These statements have not been evaluated by the Food and Drug Administration. This product is not intended to diagnose treat, cure or preventany disease.
Labelled Output: 125.0 microgram
Prediction:  
----------------------------------------------------------------------------------------------------


Predicting:  60%|██████    | 9/15 [00:09<00:06,  1.11s/it]

Input: Entity: item_weight, Extracted Text: SOURGE llon FIBS Sugartree / - gh Gleie Choco Chip Biscuits 125 04402
Labelled Output: 125.0 gram
Prediction:  
----------------------------------------------------------------------------------------------------


Predicting:  67%|██████▋   | 10/15 [00:10<00:04,  1.03it/s]

Input: Entity: width, Extracted Text: 145cm 40cm
Labelled Output: 40.0 centimetre
Prediction: 145.0 centimetre
----------------------------------------------------------------------------------------------------


Predicting:  73%|███████▎  | 11/15 [00:11<00:03,  1.10it/s]

Input: Entity: depth, Extracted Text: Wall Mirror & Home decor Mirror Oval imylmes G 24" Horizontal or vertical hanging 36" TII
Labelled Output: 36.0 inch
Prediction: 36.0 inch
----------------------------------------------------------------------------------------------------


Predicting:  80%|████████  | 12/15 [00:13<00:04,  1.46s/it]

Input: Entity: wattage, Extracted Text: Dimensions and Accessories 10cm/3.94in 20cm/7.87in 8cm/3.15in Aufladbare Innen Wandleuchte Betriebsanleitung (Deutsch) Bei diesem Produkt handelt tessichum eine Wandleuchte mit Bewegungssensor. Bitte uberprufen Sie. ob Teile beschadigt sind. wenn Sie die Leuchte erhalten. + W/Y Das Produkt kann mit einem wiederauadbaren Zyklus betrieben werdenund wird mit einem us-Schontstelenkabel geliefert Es kann zum Aufladen direkt an das Produkt angeschlossen werden, die : - Ladestatusanzeige ist Die Ladestatusanzeige istrot und blinkt wahrend NET: des Ladevorgangs sie hort auf zu blinken, wenn das Gerat vollstandig geladen ist. 3H Warum leuchtet sie tagsUber nicht? Produkt ist lichtempfindich und IduAIOAsCAkoNwC 5H bei guten Mrccurcalnd ausgeschalte. und der LL dunklen Lichtverhalinissen automatisch 8H Spezifikationen: MNasw  Lichtquelle: Wiederaufladbar Batterie * LED-Licht a Lichtfarbe: Licht SEALANTFIX warmes Montage: Klebrig. magnetisch - Erfassungsbere

Predicting:  87%|████████▋ | 13/15 [00:14<00:02,  1.29s/it]

Input: Entity: item_weight, Extracted Text: NEW! - GUSTO PAPRIKA CHRS EXTRA GVSTOSE - - No) ol1o 14 S SEMI I4 GIRASOLE SENZA GWTINE PALM OIL 25g a * FREE
Labelled Output: 25.0 gram
Prediction: 4.0 gram
----------------------------------------------------------------------------------------------------


Predicting:  93%|█████████▎| 14/15 [00:16<00:01,  1.35s/it]

Input: Entity: width, Extracted Text: Androic 7.1.2 - OS Built-in Android OS.download APP,online movies,play games 06:12 de ) d a a Goog You Tube NETFLIX Google play A ls - à - China Dofar o FileExplorer Sanvry a o - a - Gr SAbg Screen Apps HDMI - Settings Settings aouu a A OIE OTHA MNPROJECTOR Compatible with multiple devices - ( % - - 1SAS WiFi Bluetooth 2.4G/5G
Labelled Output: 3.0 inch
Prediction:  
----------------------------------------------------------------------------------------------------


Predicting: 100%|██████████| 15/15 [00:17<00:00,  1.15s/it]

Input: Entity: height, Extracted Text: Product Size 5.5"/14cm 15.7"/40cm 7.9"/20cm 0000 C - s 000000 U 0000C - Bottles Weight Dimensions Outlet 6-8 5.51 Ib 15.7'Lx7.9"H 100-240V 40cm X 20cm X 14cm
Labelled Output: 14.0 centimetre
Prediction: 14.0 centimetre
----------------------------------------------------------------------------------------------------
Inference on random samples completed.





Predicting:  33%|███▎      | 5/15 [00:05<00:11,  1.11s/it]
Input: Entity: item_volume, Extracted Text: E 7 CAFFEINE N A C * BETA-ALANINE J - / ALINE 7 CAFFEINE * I 7 CAFFEINE L CAFFEINE OSE * BETA-ALANINE A CA - 7 CAFFEINE LE BETA-ALANINE a BETA-ALANINE # AFFEINE a BETA-ALANIN a BETA-ALANINE N 4 1 - I - 0 - ON - n  YUZU - - - ECHELON CAYENNE ECHE 12 PACK 12 x 8.4 FL OZ YUZU CAYENNE 7 PRE-ORKOUTINACAN [250mL] cans MADEINTHE DIETARY SUPPLEMENT 8.4 FL OZ (250 mL)
Labelled Output: 8.4 fluid ounce
Prediction: 7.0 cup

In [14]:
Predicting:  60%|██████    | 9/15 [00:08<00:05,  1.07it/s]
Input: Entity: item_weight, Extracted Text: à  - ASIANAURA Scented Pillar Candie Set of 4 Candle ASIAN/AURA ASIAN/AURA Lemon Grass ASIANAURA Scented Lemon Grass Pillar Candle Scented Lemon Grass ASIANAURA Pillar Candle jented Lemon Pillar Candle Grass Scented Pillar Cand h
Labelled Output: 4 candela
Prediction:  

SyntaxError: invalid character '█' (U+2588) (3575373954.py, line 1)

In [None]:
Predicting:  40%|████      | 6/15 [00:05<00:09,  1.01s/it]
Input: Entity: width, Extracted Text: FOOD GRADE 304 STAINLESS STEEL You can take it with you when you travel. The compact design saves you a lot of space when packing. Chenpi Pu'er Tea Sweet honey water Warm lemon tea Health wolfberry water 205mm/8.07inch Espresso Baby's milk >  205mm/8.07nch
Labelled Output: 205.0 millimetre
Prediction: 8.07 inch
--------------------------