In [23]:
import os
import numpy as np
import pandas as pd
import re
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
from transformers import LayoutLMTokenizer, LayoutLMForTokenClassification
import pytesseract
from PIL import Image
import requests
from io import BytesIO
from albumentations import Compose, Resize, RandomCrop, RandomBrightnessContrast
import torch  # For CUDA
from sklearn.model_selection import train_test_split, StratifiedKFold
import mlflow  # For experiment tracking
import matplotlib.pyplot as plt
import sys
import os

# Make the local `src` directory importable; the `utils` and `constants`
# imports that follow are presumably resolved from it.
# The path is a raw string so the Windows backslashes stay literal: in a normal
# string "\P", "\A" and "\s" are invalid escape sequences and raise a
# SyntaxWarning on recent Python versions.
# NOTE(review): this is a machine-specific absolute path - consider deriving it
# from a configurable project root instead.
sys.path.append(os.path.abspath(r"D:\Projects\Amazon-ML-Challenge-Team-AutoGen-\student_resource 3\src"))

from utils import download_images  # Assuming this function is defined in utils.py
from constants import entity_unit_map, allowed_units  # Importing the unit map

  sys.path.append(os.path.abspath("D:\Projects\Amazon-ML-Challenge-Team-AutoGen-\student_resource 3\src"))


In [24]:
# Relative paths of the CSV files that the driver cell loads with
# `pd.read_csv` into `train_df` and `test_df`.
TRAIN_CSV = 'dataset/train.csv'
TEST_CSV = 'dataset/test.csv'

In [25]:
def download_images(image_links, save_dir='images', timeout=30):
    """Download every image URL and store it as a JPEG file in ``save_dir``.

    Files are named ``image_<i>.jpg`` where ``<i>`` is the position of the link
    inside ``image_links``; a later call that uses the same ``save_dir``
    therefore overwrites the files written by an earlier call.

    Note: this definition replaces the ``download_images`` imported from
    ``utils`` in the first cell.

    Args:
        image_links: Iterable of image URLs.
        save_dir: Target directory; created when it does not exist.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        List of the written file paths, in the same order as ``image_links``.

    Raises:
        requests.RequestException: On timeout, connection failure or an HTTP
            error status. Failures are not skipped so that the returned paths
            stay aligned with the input links.
    """
    os.makedirs(save_dir, exist_ok=True)
    image_paths = []
    for i, link in enumerate(image_links):
        # Without a timeout a single stalled server blocks the whole run.
        response = requests.get(link, timeout=timeout)
        # Fail on 4xx/5xx here instead of handing an HTML error page to PIL.
        response.raise_for_status()
        image_path = os.path.join(save_dir, f'image_{i}.jpg')
        with Image.open(BytesIO(response.content)) as image:
            # JPEG cannot store alpha or palette modes (RGBA, P, LA, ...), so
            # normalise to RGB before saving.
            image.convert('RGB').save(image_path)
        image_paths.append(image_path)
    return image_paths

In [58]:
from PIL import Image
import numpy as np
import albumentations as A

def preprocess_images(image_paths, target_size=(224, 224), augment=True):
    """Load images from disk and turn them into one stacked RGB array.

    Each image is converted to RGB, resized to ``target_size`` and, when
    ``augment`` is true, passed through a random brightness/contrast jitter
    (applied with probability 0.2).

    Args:
        image_paths: Iterable of image file paths.
        target_size: ``(height, width)`` of the output images.
        augment: Apply the random brightness/contrast augmentation. Defaults to
            ``True`` to keep the previous behaviour; pass ``False`` for
            validation/inference so that predictions are deterministic.

    Returns:
        Array of shape ``(len(image_paths), height, width, 3)``. An empty input
        yields an empty ``uint8`` array with that same trailing shape instead
        of a shapeless ``(0,)`` array.
    """
    image_paths = list(image_paths)
    height, width = target_size[0], target_size[1]

    if not image_paths:
        # Keep the rank stable so callers can concatenate or index safely.
        return np.empty((0, height, width, 3), dtype=np.uint8)

    transforms = [A.Resize(height=height, width=width)]
    if augment:
        transforms.append(A.RandomBrightnessContrast(p=0.2))
    pipeline = A.Compose(transforms)

    images = []
    for path in image_paths:
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the numpy array.
        with Image.open(path) as handle:
            pixels = np.array(handle.convert('RGB'))
        images.append(pipeline(image=pixels)['image'])

    return np.array(images)


In [59]:
def extract_text_from_images(image_paths):
    """Run Tesseract OCR on each image file.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        List with one OCR string per path, in input order.
    """
    return [
        pytesseract.image_to_string(Image.open(image_path))
        for image_path in image_paths
    ]

In [60]:
# Canonical unit name -> abbreviations/spellings that should be rewritten to it.
# Iteration order matters: `standardize_unit` stops at the first unit that
# matches, so earlier entries win when several could apply.
unit_abbreviations = {
    # For 'item_weight' and 'maximum_weight_recommendation'
    'gram': ['g', 'gr', 'gm', 'grams', 'grm'],
    'kilogram': ['kg', 'kilograms', 'kgs'],
    'milligram': ['mg', 'milligrams', 'mgs'],
    'microgram': ['µg', 'mcg', 'micrograms'],
    'ounce': ['oz', 'ounces', 'ozs'],
    'pound': ['lb', 'lbs', 'pounds'],
    'ton': ['t', 'tons', 'tonne', 'tonnes'],

    # For 'item_volume'
    'millilitre': ['ml', 'milliliters', 'millilitres'],
    'litre': ['l', 'lit', 'liters', 'litres'],
    'cubic_centimetre': ['cc', 'cm³', 'cubic centimeters', 'cubic centimetres'],
    'cubic_metre': ['m³', 'cubic meters', 'cubic metres'],
    'gallon': ['gal', 'gallons'],
    'quart': ['qt', 'quarts'],
    'pint': ['pt', 'pints'],
    'cup': ['c', 'cups'],

    # For 'voltage'
    'volt': ['v', 'volts'],
    'kilovolt': ['kv', 'kilovolts'],
    'millivolt': ['mv', 'millivolts'],

    # For 'wattage'
    'watt': ['w', 'watts'],
    'kilowatt': ['kw', 'kilowatts'],
    'megawatt': ['mw', 'megawatts'],
    'gigawatt': ['gw', 'gigawatts'],

    # For 'height', 'depth', and 'width'
    'millimetre': ['mm', 'millimeters', 'millimetres'],
    'centimetre': ['cm', 'centimeters', 'centimetres'],
    'metre': ['m', 'meters', 'metres'],
    'kilometre': ['km', 'kilometers', 'kilometres'],
    'inch': ['in', 'inches'],
    'foot': ['ft', 'feet'],
    'yard': ['yd', 'yards'],
    'mile': ['mi', 'miles'],

    # Other common units
    'degree_celsius': ['°C', 'C', 'degrees Celsius'],
    'degree_fahrenheit': ['°F', 'F', 'degrees Fahrenheit'],
    'calorie': ['cal', 'calories'],
    'kilocalorie': ['kcal', 'kcals'],
    'joule': ['j', 'joules'],
    'pascal': ['Pa', 'pascals'],
    'bar': ['bar', 'bars'],
    'psi': ['psi', 'pounds per square inch'],
    'newton': ['N', 'newtons'],
    'fluid_ounce': ['fl oz', 'fluid ounces'],
}


def _unit_token_pattern(abbr):
    """Compile a regex matching ``abbr`` only as a stand-alone unit token.

    The token must not be preceded by a letter or underscore (a digit is fine,
    so "500g" matches) and must not be followed by any word character.
    """
    return re.compile(r'(?<![^\W\d])' + re.escape(abbr) + r'(?!\w)')


def standardize_unit(value, entity_name):
    """Rewrite the first recognised unit abbreviation in ``value`` to its canonical name.

    Units are tried in the order of ``unit_abbreviations``; for the first
    abbreviation that occurs as a stand-alone token, every stand-alone
    occurrence of that abbreviation is replaced and the result is returned.
    Matching is case-sensitive, and only one abbreviation is rewritten per call.

    Args:
        value: Text to normalise (e.g. an OCR string such as "500 g").
        entity_name: Currently unused; kept so existing callers keep working.

    Returns:
        The text with the matched abbreviation replaced, or ``value`` unchanged
        when no abbreviation is found.
    """
    for unit, abbreviations in unit_abbreviations.items():
        for abbr in abbreviations:
            pattern = _unit_token_pattern(abbr)
            if pattern.search(value):
                # Substitute with the same boundary-aware pattern that was used
                # for detection, so letters inside other words ("sugar") are
                # never touched and regex metacharacters stay escaped.
                return pattern.sub(unit, value)
    return value  # If no abbreviation found, return the original value


In [61]:
def initialize_resnet_model(num_classes):
    """Build and compile a ResNet50-based image classifier.

    The network is an ImageNet-initialised ResNet50 backbone without its top,
    followed by global average pooling, a 1024-unit ReLU layer and a softmax
    layer with ``num_classes`` outputs. It is compiled with Adam and sparse
    categorical cross-entropy, so labels must be integer class ids.

    Args:
        num_classes: Number of output classes.

    Returns:
        The compiled Keras model, expecting inputs of shape ``(224, 224, 3)``.
    """
    # Mixed precision: layers pick up the dtype policy when they are
    # constructed, so the policy has to be set BEFORE the model is built.
    # Setting it afterwards (as before) left this model untouched and only
    # affected models created later. It is enabled only when a GPU is visible
    # because float16 compute is much slower on CPU.
    if tf.config.list_physical_devices('GPU'):
        policy = tf.keras.mixed_precision.Policy('mixed_float16')
        tf.keras.mixed_precision.set_global_policy(policy)

    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    # Keep the softmax output in float32 so the loss stays numerically stable
    # under a float16 compute policy.
    predictions = Dense(num_classes, activation='softmax', dtype='float32')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [62]:
def initialize_layoutlm_model(num_labels):
    """Load the pretrained LayoutLM tokenizer and token-classification model.

    Args:
        num_labels: Number of labels for the token-classification head.

    Returns:
        Tuple ``(tokenizer, model)``, both loaded from the
        ``microsoft/layoutlm-base-uncased`` checkpoint.
    """
    checkpoint = 'microsoft/layoutlm-base-uncased'
    layoutlm_tokenizer = LayoutLMTokenizer.from_pretrained(checkpoint)
    layoutlm_classifier = LayoutLMForTokenClassification.from_pretrained(
        checkpoint,
        num_labels=num_labels,
    )
    return layoutlm_tokenizer, layoutlm_classifier

In [63]:
def predict_with_resnet(model, images):
    """Return the highest-scoring class index for every image.

    Args:
        model: Object exposing ``predict(images)`` that returns per-class
            scores with classes along axis 1.
        images: Batch of images passed straight to ``model.predict``.

    Returns:
        Array of class indices, one per image.
    """
    class_scores = model.predict(images)
    best_class_per_image = np.argmax(class_scores, axis=1)
    return best_class_per_image

def predict_with_layoutlm(model, tokenizer, texts):
    """Predict a label id for every token of every text with LayoutLM.

    Args:
        model: Token-classification model whose output exposes ``logits`` of
            shape ``(batch, sequence_length, num_labels)``.
        tokenizer: Tokenizer matching ``model``.
        texts: List of input strings.

    Returns:
        Integer array of shape ``(batch, sequence_length)`` holding the
        arg-max label id per token (padding positions included). An empty
        ``texts`` yields an empty ``(0, 0)`` array.

    NOTE(review): no ``bbox`` is passed, so the model presumably falls back to
    all-zero boxes and layout information is not used - confirm this is
    intended.
    """
    if len(texts) == 0:
        return np.empty((0, 0), dtype=np.int64)

    encodings = tokenizer(texts, truncation=True, padding=True, return_tensors='pt')
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**encodings)
    # `.cpu()` makes the conversion work when the model runs on a GPU.
    logits = outputs.logits.detach().cpu().numpy()
    # The label dimension is the LAST axis; axis=1 is the token position, so
    # the previous `argmax(axis=1)` returned positions rather than label ids.
    return np.argmax(logits, axis=-1)

In [64]:
def generate_predictions(test_df, resnet_model, layoutlm_model, tokenizer):
    """Produce one prediction string per row of ``test_df``.

    The images referenced by the ``image_link`` column are downloaded and
    classified with the ResNet model; their OCR text is unit-normalised and fed
    to the LayoutLM model. Both results are joined with a single space.

    Args:
        test_df: DataFrame with an ``image_link`` column.
        resnet_model: Image classifier passed to ``predict_with_resnet``.
        layoutlm_model: Text model passed to ``predict_with_layoutlm``.
        tokenizer: Tokenizer belonging to ``layoutlm_model``.

    Returns:
        List of strings of the form ``"<resnet prediction> <layoutlm prediction>"``.
    """
    # Image branch: fetch the files, load them as arrays, classify.
    local_paths = download_images(test_df['image_link'].tolist())
    image_batch = preprocess_images(local_paths)
    image_classes = predict_with_resnet(resnet_model, image_batch)

    # Text branch: OCR, unit normalisation (no entity name available), classify.
    ocr_texts = extract_text_from_images(local_paths)
    normalized_texts = []
    for raw_text in ocr_texts:
        normalized_texts.append(standardize_unit(raw_text, None))
    text_classes = predict_with_layoutlm(layoutlm_model, tokenizer, normalized_texts)

    # Merge both branches pairwise.
    combined = []
    for resnet_pred, layoutlm_pred in zip(image_classes, text_classes):
        combined.append(f"{resnet_pred} {layoutlm_pred}")
    return combined

In [68]:
def cumulative_training(train_df, model_save_path, batch_size=100, max_batches=1500):
    best_accuracy = 0
    best_model = None

    for batch in range(1, max_batches + 1):
        print(f"Processing batch {batch}...")
        
        # Download and preprocess images for the current batch
        image_links = train_df['image_link'].head(batch * batch_size).tolist()
        image_paths = download_images(image_links)
        images = preprocess_images(image_paths)
        labels = train_df['entity_value'].head(batch * batch_size).values
        
        # Ensure labels are integers
        labels = labels.astype(np.int32)
        
        # Create TensorFlow dataset
        dataset = tf.data.Dataset.from_tensor_slices((images, labels))
        dataset = dataset.shuffle(buffer_size=1000).batch(batch_size)
        
        # Split dataset into training and validation sets
        dataset_size = batch * batch_size
        validation_size = int(0.2 * dataset_size)
        train_size = dataset_size - validation_size
        
        train_dataset = dataset.take(train_size)
        validation_dataset = dataset.skip(train_size)
        
        # Initialize and compile the model
        model = initialize_resnet_model(num_classes=30)
        
        # Learning Rate Scheduling
        def lr_scheduler(epoch, lr):
            if epoch < 10:
                return lr
            else:
                return lr * tf.math.exp(-0.1)
        
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
            ModelCheckpoint(model_save_path, save_best_only=True, monitor='val_accuracy'),
            LearningRateScheduler(lr_scheduler)
        ]
        
        # Ensure that the data passed to the model is in the correct format
        history = model.fit(
            train_dataset,
            epochs=5,
            validation_data=validation_dataset,
            callbacks=callbacks
        )
        
        # Evaluate model
        accuracy = max(history.history['val_accuracy'])
        print(f"Batch {batch} Accuracy: {accuracy:.4f}")
        
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = load_model(model_save_path)
    
    print(f"Best Accuracy: {best_accuracy:.4f}")
    return best_model


In [69]:
def save_predictions(test_df, predictions, submission_file='submission.csv'):
    """Attach the predictions to ``test_df`` and write the frame to CSV.

    The ``prediction`` column is added to (or overwritten in) the DataFrame
    that was passed in, so the caller's frame is modified.

    Args:
        test_df: DataFrame to extend and export.
        predictions: One prediction per row of ``test_df``.
        submission_file: Output CSV path; the index is not written.
    """
    test_df['prediction'] = predictions
    test_df.to_csv(
        submission_file,
        index=False,
    )
    confirmation = f"Predictions saved to {submission_file}"
    print(confirmation)

In [67]:
# Driver: train the image classifier, then predict on the test set and write
# the submission file.
if __name__ == "__main__":
    # Adjust NUM_CLASSES as needed
    NUM_CLASSES = 30  # Example number of classes, adjust based on your task
    model_save_path = 'best_model.keras'

    # Load datasets
    train_df = pd.read_csv(TRAIN_CSV)
    test_df = pd.read_csv(TEST_CSV)

    # Cumulative Training
    best_model = cumulative_training(train_df, model_save_path, batch_size=100)
    if best_model is None:
        # Nothing was trained, so there is no model to predict with.
        raise RuntimeError("cumulative_training did not produce a model")

    # Initialize LayoutLM; reuse the trained ResNet that was just returned.
    # Reloading `model_save_path` is redundant and could pick up a checkpoint
    # other than the best one.
    tokenizer, layoutlm_model = initialize_layoutlm_model(num_labels=NUM_CLASSES)
    resnet_model = best_model

    # Generate predictions
    predictions = generate_predictions(test_df, resnet_model, layoutlm_model, tokenizer)

    # Save predictions
    save_predictions(test_df, predictions)


Processing batch 1...
Epoch 1/5


UnimplementedError: Graph execution error:

Detected at node compile_loss/sparse_categorical_crossentropy/Cast_1 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py", line 18, in <module>

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelapp.py", line 739, in start

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\tornado\platform\asyncio.py", line 205, in start

  File "d:\python3.12\Lib\asyncio\base_events.py", line 641, in run_forever

  File "d:\python3.12\Lib\asyncio\base_events.py", line 1986, in _run_once

  File "d:\python3.12\Lib\asyncio\events.py", line 88, in _run

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 534, in process_one

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\ipykernel\ipkernel.py", line 362, in execute_request

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelbase.py", line 778, in execute_request

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\ipykernel\ipkernel.py", line 449, in do_execute

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes

  File "C:\Users\Sudee\AppData\Roaming\Python\Python312\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code

  File "C:\Users\Sudee\AppData\Local\Temp\ipykernel_25928\2534985517.py", line 11, in <module>

  File "C:\Users\Sudee\AppData\Local\Temp\ipykernel_25928\425488394.py", line 48, in cumulative_training

  File "d:\python3.12\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "d:\python3.12\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 320, in fit

  File "d:\python3.12\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 121, in one_step_on_iterator

  File "d:\python3.12\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 108, in one_step_on_data

  File "d:\python3.12\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 54, in train_step

  File "d:\python3.12\Lib\site-packages\keras\src\trainers\trainer.py", line 359, in _compute_loss

  File "d:\python3.12\Lib\site-packages\keras\src\trainers\trainer.py", line 327, in compute_loss

  File "d:\python3.12\Lib\site-packages\keras\src\trainers\compile_utils.py", line 611, in __call__

  File "d:\python3.12\Lib\site-packages\keras\src\trainers\compile_utils.py", line 652, in call

  File "d:\python3.12\Lib\site-packages\keras\src\losses\loss.py", line 56, in __call__

  File "d:\python3.12\Lib\site-packages\keras\src\tree\tree_api.py", line 148, in map_structure

  File "d:\python3.12\Lib\site-packages\keras\src\tree\optree_impl.py", line 79, in map_structure

  File "d:\python3.12\Lib\site-packages\optree\ops.py", line 747, in tree_map

  File "d:\python3.12\Lib\site-packages\keras\src\losses\loss.py", line 57, in <lambda>

  File "d:\python3.12\Lib\site-packages\keras\src\ops\core.py", line 822, in convert_to_tensor

  File "d:\python3.12\Lib\site-packages\keras\src\backend\tensorflow\core.py", line 132, in convert_to_tensor

Cast string to float is not supported
	 [[{{node compile_loss/sparse_categorical_crossentropy/Cast_1}}]] [Op:__inference_one_step_on_iterator_64225]