EAST MODEL Implementation

In [3]:
!pip install opencv-python pytesseract




In [4]:
!pip install imutils

import numpy as np
import cv2

Collecting imutils
  Downloading imutils-0.5.4.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: imutils
  Building wheel for imutils (setup.py) ... [?25ldone
[?25h  Created wheel for imutils: filename=imutils-0.5.4-py3-none-any.whl size=25837 sha256=8bf526087d1f5fa5f803c827586fce14a8aa7a37a105517950467092b6a92b9f
  Stored in directory: /root/.cache/pip/wheels/85/cf/3a/e265e975a1e7c7e54eb3692d6aa4e2e7d6a3945d29da46f2d7
Successfully built imutils
Installing collected packages: imutils
Successfully installed imutils-0.5.4


IMAGE PROCESSING

In [5]:
from PIL import Image

In [6]:
def denoise_image(img):
    """Return ``img`` smoothed with a 5x5 Gaussian blur.

    Sigma is passed as 0, which lets OpenCV derive it from the kernel size.
    """
    kernel_size = (5, 5)
    return cv2.GaussianBlur(img, kernel_size, 0)

def deblur_image(img,strength):
    """Apply a 3x3 cross-shaped sharpening kernel as a basic deblur.

    The kernel has ``1 + 4*strength`` at the centre and ``-strength`` on the
    four direct neighbours, so its weights sum to 1.
    """
    neighbour = -strength
    centre = 1+4*strength
    kernel = np.array([
        [0, neighbour, 0],
        [neighbour, centre, neighbour],
        [0, neighbour, 0],
    ])
    return cv2.filter2D(img, -1, kernel)
# Step 3: Enlarge using PIL's LANCZOS interpolation
def enlarge_image(img, new_width, new_height):
    """Resize a BGR image to ``(new_width, new_height)`` with LANCZOS resampling.

    The image is converted BGR -> RGB for PIL, resized, and converted back to
    BGR before being returned as a NumPy array.
    """
    rgb_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = Image.fromarray(rgb_pixels).resize((new_width, new_height), Image.LANCZOS)
    return cv2.cvtColor(np.array(resized), cv2.COLOR_RGB2BGR)

# Step 4: Sharpen the enlarged image (less intense sharpening)
def sharpen_image(img,strength):
    """Sharpen ``img`` with a 3x3 kernel whose side weights are ``-strength``.

    The centre weight is ``1 + 4*strength`` so the kernel sums to 1.
    """
    side = -strength
    kernel = np.array([[0, side, 0],
                       [side, 1+4*strength, side],
                       [0, side, 0]])
    return cv2.filter2D(img, -1, kernel)

# Step 5: Adjust contrast using CLAHE (less aggressive)
def adjust_contrast(img):
    """Equalise the lightness channel of a BGR image with CLAHE.

    The image is converted to LAB, CLAHE (clipLimit=0.5, 8x8 tiles) is applied
    to the first channel only, and the result is converted back to BGR.
    """
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2LAB))
    equaliser = cv2.createCLAHE(clipLimit=0.5, tileGridSize=(8, 8))  # Lower contrast adjustment
    merged = cv2.merge((equaliser.apply(lightness), chan_a, chan_b))
    return cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)

def add_padding(img, padding_size):
    """Pad ``img`` on all four sides and repaint the left border.

    The image is first padded by ``padding_size`` pixels per side using
    ``cv2.BORDER_REPLICATE``. The left ``padding_size`` columns are then
    overwritten with the column at index ``padding_size + 2`` of the padded
    image (i.e. the third column of the original image).

    Works for both 2-D (grayscale) and 3-D (colour) arrays; the previous
    implementation unpacked ``h, w, c`` from the shape without using them,
    which raised ``ValueError`` on grayscale input.

    Raises:
        IndexError: if the padded image has no column ``padding_size + 2``.
    """
    padded_image = cv2.copyMakeBorder(
        img, padding_size, padding_size, padding_size, padding_size,
        cv2.BORDER_REPLICATE
    )

    if padding_size > 0:
        # Copy the source column once so the broadcast write below cannot alias it.
        fill_column = padded_image[:, padding_size + 2].copy()
        padded_image[:, :padding_size] = fill_column[:, np.newaxis]

    return padded_image


# Full pipeline
def enhance_image(image, new_width, new_height,s):
    """Run the full enhancement chain and return the padded result.

    Stages, in order: denoise -> deblur (strength ``s``) -> enlarge to
    ``(new_width, new_height)`` -> sharpen (strength ``s``) -> CLAHE contrast
    -> three rounds of padding (3, 3 and 4 pixels).
    """
    stage = denoise_image(image)
    stage = deblur_image(stage, s)
    stage = enlarge_image(stage, new_width, new_height)
    stage = sharpen_image(stage, s)
    stage = adjust_contrast(stage)

    # Padding is applied in three passes, matching the original sizes.
    for border in (3, 3, 4):
        stage = add_padding(stage, border)

    return stage

In [7]:
def process_img(img):
    """Enhance ``img`` for OCR, upscaling small images towards 1000 px.

    If both dimensions are below 1000 px the image is enlarged by the largest
    uniform factor that keeps both within 1000 px; otherwise the original
    size is kept. The enhancement strength is fixed at 0.017.

    Fix: the target width is now derived from the source width and the target
    height from the source height. They were previously swapped, which
    distorted the aspect ratio of every upscaled non-square image.
    """
    h, w = img.shape[:2]
    scale = min(1000 / w, 1000 / h)

    new_width = int(w)
    new_height = int(h)
    if scale > 1:
        new_width = int(w * scale)
        new_height = int(h * scale)

    return enhance_image(img, new_width, new_height, 0.017)

In [8]:
def process_YOLO_sub_image(img, xmin, xmax, ymin, ymax, scale_factor=4):
    """Crop an enlarged window around a bounding box.

    The box ``(xmin, ymin)-(xmax, ymax)`` is grown by ``scale_factor`` about
    its centre, clipped to the image bounds, and the corresponding slice of
    ``img`` (indexed as ``[rows, cols]``) is returned.
    """
    mid_x = (xmin + xmax) // 2
    mid_y = (ymin + ymax) // 2

    # Half extents of the enlarged box (integer pixels).
    half_w = int((xmax - xmin) * scale_factor) // 2
    half_h = int((ymax - ymin) * scale_factor) // 2

    # Clip the enlarged box to the image.
    left = max(0, mid_x - half_w)
    right = min(img.shape[1], mid_x + half_w)
    top = max(0, mid_y - half_h)
    bottom = min(img.shape[0], mid_y + half_h)

    return img[top:bottom, left:right]

EasyOCR Implementation

In [None]:
!pip install easyocr
!pip install torch torchvision torchaudio

import torch
!pip install timm
import timm  # For loading the Swin Transformer
import cv2
import os
import torch.nn as nn

!pip install opencv-python-headless matplotlib

import easyocr

import matplotlib.pyplot as plt



In [None]:
reader = easyocr.Reader(['en'], gpu=True)

In [None]:
def detect_text(image_pixel):
    """OCR ``image_pixel`` with the module-level EasyOCR ``reader``.

    Returns the detected text fragments whose confidence is at least 0.3,
    each followed by a single space, concatenated in detection order.
    """
    detections = reader.readtext(image_pixel)

    # Each detection is a (bbox, text, prob) triple; the bbox is not used.
    kept = [
        text
        for (_bbox, text, prob) in detections
        if text is not None and prob >= 0.3
    ]
    return ''.join(fragment + ' ' for fragment in kept)
    


In [None]:
# # Initialize EasyOCR reader

# def recognize_text_with_easyocr(image_pixels):
#     result = reader.readtext(image_pixels)
#     """ Recognize text using EasyOCR """
#     for (bbox, text, prob) in result:
#         if text is not None:
#           if prob < 0.2 :
#             updated_image_pixels = image_processing(image_pixels)
#             result_new = reader.readtext(updated_image_pixels)
#             for (bbox, text, prob) in result_new:
#               if text is not None:
#                 if prob >= 0.2:
#                   return text
#           else:
#             return text
#         print("none detected")
#     return ""


In [None]:
YOLO Model

In [None]:
!pip3 install ultralytics

In [None]:
import cv2
import matplotlib.pyplot as plt

In [None]:
# Load the model weights in Kaggle
from ultralytics import YOLO

# Load the model from the uploaded file (adjust the path as necessary)
model = YOLO('/kaggle/input/yolo-model/best.pt')


In [None]:
def preprocess_image(image_pixels):
    """Turn an image array into a ``(1, C, 640, 640)`` float tensor in [0, 1].

    Steps: BGR -> RGB channel swap, resize to 640x640, scale by 1/255,
    move channels first, add a batch axis, convert to a float32 torch tensor.
    """
    rgb = cv2.cvtColor(image_pixels, cv2.COLOR_BGR2RGB)
    scaled = cv2.resize(rgb, (640, 640)) / 255.0

    # (H, W, C) -> (C, H, W) -> (1, C, H, W)
    batched = np.expand_dims(scaled.transpose(2, 0, 1), axis=0)

    return torch.from_numpy(batched).float()

In [None]:
def run_YOLO(image_pixels):
    """Run the module-level YOLO ``model`` on ``image_pixels`` and return its results."""
    return model(image_pixels)

In [None]:
def YOLO_output(image_pixels):
    """Detect dimension labels in an image and OCR each detected region.

    Args:
        image_pixels: image as a 3-D NumPy array (height, width, channels).

    Returns:
        dict mapping class name to a list of ``[text, confidence]`` pairs,
        one per detection. The keys 'depth', 'height' and 'width' are always
        present (possibly with empty lists); a detection whose class id is
        not in ``model.names`` is collected under "Unknown class".

    Fixes relative to the previous version:
      * crops are taken from the original image, not from the 4-D tensor
        produced by ``preprocess_image``;
      * box coordinates are rescaled from the tensor's size back to the
        original image size;
      * the class id is converted to ``int`` before the ``model.names``
        lookup (a tensor never matched the keys);
      * detections are appended instead of overwriting the per-class list;
      * the always-truthy tuple returned by the old format check is replaced
        by a real validity test.
    """
    prediction_dict = {'depth' : [], 'height' : [], 'width' : []}

    # Nothing sensible can be detected on a non-image input.
    if not isinstance(image_pixels, np.ndarray) or image_pixels.ndim != 3:
        return prediction_dict

    orig_h, orig_w = image_pixels.shape[:2]
    model_input = preprocess_image(image_pixels)

    # NOTE(review): boxes are assumed to be reported in the coordinate space
    # of the tensor handed to the model (640x640 from preprocess_image) —
    # confirm against the Ultralytics version in use.
    in_h, in_w = model_input.shape[-2:]
    scale_x = orig_w / in_w
    scale_y = orig_h / in_h

    for result in run_YOLO(model_input):
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0].tolist()

            crop = process_YOLO_sub_image(
                image_pixels,
                int(x1 * scale_x), int(x2 * scale_x),
                int(y1 * scale_y), int(y2 * scale_y),
            )
            if crop.size == 0:
                # Degenerate box after clipping; nothing to OCR.
                continue

            class_id = int(box.cls[0])
            confidence = float(box.conf[0])

            if class_id in model.names:
                class_name = model.names[class_id]
            else:
                class_name = "Unknown class"

            prediction_dict.setdefault(class_name, []).append(
                [detect_text(crop), confidence]
            )

    return prediction_dict

Question, Context, Answer extraction

In [None]:
import pandas as pd
import gc

# Drop every DataFrame currently bound in the notebook's global namespace.
# dir() returns a fresh list of names, so deleting entries from globals()
# inside the loop does not disturb the iteration.
for name in dir():
    obj = globals()[name]
    if isinstance(obj, pd.DataFrame):
        del globals()[name]

# Optional: Run garbage collector to free memory
gc.collect()


In [None]:
# prompt: create new table df_qna with columns 'question', 'context', 'answer', 'unit' all of string type


df_qna = pd.DataFrame(columns=['question', 'context', 'answer', 'unit'], dtype=str)


In [None]:
df = pd.read_csv('/kaggle/input/train-data/train.csv')

In [None]:
df.head()

In [None]:
# prompt: create one sampled dataframe per entity type from df (fixed random_state for reproducibility)

import pandas as pd

# Sample sizes: 100 item_weight, 150 maximum_weight_recommendation,
# 20 each for wattage, voltage and item_volume.
# NOTE: DataFrame.sample raises ValueError if an entity has fewer rows than n.
df_sampled_weight = df[df['entity_name'] == "item_weight"].sample(n=100, random_state = 42)

df_sampled_max_weight = df[df['entity_name'] == "maximum_weight_recommendation"].sample(n=150, random_state = 42)

df_sampled_wattage = df[df['entity_name'] == "wattage"].sample(n=20, random_state = 42)

df_sampled_voltage = df[df['entity_name'] == "voltage"].sample(n=20, random_state = 42)


df_sampled_volume = df[df['entity_name'] == "item_volume"].sample(n=20, random_state = 42)


# Create a new dataframe with the sampled data


In [None]:
df_sampled_voltage.describe()

In [None]:

df_qna_weight = pd.DataFrame(columns=['question', 'context', 'answer', 'unit'], dtype=str)
df_qna_max_weight = pd.DataFrame(columns=['question', 'context', 'answer', 'unit'], dtype=str)
df_qna_wattage = pd.DataFrame(columns=['question', 'context', 'answer', 'unit'], dtype=str)
df_qna_voltage = pd.DataFrame(columns=['question', 'context', 'answer', 'unit'], dtype=str)
df_qna_volume = pd.DataFrame(columns=['question', 'context', 'answer', 'unit'], dtype=str)

In [None]:
# Build one question/answer frame per entity type from the sampled rows:
#   'question' = fixed question string for the entity
#   'context'  = row['image_link']
#   'answer'   = first whitespace-separated token of row['entity_value']
#   'unit'     = trailing token(s) of row['entity_value']

def _build_qna_frame(sampled, question, unit_words=1):
    """Return a question/context/answer/unit frame for ``sampled``.

    Records are collected in a list and turned into a DataFrame once, instead
    of growing a frame with ``pd.concat`` per row (quadratic copying).

    ``unit_words`` is the number of trailing tokens of ``entity_value`` kept
    as the unit. Values with fewer tokens than that keep whatever is there
    rather than raising ``IndexError``.
    """
    records = []
    for _, row in sampled.iterrows():
        parts = row['entity_value'].split()
        records.append({
            'question': question,
            'context': row['image_link'],
            'answer': parts[0],
            'unit': ' '.join(parts[-unit_words:]),
        })
    # Explicit columns keep the schema even when ``sampled`` is empty.
    return pd.DataFrame(records, columns=['question', 'context', 'answer', 'unit'])


df_qna_weight = _build_qna_frame(df_sampled_weight, "What is the weight value of the product?")
df_qna_max_weight = _build_qna_frame(df_sampled_max_weight, "What is the maximum weight capacity of the product?")
df_qna_wattage = _build_qna_frame(df_sampled_wattage, "What is the wattage value of the product?")
df_qna_voltage = _build_qna_frame(df_sampled_voltage, "What is the voltage value of the product?")
# Volume units can be two words ("fluid ounce"), so the last two tokens are
# kept; a leading number is stripped later by remove_first_number.
df_qna_volume = _build_qna_frame(df_sampled_volume, "What is the volume value of the product?", unit_words=2)


In [None]:
df_qna_volume

Processing of Volume

In [None]:
import re
def remove_first_number(text):
    """Drop the first whitespace-separated token of ``text`` if it is a number.

    A number here is an unsigned integer or decimal (``12`` or ``12.5``).
    The remaining tokens are re-joined with single spaces.
    """
    tokens = text.split()
    if not tokens:
        return ''

    # Handles integers and floats
    leads_with_number = re.match(r'^\d+(\.\d+)?$', tokens[0]) is not None
    kept = tokens[1:] if leads_with_number else tokens
    return ' '.join(kept)

# Strip the leading number from the 'unit' column of the volume frame
# (two trailing tokens were kept, so single-word units still carry the value).
df_qna_volume['unit'] = df_qna_volume['unit'].astype(str).apply(remove_first_number)

In [None]:
df_qna_volume

In [None]:
def is_number(string):
  """Return True if ``string`` can be parsed by ``float()``, else False.

  Non-string, non-numeric inputs such as ``None`` return False instead of
  raising ``TypeError``.
  """
  try:
    float(string)
    return True
  except (TypeError, ValueError):
    return False

# Function to clean the dataframe
def clean_dataframe(df):
  """Return only the rows of ``df`` whose 'answer' value parses as a number."""
  numeric_rows = df['answer'].apply(is_number)
  return df[numeric_rows]

# Apply the cleaning function to each dataframe
df_qna_weight = clean_dataframe(df_qna_weight)
df_qna_max_weight = clean_dataframe(df_qna_max_weight)
df_qna_wattage = clean_dataframe(df_qna_wattage)
df_qna_voltage = clean_dataframe(df_qna_voltage)
df_qna_volume = clean_dataframe(df_qna_volume)

In [None]:
# Allowed units per entity. Entities that share a vocabulary (the three
# length entities, the two weight entities) each get their own independent set.
_LENGTH_UNITS = ("centimetre", "foot", "millimetre", "metre", "inch", "yard")
_WEIGHT_UNITS = ("milligram", "kilogram", "microgram", "gram", "ounce", "ton", "pound")

entity_unit_map = {
  **{entity: set(_LENGTH_UNITS) for entity in ("width", "depth", "height")},
  **{entity: set(_WEIGHT_UNITS)
     for entity in ("item_weight", "maximum_weight_recommendation")},
  "voltage": {"millivolt", "kilovolt", "volt"},
  "wattage": {"kilowatt", "watt"},
  "item_volume": {
    "cubic foot", "microlitre", "cup", "fluid ounce", "centilitre",
    "imperial gallon", "pint", "decilitre", "litre", "millilitre",
    "quart", "cubic inch", "gallon",
  },
}

In [None]:
item_volume_units = entity_unit_map["item_volume"]

# Keep only the rows whose 'unit' IS one of the allowed volume units
df_qna_volume = df_qna_volume[df_qna_volume['unit'].isin(item_volume_units)]

wattage_units = entity_unit_map["wattage"]

# Keep only the rows whose 'unit' IS one of the allowed wattage units
df_qna_wattage = df_qna_wattage[df_qna_wattage['unit'].isin(wattage_units)]

item_weight_units = entity_unit_map["item_weight"]

# Keep only the rows whose 'unit' IS one of the allowed weight units
df_qna_weight = df_qna_weight[df_qna_weight['unit'].isin(item_weight_units)]

max_weight_units = entity_unit_map["maximum_weight_recommendation"]

# Keep only the rows whose 'unit' IS one of the allowed maximum-weight units
df_qna_max_weight = df_qna_max_weight[df_qna_max_weight['unit'].isin(max_weight_units)]

voltage_units = entity_unit_map["voltage"]

# Keep only the rows whose 'unit' IS one of the allowed voltage units
df_qna_voltage = df_qna_voltage[df_qna_voltage['unit'].isin(voltage_units)]



In [None]:
df_qna_weight

In [None]:
df_qna_volume.head()

In [None]:
df_qna_weight = df_qna_weight.drop_duplicates(subset=['context'], keep='first')
df_qna_max_weight = df_qna_max_weight.drop_duplicates(subset=['context'], keep='first')
df_qna_wattage = df_qna_wattage.drop_duplicates(subset=['context'], keep='first')
df_qna_voltage = df_qna_voltage.drop_duplicates(subset=['context'], keep='first')
df_qna_volume = df_qna_volume.drop_duplicates(subset=['context'], keep='first')

In [None]:
df_qna_max_weight.head()

In [None]:
import requests
from PIL import Image
import numpy as np
from io import BytesIO

def download_image_to_numpy(image_url):
    """Download an image and return it as an ``(H, W, 3)`` RGB NumPy array.

    Args:
        image_url: URL of the image to fetch.

    Returns:
        NumPy array of the decoded image, always converted to 3-channel RGB
        so palette, grayscale and RGBA sources all yield the same layout.

    Raises:
        Exception: if the server answers with a status code other than 200.
        requests.exceptions.RequestException: on connection errors or when
            the 30 second timeout expires (previously the call could hang
            indefinitely).
    """
    response = requests.get(image_url, timeout=30)

    if response.status_code != 200:
        raise Exception(f"Failed to download image, status code: {response.status_code}")

    # The context manager releases the decoder once the pixels are copied out.
    with Image.open(BytesIO(response.content)) as img:
        # NOTE(review): the array is RGB, while the enhancement helpers use
        # BGR conversion codes — confirm the channel order they should receive.
        return np.array(img.convert('RGB'))


In [None]:

# Download an image, enhance it, and OCR it to obtain the context text

def get_context(image_url):
  """Download the image at ``image_url``, enhance it and return its OCR text."""
  pixels = download_image_to_numpy(image_url)
  return detect_text(process_img(pixels))

In [None]:
# # prompt: take the first row's context column from df_qna

# first_row_context = df_qna.iloc[0]['context']

# img = download_image_to_numpy(first_row_context)

# processed_images = crop_image(img)

# print(processed_images)


In [None]:
# prompt: apply the get_context function to each frame's 'context' column
# Each call downloads and OCRs one image, so this cell is slow and needs
# network access; a failed download aborts the whole apply.

df_qna_weight['context_new'] = df_qna_weight['context'].apply(get_context)
df_qna_max_weight['context_new'] = df_qna_max_weight['context'].apply(get_context)
df_qna_voltage['context_new'] = df_qna_voltage['context'].apply(get_context)
df_qna_wattage['context_new'] = df_qna_wattage['context'].apply(get_context)
df_qna_volume['context_new'] = df_qna_volume['context'].apply(get_context)


In [None]:
df_qna_weight.head()

In [None]:
df_qna_max_weight.head()

In [None]:
df_qna_volume

In [None]:
df_qna_wattage

In [None]:
# prompt: in the 'answer' column, if the string is of the form somenumber.000.. then replace it with somenumber
def remove_trailing_zeros(value):
    """Strip trailing zeros from the fractional part of a numeric string.

    "12.500" -> "12.5", "7.000" -> "7"; strings without a decimal point are
    returned unchanged.

    Raises:
        ValueError: if ``value`` is not a string, or contains more than one
            decimal point.
    """
    if not isinstance(value, str):
        raise ValueError("Input must be a string")

    pieces = value.split('.')
    if len(pieces) == 1:
        # No decimal point: nothing to trim.
        return value

    # Unpacking fails with ValueError when there is more than one '.'.
    whole, fraction = pieces
    trimmed = fraction.rstrip('0')
    return f"{whole}.{trimmed}" if trimmed else whole

df_qna_weight['answer'] = df_qna_weight['answer'].apply(remove_trailing_zeros)
df_qna_max_weight['answer'] = df_qna_max_weight['answer'].apply(remove_trailing_zeros)
df_qna_wattage['answer'] = df_qna_wattage['answer'].apply(remove_trailing_zeros)
df_qna_voltage['answer'] = df_qna_voltage['answer'].apply(remove_trailing_zeros)
df_qna_volume['answer'] = df_qna_volume['answer'].apply(remove_trailing_zeros)


In [None]:
# prompt: create new df_qna by stacking the rows of df_qna_weight, df_qna_max_weight, df_qna_wattage, df_qna_voltage and df_qna_volume (rows are added, not columns).

df_qna = pd.concat([df_qna_weight, df_qna_max_weight, df_qna_wattage, df_qna_voltage, df_qna_volume], ignore_index=True)


In [None]:
df_qna.describe()


In [None]:
df_qna.head()

In [None]:
df_qna['context_new'] = df_qna['context_new'].str.lower()

In [None]:
df_qna.head()

In [None]:
df_qna.describe()

Question Answer Model Fine Tuning

In [None]:
# Build SQuAD-style training records, one per distinct image link:
# {'context': <OCR text>,
#  'qas': [{'id': <dataset-unique id>,
#           'is_impossible': False,
#           'question': row['question'],
#           'answers': [{'text': row['answer'],
#                        'answer_start': index of the answer in the context}]}]}
# Rows whose answer does not occur in the OCR text are skipped.

train_data = []
next_qa_id = 1  # ids must be unique across the whole dataset, not per context

# sort=False keeps the links in order of first appearance.
for link, link_rows in df_qna.groupby('context', sort=False):
    # One context per link, so every answer_start refers to the stored text.
    context = link_rows['context_new'].iloc[0]
    qas = []
    for _, row in link_rows.iterrows():
        # NOTE(review): a plain substring search can land inside a longer
        # number (e.g. "5" inside "15") — consider a boundary-aware search.
        answer_start = context.find(str(row['answer']))
        if answer_start == -1:
            continue
        qas.append({
            'id': str(next_qa_id).zfill(5),
            'is_impossible': False,
            'question': row['question'],
            'answers': [{'text': row['answer'], 'answer_start': answer_start}]
        })
        next_qa_id += 1
    # Entries are kept even when no question survived, as before.
    train_data.append({
        'context': context,
        'qas': qas})


In [None]:
train_data[:2]

In [None]:
len(train_data)

In [None]:
print(df_qna['context'].nunique())

In [None]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(train_data, test_size=0.2, random_state=42)

In [None]:
import json

with open('qna_train.json', 'w', encoding='utf-8') as f:
    json.dump(train, f, ensure_ascii=False, indent=4)

with open('qna_test.json', 'w', encoding='utf-8') as f:
    json.dump(test, f, ensure_ascii=False, indent=4)


In [None]:
# Reload the splits. The files were written as UTF-8 with ensure_ascii=False,
# so the same encoding is given explicitly instead of relying on the
# platform default.
with open(r"qna_train.json", "r", encoding="utf-8") as read_file:
    train = json.load(read_file)

with open(r'qna_test.json', 'r', encoding="utf-8") as read_file:
    test = json.load(read_file)

Model Fine Tuning

In [None]:
!pip3 install simpletransformers

In [None]:
import logging
from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs

In [None]:
# Training options handed to simpletransformers' QuestionAnsweringModel.
train_args = dict(
    overwrite_output_dir=True,
    evaluate_during_training=True,
    max_seq_length=150,
    num_train_epochs=25,  # 25, after experimentations
    evaluate_during_training_steps=500,
    save_model_every_epoch=False,
    save_eval_checkpoints=False,
    n_best_size=1,
    train_batch_size=20,  # batch size is another important argument
)

In [None]:
# Fine-tune from bert-base-uncased. CUDA is used only when it is available,
# so the cell also runs on a CPU-only kernel instead of failing.
bert_model = QuestionAnsweringModel("bert",
                               "bert-base-uncased",
                               args = train_args,
                                use_cuda = torch.cuda.is_available())

In [None]:
bert_model.train_model(train, eval_data = test, output_dir = '/kaggle/working')

Output Prediction

In [None]:
df_test = pd.read_csv('/kaggle/input/train-data/test.csv')

In [None]:
import re

# Regex -> replacement table that rewrites unit abbreviations to their
# standard names. Entries are applied in insertion order by replace_units.
#
# Changes from the previous table:
#   * literal dots are escaped ("c.m." / "m.m." used to match ordinary words
#     such as "coma" or "memo");
#   * abbreviations ending in a dot use (?!\w) instead of a trailing \b,
#     which never matched before a space;
#   * replacements are plain "<number> <unit>" / "<unit>" strings, without
#     stray quotes or leading spaces, so every standalone replacement is
#     exactly a standard unit name;
#   * "cu foot" is accepted for cubic foot, because the earlier "ft" rule
#     has already rewritten "cu ft" by the time the volume rules run.
unit_mapping = {
    # --- length ---
    r'\bcm\b|\bcms\b|\bc\.m\.(?!\w)': 'centimetre',
    r'\bft\b': 'foot',
    r'\bmm\b|\bm\.m\.(?!\w)': 'millimetre',
    r'\bm\b': 'metre',
    r'\bin\b|\b"\b': 'inch',
    r'\byd\b': 'yard',
    r"(\d+)\s*(cm\b|cms\b|c\.m\.(?!\w))": r"\1 centimetre",
    r"(\d+)\s*(ft)\b": r"\1 foot",
    r"(\d+)\s*(mm\b|m\.m\.(?!\w))": r"\1 millimetre",
    r"(\d+)\s*(m)\b": r"\1 metre",
    r"(\d+)\s*(in)\b": r"\1 inch",
    r"(\d+)\s*(yd)\b": r"\1 yard",
    # --- volume ---
    r'(\bft3\b|\bcu ft\b|\bcu foot\b)': r'cubic foot',
    r'(\bμL\b|\bμl\b)': r'microlitre',
    r'\bcup\b': r'cup',
    r'(\bfl oz\b|\bfl\. oz\.(?!\w))': r'fluid ounce',
    r'(\bimperial gallon\b)': r'imperial gallon',
    r'\bgal\b': r'gallon',
    r'(\bpint\b|\bpt\b)': r'pint',
    r'\bdl\b': r'decilitre',
    r'(\bl\b|\bls\b)': r'litre',
    r'(\bml\b|\bmls\b)': r'millilitre',
    r'\bqt\b': r'quart',
    r'(\bcubic in\b|\bcu inch\b|\bcu in\b|\bcui\b|\bcu\. in\.(?!\w))': r'cubic inch',
    r'(\d+)\s*(ft3|cu ft)\b': r'\1 cubic foot',
    r'(\d+)\s*(μL|μl)\b': r'\1 microlitre',
    r'(\d+)\s*(cup)\b': r'\1 cup',
    r'(\d+)\s*(fl oz\b|fl\. oz\.(?!\w))': r'\1 fluid ounce',
    r'(\d+)\s*(imperial gallon)\b': r'\1 imperial gallon',
    r'(\d+)\s*(gal)\b': r'\1 gallon',
    r'(\d+)\s*(pint|pt)\b': r'\1 pint',
    r'(\d+)\s*(dl)\b': r'\1 decilitre',
    r'(\d+)\s*(l|ls)\b': r'\1 litre',
    r'(\d+)\s*(ml|mls)\b': r'\1 millilitre',
    # --- power ---
    r'(\d+)\s*(w)\b': r'\1 watt',
    r'(\d+)\s*(kw)\b': r'\1 kilowatt',
    r'\bw\b|\bws\b': 'watt',
    r'\bkw\b|\bkws\b': 'kilowatt',
    # --- voltage ---
    r'(\d+)\s*(v)\b': r'\1 volt',
    r'(\d+)\s*(mv)\b': r'\1 millivolt',
    r'(\d+)\s*(kv)\b': r'\1 kilovolt',
    r'\bv\b|\bvs\b': 'volt',
    r'\bmv\b|\bmvs\b': 'millivolt',
    r'\bkv\b|\bkvs\b': 'kilovolt',
    # --- weight ---
    r'(\d+)\s*(kg|kgs)\b': r'\1 kilogram',
    r'(\d+)\s*(mg|mgs)\b': r'\1 milligram',
    r'(\d+)\s*(g|gm)\b': r'\1 gram',
    r'(\d+)\s*(oz|ozs)\b': r'\1 ounce',
    r'(\d+)\s*(lb|lbs)\b': r'\1 pound',
    r'(\d+)\s*(mcg|mcgs)\b': r'\1 microgram',
    r'(\d+)\s*(ton)\b': r'\1 ton',
    r'\bkg\b|\bkgs\b': 'kilogram',
    r'\bmg\b|\bmgs\b': 'milligram',
    r'\bg\b|\bgm\b': 'gram',
    r'\boz\b|\bozs\b': 'ounce',
    r'\blb\b|\blbs\b': 'pound',
    r'\bmcg\b|\bmcgs\b': 'microgram',
    r'\bton\b': 'ton',
}

def replace_units(text, mapping):
    """Apply every ``pattern -> replacement`` of ``mapping`` to ``text`` in order.

    Each substitution runs on the output of the previous one, so the
    iteration order of ``mapping`` matters.
    """
    result = text
    for pattern in mapping:
        result = re.sub(pattern, mapping[pattern], result)
    return result

# Function to find numbers and units in separate lists
def find_numbers_and_units_after_processing(text, unit_mapping = unit_mapping):
    """Extract ``number unit`` pairs from ``text`` after unit standardisation.

    Args:
        text: raw text (e.g. OCR output).
        unit_mapping: regex -> replacement table passed to ``replace_units``;
            its replacement strings also define the known unit names.

    Returns:
        ``(numbers, units, number_unit_pairs)`` — three parallel lists of
        strings, one entry per number that is directly followed by a unit.

    Fix: unit names are derived from the replacement strings by stripping
    back-references (``\\1``), quotes and surrounding whitespace, and units
    made of several words ("fluid ounce", "cubic inch") are matched across
    consecutive tokens. Previously the next token was compared against the
    raw replacement templates, so such units were never recognised.
    """
    # Run the replace_units function first to standardize units
    processed_text = replace_units(text, unit_mapping)

    # Split the text into tokens (numbers, words, and single symbols)
    tokens = re.findall(r'\d+\.\d+|\d+|\w+|\S', processed_text)

    # Standard unit names, e.g. r'\1 kilogram' -> 'kilogram'.
    known_units = set()
    for replacement in unit_mapping.values():
        unit_name = re.sub(r"\\\d+|'", '', replacement).strip()
        if unit_name:
            known_units.add(unit_name)
    longest_unit = max((len(unit_name.split()) for unit_name in known_units), default=0)

    numbers = []
    units = []
    number_unit_pairs = []

    i = 0
    while i < len(tokens):
        consumed = 0
        if re.match(r'^\d+(\.\d+)?$', tokens[i]):
            # Try the longest unit first so "imperial gallon" beats "gallon".
            for width in range(longest_unit, 0, -1):
                window = tokens[i + 1:i + 1 + width]
                unit = ' '.join(window)
                if len(window) == width and unit in known_units:
                    numbers.append(tokens[i])
                    units.append(unit)
                    number_unit_pairs.append(f"{tokens[i]} {unit}")
                    consumed = width
                    break
        # Skip the unit tokens that were just consumed, if any.
        i += 1 + consumed
    return numbers, units, number_unit_pairs

def yolo_text(text):
    """Return the first ``"<number> <unit>"`` pair found in ``text``, or ''.

    The text is lower-cased first because the unit patterns are written in
    lowercase. An empty string is returned when no pair is found (previously
    this raised ``IndexError``).
    """
    numbers, units, number_unit_pairs = find_numbers_and_units_after_processing(text.lower())
    if not numbers:
        return ''
    return numbers[0] + ' ' + units[0]
def qna_unit(context, value):
    """Return the ``"<number> <unit>"`` pair in ``context`` matching ``value``.

    Args:
        context: text in which to look for number/unit pairs.
        value: predicted answer; its first number (integer or decimal) is
            used, so an answer span such as "5 kg" is accepted too.

    Returns:
        The first pair whose number equals the answer's number, or '' when
        the answer contains no number or no pair matches.

    Fixes: the search now runs on ``context`` (the old code referenced an
    undefined ``text``), numbers are compared numerically (an int was being
    compared with a string and never matched), and only the expected
    conversion errors are handled instead of a bare ``except``.
    """
    found = re.search(r'\d+(?:\.\d+)?', str(value))
    if found is None:
        return ''
    target = float(found.group())

    numbers, units, number_unit_pairs = find_numbers_and_units_after_processing(context)
    for number, pair in zip(numbers, number_unit_pairs):
        if float(number) == target:
            return pair
    return ''
            

In [None]:
def apply_yolo(row):
    """Predict a dimension value for ``row`` from the YOLO + OCR pipeline.

    Downloads ``row['image_link']``, runs ``YOLO_output`` and picks the
    detection with the highest confidence for ``row['entity_name']``.

    Returns:
        ``"<number> <unit>"`` as produced by ``yolo_text``, or '' when the
        entity has no detection.

    Fixes: the entity is read from ``row['entity_name']`` throughout (there
    is no 'entity' key used elsewhere), the confidence comes from the item
    being inspected rather than an undefined ``array``, the best detection is
    returned instead of the last one iterated, and a missing entity no longer
    triggers ``len(None)``.
    """
    img = download_image_to_numpy(row['image_link'])
    yolo_dict = YOLO_output(img)

    candidates = yolo_dict.get(row['entity_name'].lower()) or []
    # Accept a single flat [text, confidence] pair as well as a list of pairs.
    if len(candidates) == 2 and isinstance(candidates[0], str):
        candidates = [candidates]

    best_text = None
    best_confidence = -float('inf')
    for text, confidence in candidates:
        if confidence > best_confidence:
            best_confidence = confidence
            best_text = text

    if best_text is None:
        return ''
    return yolo_text(best_text)
        
    

In [None]:
def set_question(row):
    """Return the QA question for ``row['entity_name']``.

    Any entity other than the four listed ones gets the volume question.
    """
    questions = {
        'item_weight': "What is the weight value of the product?",
        'maximum_weight_recommendation': "What is the maximum weight capacity of the product?",
        'wattage': "What is the wattage value of the product?",
        'voltage': "What is the voltage value of the product?",
    }
    return questions.get(row['entity_name'], "What is the volume value of the product?")

In [None]:
# The answer is returned as a "<number> <unit>" string (or '' if none).
def predict(context, question):
    """Ask the fine-tuned QA model ``question`` about ``context``.

    Returns:
        The ``"<number> <unit>"`` pair from ``context`` that matches the
        model's top answer, as resolved by ``qna_unit``; '' when the model
        returns no answer or no pair matches.

    Fix: the result is resolved with ``qna_unit``; the previous call to
    ``qna_text`` referenced a function that is not defined in this notebook.
    """
    to_predict = [
    {
        "context": str(context),
        "qas": [
            {
                "question": str(question),
                "id": "0",
            }
        ],
    }]

    answers, probabilities = bert_model.predict(to_predict, n_best_size=1)

    # Each prediction carries a list of candidate answers; take the top one.
    candidates = answers[0].get('answer') if answers else None
    if not candidates:
        return ''
    return qna_unit(context, candidates[0])

In [None]:
def apply_qna(row):
    """Predict the entity value for ``row`` via OCR text and the QA model.

    The lower-cased OCR text of ``row['image_link']`` is printed and used as
    context. A prediction equal to "empty" is mapped to ''.
    """
    context = get_context(row['image_link']).lower()
    print(context)
    question = set_question(row)

    if context is None:
        return ''

    result = predict(context,question)
    return "" if result == "empty" else result
        

In [None]:
def assign_prediction(df):
    """Fill ``df['prediction']`` row by row and return ``df``.

    Dimension entities (height, width, depth) are routed to ``apply_yolo``,
    everything else to ``apply_qna``. ``df`` is modified in place.
    """
    dimension_entities = ['height', 'width', 'depth']
    for index, row in df.iterrows():
        handler = apply_yolo if row['entity_name'] in dimension_entities else apply_qna
        df.at[index, 'prediction'] = handler(row)

    return df

In [None]:
df_new = assign_prediction(df_test.head(20000))

In [None]:
df_new.head(15)

In [None]:
df_first_1000.head(25)

In [None]:
print(type(first_row))

In [None]:
predict(get_context(first_row['image_link']), set_question(first_row))

In [None]:
print(first_row['image_link'])

In [None]:
print(df_test_temp[df_test_temp['entity_name'].isin(['wattage'])])

In [None]:
df_test_temp['question'] = df_test_temp['image_link'].apply(get_context)


In [None]:
def answer_extract(string):
    """Return the first whitespace-separated token of ``string``."""
    head = string.split(None, 1)
    return head[0]

df_test_temp['answer'] = df_test_temp['entity_value'].apply(answer_extract)

In [None]:
df_test_temp.rename(columns={'question': 'context'}, inplace=True)

In [None]:
df_test_temp.head()

In [None]:
# Function to check if a string is a number
def is_number(string):
    """Return True if ``string`` can be parsed by ``float()``, else False.

    Non-string, non-numeric inputs such as ``None`` return False instead of
    raising ``TypeError``.
    """
    try:
        float(string)
        return True
    except (TypeError, ValueError):
        return False

# Function to clean the dataframe
def clean_dataframe(df):
    """Return only the rows of ``df`` whose 'answer' value parses as a number."""
    numeric_rows = df['answer'].apply(is_number)
    return df[numeric_rows]

# Apply the cleaning function to the DataFrame
df_test_temp = clean_dataframe(df_test_temp)


In [None]:
df_test_temp.head()

In [None]:
df_test_temp['answer'] = df_test_temp['answer'].apply(remove_trailing_zeros)

In [None]:
df_test_temp.head()

In [None]:
# Define a function to set the 'question' based on 'entity_name'
def set_question(row):
    """Return the QA question for ``row['entity_name']``.

    This definition replaces the earlier ``set_question`` once the cell runs,
    so it has to cover the same entities: any entity other than the four
    listed ones gets the volume question instead of ``None``.
    """
    if row['entity_name'] == 'item_weight':
        return "What is the weight value of the product?"
    elif row['entity_name'] == 'maximum_weight_recommendation':
        return "What is the maximum weight capacity of the product?"
    elif row['entity_name'] == 'wattage':
        return "What is the wattage value of the product?"
    elif row['entity_name'] == 'voltage':
        return "What is the voltage value of the product?"
    else:
        return "What is the volume value of the product?"

# Apply the function to the DataFrame
df_test_temp['question'] = df_test_temp.apply(set_question, axis=1)



In [None]:
df_test_temp.head()

In [None]:
def predict(context, question):
    """Return the raw ``(answers, probabilities)`` from the QA model.

    NOTE: this redefines ``predict`` from an earlier cell, which returned a
    single string; code that runs after this cell gets the tuple instead.
    """
    query = {"question": question, "id": "0"}
    payload = [{"context": context, "qas": [query]}]

    answers, probabilities = bert_model.predict(payload, n_best_size=1)
    return answers, probabilities

first = df_qna.iloc[9]



In [None]:
print(first['context'])

In [None]:
print(first['question'])

In [None]:
print(first['answer'])

In [None]:
print(predict(first['context_new'], first['question']))

In [None]:
df_test_temp.head()

In [None]:
df_test = pd.read_csv('/kaggle/input/train-data/test.csv')

In [None]:
df_test.head()

In [None]:
first_row_weight = df_test[df_test['entity_name'] == 'item_weight'].iloc[1]
first_row_max_weight = df_test[df_test['entity_name'] == 'maximum_weight_recommendation'].iloc[1]
first_row_wattage = df_test[df_test['entity_name'] == 'wattage'].iloc[1]
first_row_voltage = df_test[df_test['entity_name'] == 'voltage'].iloc[1]

In [None]:
context = get_context(first_row_voltage['image_link'])

In [None]:
print(context)

In [None]:

# Ad-hoc check of the QA model on the `context` computed in an earlier cell.
# NOTE(review): `context` was built from first_row_voltage, while the
# question asks for wattage — confirm this mismatch is intentional.
to_predict = [
    {
        "context": context,
        "qas": [
            {
                "question": "What is the wattage value of the product?",
                "id": "0",
            }
        ],
    }
]
 
answers, probabilities = bert_model.predict(to_predict, n_best_size=1)
print(answers, probabilities)

In [None]:
print(first_row_wattage['image_link'])

In [None]:
# Ad-hoc check: maximum weight question on the sample max-weight row.
to_predict = [
    {
        "context": get_context(first_row_max_weight['image_link']),
        "qas": [
            {
                "question": "What is the maximum weight capacity of the product?",
                "id": "0",
            }
        ],
    }
]

# Use the fine-tuned QA model; `model` is the YOLO detector.
answers, probabilities = bert_model.predict(to_predict, n_best_size=1)
print(answers)

In [None]:
# Ad-hoc check: wattage question on the sample wattage row.
to_predict = [
    {
        "context": get_context(first_row_wattage['image_link']),
        "qas": [
            {
                "question": "What is the wattage value of the product?",
                "id": "0",
            }
        ],
    }
]

# Use the fine-tuned QA model; `model` is the YOLO detector.
answers, probabilities = bert_model.predict(to_predict, n_best_size=1)
print(answers)

In [None]:
# Ad-hoc check: voltage question on the sample voltage row.
to_predict = [
    {
        "context": get_context(first_row_voltage['image_link']),
        "qas": [
            {
                "question": "What is the voltage value of the product?",
                "id": "0",
            }
        ],
    }
]

# Use the fine-tuned QA model; `model` is the YOLO detector.
answers, probabilities = bert_model.predict(to_predict, n_best_size=1)
print(answers)