In [1]:
%pip install google-cloud-vision





In [2]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from io import BytesIO
import requests
import re
from google.cloud import vision
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Initialize Google Vision client
client = vision.ImageAnnotatorClient()

# Define constants
IMAGE_SIZE = (224, 224)
NUM_CLASSES = 8
BATCH_SIZE = 32
EPOCHS = 10
NUM_CLASSES = 8  # Adjust based on your dataset

In [5]:

# Define the mapping of categories to unit types
category_to_units = {
    'item_weight': ['gram', 'kilogram', 'milligram', 'microgram', 'ounce', 'pound', 'ton'],
    'item_volume': ['millilitre', 'litre', 'cubic_centimetre', 'cubic_metre', 'gallon', 'quart', 'pint', 'cup'],
    'voltage': ['volt', 'kilovolt', 'millivolt'],
    'wattage': ['watt', 'kilowatt', 'megawatt', 'gigawatt'],
    'maximum_weight_recommendation': ['gram', 'kilogram', 'milligram', 'microgram', 'ounce', 'pound', 'ton'],
    'height': ['millimetre', 'centimetre', 'metre', 'kilometre', 'inch', 'foot', 'yard', 'mile'],
    'depth': ['millimetre', 'centimetre', 'metre', 'kilometre', 'inch', 'foot', 'yard', 'mile'],
    'width': ['millimetre', 'centimetre', 'metre', 'kilometre', 'inch', 'foot', 'yard', 'mile']
}

In [6]:
# Function to map predicted category to units
def map_category_to_units(predicted_category):
    if predicted_category in category_to_units:
        return category_to_units[predicted_category]
    return []


In [7]:
def load_image_from_url(url):
    try:
        response = requests.get(url)
        img_array = np.asarray(bytearray(response.content), dtype=np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        return img
    except Exception as e:
        print(f"Error loading image from URL: {e}")
        return None

# OCR function using Google Vision API
def extract_text_from_image_google_vision(image_path):
    with open(image_path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations
    if texts:
        return texts[0].description  # Extract the most prominent text
    return ""

In [8]:
# Function to process extracted text and identify numerical values with units
def process_extracted_text(text):
    match = re.search(r'(\d+\.?\d*)\s*([a-zA-Z]+)', text)
    if match:
        value = match.group(1)  # e.g., 500
        unit = match.group(2)  # e.g., g
        return value, unit
    return None, None

In [9]:
train_df = pd.read_csv('/content/drive/MyDrive/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/test.csv')

In [10]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit the LabelEncoder on the entity_name column
label_encoder.fit(train_df['entity_name'])

# Define the data generator function
def data_generator(df, batch_size):
    while True:
        for i in range(0, len(df), batch_size):
            batch_df = df.iloc[i:i + batch_size]
            images = []
            labels = []
            for _, row in batch_df.iterrows():
                img_url = row['image_link']
                img = load_image_from_url(img_url)
                if img is not None:
                    img_resized = cv2.resize(img, IMAGE_SIZE)
                    img_array = img_to_array(img_resized)
                    images.append(img_array)
                    labels.append(row['entity_name'])  # Store the string label

            images = np.array(images)

            # Encode string labels to integers
            encoded_labels = label_encoder.transform(labels)

            # Convert integer labels to categorical (one-hot encoded)
            categorical_labels = to_categorical(encoded_labels, num_classes=NUM_CLASSES)

            yield images, categorical_labels


In [17]:
train_df.shape

(10000, 4)

In [11]:
train_df=train_df.head(10000)

In [None]:
test_df.shape

(131187, 4)

In [12]:
test_df=test_df.head(10000)

In [13]:
# Load pre-trained ResNet50 model for image feature extraction
base_model = tf.keras.applications.ResNet50(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [14]:
# Freeze base model layers
for layer in base_model.layers:
    layer.trainable = False

In [15]:
# Define the combined model
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(NUM_CLASSES, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [16]:
# Define data generators for training and validation
train_generator = data_generator(train_df, BATCH_SIZE)
val_generator = data_generator(test_df, BATCH_SIZE)

In [18]:
from tensorflow.keras.callbacks import ModelCheckpoint

checkpoint_path = "model_checkpoint.weights.h5"
checkpoint = ModelCheckpoint(checkpoint_path, save_weights_only=True, save_best_only=True, monitor='val_accuracy', mode='max')

history = model.fit(train_generator,
                    steps_per_epoch=len(train_df) // BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=val_generator,
                    validation_steps=len(test_df) // BATCH_SIZE,
                    callbacks=[checkpoint])


Epoch 1/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m741s[0m 2s/step - accuracy: 0.6982 - loss: 8.9016 - val_accuracy: 0.1666 - val_loss: 7.2162
Epoch 2/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m691s[0m 2s/step - accuracy: 0.8294 - loss: 0.7478 - val_accuracy: 0.1672 - val_loss: 8.5347
Epoch 3/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m695s[0m 2s/step - accuracy: 0.8622 - loss: 0.4608 - val_accuracy: 0.1688 - val_loss: 10.5319
Epoch 4/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m676s[0m 2s/step - accuracy: 0.8883 - loss: 0.3180 - val_accuracy: 0.1800 - val_loss: 11.0704
Epoch 5/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m649s[0m 2s/step - accuracy: 0.9003 - loss: 0.2851 - val_accuracy: 0.1805 - val_loss: 10.4253
Epoch 6/10
[1m312/312[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m636s[0m 2s/step - accuracy: 0.9047 - loss: 0.2636 - val_accuracy: 0.1849 - val_loss: 13.2601
Epoch 7/10
[1m312

In [19]:
# Print the final training and validation accuracy
final_train_accuracy = history.history['accuracy'][-1]  # Accuracy of the last epoch
final_val_accuracy = history.history['val_accuracy'][-1]  # Validation accuracy of the last epoch

print(f"Final Training Accuracy: {final_train_accuracy:.4f}")
print(f"Final Validation Accuracy: {final_val_accuracy:.4f}")


Final Training Accuracy: 0.9276
Final Validation Accuracy: 0.1832


In [21]:
def predict_image_category(image_url):
    # Load and preprocess the image
    img = load_image_from_url(image_url)
    if img is not None:
        img_resized = cv2.resize(img, IMAGE_SIZE)
        img_array = img_to_array(img_resized)
        img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

        # Predict using the model
        predictions = model.predict(img_array)
        predicted_class = np.argmax(predictions, axis=1)  # Get the index of the max prediction
        predicted_label = label_encoder.inverse_transform(predicted_class)[0]  # Convert back to label

        print(f"Predicted Category: {predicted_label}")
        return predicted_label
    else:
        print("Error loading image.")
        return None


In [22]:
def extract_text_tesseract(image_url):
    img = load_image_from_url(image_url)
    if img is not None:
        text = pytesseract.image_to_string(img)
        return text.strip()
    return ""


In [23]:
pip install pytesseract

Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13


In [24]:
import pytesseract
from PIL import Image
import cv2

In [25]:
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'


In [26]:
def predict_and_compare(image_url):
    # Predict category using the model
    predicted_category = predict_image_category(image_url)

    # Extract text using Tesseract OCR
    tesseract_text = extract_text_tesseract(image_url)
    print(f"Tesseract OCR Extracted Text: {tesseract_text}")

    # Compare results
    print(f"Predicted Category: {predicted_category}")
    print(f"Tesseract Text: {tesseract_text}")


In [42]:
image_url = 'https://m.media-amazon.com/images/I/81poV0Le5lL.jpg'  # Replace with actual image URL
predict_and_compare(image_url)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Predicted Category: item_weight
Tesseract OCR Extracted Text: PRAKTISCH UND EFFIKTIV

fur eine Vielzahl an Grof&{en und Formen

r) BSE) eae Hochwertiges
mit Click-Verschluss g Polyester Gewebe

AL bl Ean eta
UV Stabil und mea cla ce

) ORV TEY ate rear y 4
danke sehr robustem /) wasserabweisende
600D Oxford Polyester Ya iw sxe silos [Ue Ae
Predicted Category: item_weight
Tesseract Text: PRAKTISCH UND EFFIKTIV

fur eine Vielzahl an Grof&{en und Formen

r) BSE) eae Hochwertiges
mit Click-Verschluss g Polyester Gewebe

AL bl Ean eta
UV Stabil und mea cla ce

) ORV TEY ate rear y 4
danke sehr robustem /) wasserabweisende
600D Oxford Polyester Ya iw sxe silos [Ue Ae


In [28]:
!apt-get install tesseract-ocr


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 3 newly installed, 0 to remove and 49 not upgraded.
Need to get 4,816 kB of archives.
After this operation, 15.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 4.1.1-2.1build1 [236 kB]
Fetched 4,816 kB in 3s (1,743 kB/s)
Selecting previously unselected package tesseract-ocr-eng.
(Reading database ... 123597 files and directories currently installed.)
Preparing to unpack .../tesseract-ocr-

In [29]:
!pip install pytesseract




In [30]:
import pytesseract
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'


In [43]:
from tensorflow.keras.models import save_model

# Save the entire model to an HDF5 file
model_save_path = 'saved_model.h5'
model.save()

