In [48]:
!pip install google-cloud-vision




In [49]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image
from io import BytesIO
import requests
import re
from google.cloud import vision
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint


In [50]:
# Initialize Google Vision client
client = vision.ImageAnnotatorClient()

# Define constants
IMAGE_SIZE = (224, 224)
NUM_CLASSES = 8
BATCH_SIZE = 32
EPOCHS = 3
NUM_CLASSES = 8  # Adjust based on your dataset

In [51]:

# Define the mapping of categories to unit types
category_to_units = {
    'item_weight': ['gram', 'kilogram', 'milligram', 'microgram', 'ounce', 'pound', 'ton'],
    'item_volume': ['millilitre', 'litre', 'cubic_centimetre', 'cubic_metre', 'gallon', 'quart', 'pint', 'cup'],
    'voltage': ['volt', 'kilovolt', 'millivolt'],
    'wattage': ['watt', 'kilowatt', 'megawatt', 'gigawatt'],
    'maximum_weight_recommendation': ['gram', 'kilogram', 'milligram', 'microgram', 'ounce', 'pound', 'ton'],
    'height': ['millimetre', 'centimetre', 'metre', 'kilometre', 'inch', 'foot', 'yard', 'mile'],
    'depth': ['millimetre', 'centimetre', 'metre', 'kilometre', 'inch', 'foot', 'yard', 'mile'],
    'width': ['millimetre', 'centimetre', 'metre', 'kilometre', 'inch', 'foot', 'yard', 'mile']
}

In [52]:
# Function to map predicted category to units
def map_category_to_units(predicted_category):
    if predicted_category in category_to_units:
        return category_to_units[predicted_category]
    return []


In [53]:
def load_image_from_url(url):
    try:
        response = requests.get(url)
        img_array = np.asarray(bytearray(response.content), dtype=np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
        return img
    except Exception as e:
        print(f"Error loading image from URL: {e}")
        return None

# OCR function using Google Vision API
def extract_text_from_image_google_vision(image_path):
    with open(image_path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations
    if texts:
        return texts[0].description  # Extract the most prominent text
    return ""

In [54]:
# Function to process extracted text and identify numerical values with units
def process_extracted_text(text):
    match = re.search(r'(\d+\.?\d*)\s*([a-zA-Z]+)', text)
    if match:
        value = match.group(1)  # e.g., 500
        unit = match.group(2)  # e.g., g
        return value, unit
    return None, None

In [55]:
train_df = pd.read_csv('/content/drive/MyDrive/AMAZONN/student_resource 3/student_resource 3/dataset/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/AMAZONN/student_resource 3/student_resource 3/dataset/test.csv')

In [56]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit the LabelEncoder on the entity_name column
label_encoder.fit(train_df['entity_name'])

# Define the data generator function
def data_generator(df, batch_size):
    while True:
        for i in range(0, len(df), batch_size):
            batch_df = df.iloc[i:i + batch_size]
            images = []
            labels = []
            for _, row in batch_df.iterrows():
                img_url = row['image_link']
                img = load_image_from_url(img_url)
                if img is not None:
                    img_resized = cv2.resize(img, IMAGE_SIZE)
                    img_array = img_to_array(img_resized)
                    images.append(img_array)
                    labels.append(row['entity_name'])  # Store the string label

            images = np.array(images)

            # Encode string labels to integers
            encoded_labels = label_encoder.transform(labels)

            # Convert integer labels to categorical (one-hot encoded)
            categorical_labels = to_categorical(encoded_labels, num_classes=NUM_CLASSES)

            yield images, categorical_labels


In [57]:
train_df.shape

(263859, 4)

In [58]:
test_df.shape

(131187, 4)

In [59]:
# Load pre-trained ResNet50 model for image feature extraction
base_model = tf.keras.applications.ResNet50(weights='imagenet', include_top=False, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3))

In [60]:
# Freeze base model layers
for layer in base_model.layers:
    layer.trainable = False

In [61]:
# Define the combined model
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(NUM_CLASSES, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [62]:
# Define data generators for training and validation
train_generator = data_generator(train_df, BATCH_SIZE)
val_generator = data_generator(test_df, BATCH_SIZE)

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

checkpoint_path = "model_checkpoint.weights.h5"
checkpoint = ModelCheckpoint(checkpoint_path, save_weights_only=True, save_best_only=True, monitor='val_accuracy', mode='max')

history = model.fit(train_generator,
                    steps_per_epoch=len(train_df) // BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=val_generator,
                    validation_steps=len(test_df) // BATCH_SIZE,
                    callbacks=[checkpoint])


Epoch 1/3
[1m   2/8245[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19:15:26[0m 8s/step - accuracy: 0.2656 - loss: 25.1086