<a href="https://colab.research.google.com/github/thenarrator2/AmazonML/blob/main/ResNet50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import io
import os
import pandas as pd
import numpy as np
import requests
from PIL import Image
from io import BytesIO

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import ResNet50
import hashlib
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [None]:
import pandas as pd
import numpy as np
from io import BytesIO
from sklearn.model_selection import train_test_split
import os
from PIL import Image
import hashlib
import requests
import logging
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers, Model

# Directory where preprocessed images are cached as .npy arrays.
processed_images_dir = 'processed_images'

# exist_ok=True makes the call idempotent and removes the check-then-create race.
os.makedirs(processed_images_dir, exist_ok=True)

# Send INFO-and-above records from the root logger to image_processing.log.
# (basicConfig does nothing if the root logger already has handlers.)
logging.basicConfig(filename='image_processing.log', level=logging.INFO)

def load_data(file_path):
    """Read the dataset CSV and check that it carries every required column.

    Args:
        file_path: Path of a comma-separated file.

    Returns:
        The loaded ``pandas.DataFrame``.

    Raises:
        KeyError: Naming the first required column that is absent.
    """
    frame = pd.read_csv(file_path, sep=',')
    expected = ('image_link', 'group_id', 'entity_name', 'entity_value', 'text')
    # Only the first missing column (in the order above) is reported.
    absent = next((name for name in expected if name not in frame.columns), None)
    if absent is not None:
        raise KeyError(f"Required column '{absent}' not found in the dataset")
    return frame

def process_image(image_url, target_size=(224, 224)):
    """Return the image at ``image_url`` as a scaled RGB array, using a disk cache.

    The result is cached in ``processed_images_dir`` as a ``.npy`` file named
    after the MD5 hex digest of the URL (used only as a cache key). A cache
    entry that cannot be loaded is rebuilt from the network rather than being
    answered with a blank image on every later call.

    Args:
        image_url: URL string of the image to fetch.
        target_size: ``(rows, cols)`` of the returned array.

    Returns:
        Array of shape ``target_size + (3,)`` holding pixel values divided by
        255.0, or an all-zero array of that shape if the image could not be
        downloaded or decoded. Errors are logged, never raised from the
        download/decode step.
    """
    image_hash = hashlib.md5(image_url.encode()).hexdigest()
    image_filename = os.path.join(processed_images_dir, image_hash + '.npy')

    if os.path.exists(image_filename):
        logging.info(f"Loading cached image: {image_filename}")
        try:
            return np.load(image_filename)
        except Exception as e:
            # Unreadable cache entry (e.g. a partially written file): fall
            # through and regenerate it instead of returning zeros forever.
            logging.error(f"Error loading cached image {image_filename}: {e}")

    try:
        response = requests.get(image_url, stream=True, timeout=10)
        response.raise_for_status()

        # Decode once and normalise to RGB. Whether the bytes can be opened
        # does not depend on the conversion mode, so retrying with other
        # modes only repeated the same failure; RGB also guarantees exactly
        # three channels.
        with Image.open(BytesIO(response.content)) as raw_image:
            img = raw_image.convert('RGB')

        # PIL's resize takes (width, height); swap so the array comes out as
        # (rows, cols, 3), matching the zero-filled fallback below.
        img = img.resize((target_size[1], target_size[0]))
        img_array = np.array(img) / 255.0

        np.save(image_filename, img_array)
        logging.info(f"Processed and cached image: {image_filename}")

        return img_array
    except requests.RequestException as e:
        logging.error(f"Error downloading image {image_url}: {e}")
    except IOError as e:
        logging.error(f"Error opening image {image_url}: {e}")
    except Exception as e:
        logging.error(f"Unexpected error processing image {image_url}: {e}")

    return np.zeros(target_size + (3,))

def process_text(text, max_length=100, tokenizer=None):
    """Turn one text into a fixed-length sequence of integer token ids.

    Args:
        text: Raw text. Missing values become the empty string; other
            non-string values are converted with ``str``.
        max_length: Length of the returned sequence (``pad_sequences``
            pads or truncates to it using its defaults).
        tokenizer: Optional, already fitted ``Tokenizer`` shared by the whole
            corpus. Pass one whenever the ids feed a model: without it a
            throwaway tokenizer is fitted on this single text (the legacy
            behaviour), so the same word gets different ids in different
            texts.

    Returns:
        1-D array of ``max_length`` token ids.
    """
    text = "" if pd.isna(text) else str(text)
    if tokenizer is None:
        # Legacy fallback: vocabulary built from this one text only.
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts([text])
    sequence = tokenizer.texts_to_sequences([text])
    return pad_sequences(sequence, maxlen=max_length)[0]

def encode_entities(df):
    """Add an integer ``entity_encoded`` column derived from ``entity_name``.

    Codes are assigned in the order ``Series.unique()`` yields the names.
    The frame is modified in place and also returned.

    Returns:
        ``(df, count)`` where ``count`` is the number of distinct names.
    """
    distinct_names = df['entity_name'].unique()
    code_of = dict(zip(distinct_names, range(len(distinct_names))))
    df['entity_encoded'] = df['entity_name'].map(code_of)
    return df, len(distinct_names)

def prepare_data(df):
    """Build the image, text and label arrays for the model from ``df``.

    Adds a ``processed_image_path`` column to ``df`` (in place) holding the
    cache path of each row's image, then loads the images, tokenizes the
    ``text`` column and one-hot encodes ``entity_encoded``.

    Returns:
        ``(X_img, X_text, y)``: image array, ``(n_rows, 100)`` token-id array
        and one-hot label array.
    """
    # str() keeps a non-string link (e.g. NaN) from raising at .encode() and
    # aborting the whole run before any image is loaded.
    df['processed_image_path'] = [
        os.path.join(processed_images_dir, f"{hashlib.md5(str(url).encode()).hexdigest()}.npy")
        for url in df['image_link']
    ]

    # Load and preprocess images
    X_img = load_and_preprocess_images(df, image_dir=processed_images_dir)

    # Fit ONE tokenizer on the whole corpus so a given word maps to the same
    # id in every row. Fitting a fresh tokenizer per row made ids depend only
    # on word rank inside that row, which an embedding layer cannot learn from.
    # num_words=10000 keeps ids below the Embedding input_dim of 10000 that
    # create_model uses.
    texts = ["" if pd.isna(text) else str(text) for text in df['text']]
    tokenizer = Tokenizer(num_words=10000)
    tokenizer.fit_on_texts(texts)
    X_text = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=100)

    # Explicit float dtype: the get_dummies default dtype differs between
    # pandas versions (bool in pandas >= 2.0).
    y = pd.get_dummies(df['entity_encoded'], dtype='float32').values

    print(f"X_img shape: {X_img.shape}")
    print(f"X_text shape: {X_text.shape}")
    print(f"y shape: {y.shape}")
    return X_img, X_text, y

def load_and_preprocess_images(df, image_dir='processed_images', target_size=(224, 224)):
    """Load one image array per row of ``df`` into a single stacked array.

    For each row the cached file at ``processed_image_path`` is loaded if it
    exists; otherwise ``process_image`` is called with the row's
    ``image_link``. Rows whose image fails to load, or whose array is not
    ``(target_size[0], target_size[1], 3)``, are left as all zeros and the
    problem is logged.

    Args:
        df: Frame with ``processed_image_path`` and ``image_link`` columns.
        image_dir: Unused; kept so existing callers keep working.
        target_size: ``(rows, cols)`` expected for every image.

    Returns:
        Array of shape ``(len(df), target_size[0], target_size[1], 3)``,
        including for an empty frame.
    """
    expected_shape = (target_size[0], target_size[1], 3)
    # Preallocate the result: collecting arrays in a list and converting at
    # the end held two full copies of the data at peak. Zero-initialisation
    # doubles as the fallback for any row that fails below.
    images = np.zeros((len(df),) + expected_shape)

    rows = zip(df['processed_image_path'], df['image_link'])
    for position, (image_path, image_url) in enumerate(rows):
        try:
            if os.path.exists(image_path):
                img_array = np.load(image_path)
            else:
                img_array = process_image(image_url, target_size)

            if img_array.shape == expected_shape:
                images[position] = img_array
            else:
                logging.warning(f"Image shape inconsistent for path: {image_path}, shape: {img_array.shape}")
        except Exception as e:
            logging.error(f"Error loading/processing image {image_path}: {e}")

    return images

# Main execution: build the arrays, split them 80/20 and persist each split.
file_path = '/content/updated_dataset_with_ocr_results (1).csv'

try:
    df = load_data(file_path)
    df, num_entities = encode_entities(df)
    X_img, X_text, y = prepare_data(df)

    # Fixed seed so the same rows land in the test split on every run.
    (X_img_train, X_img_test,
     X_text_train, X_text_test,
     y_train, y_test) = train_test_split(X_img, X_text, y, test_size=0.2, random_state=42)

    print(f"Training samples: {len(X_img_train)}")
    print(f"Test samples: {len(X_img_test)}")
    print(f"Number of entities: {num_entities}")

    # Write each split to the working directory, in the original order.
    for out_name, split_array in (
        ('X_img_train.npy', X_img_train),
        ('X_img_test.npy', X_img_test),
        ('X_text_train.npy', X_text_train),
        ('X_text_test.npy', X_text_test),
        ('y_train.npy', y_train),
        ('y_test.npy', y_test),
    ):
        np.save(out_name, split_array)

    print("Data preprocessing completed and saved.")

except KeyError as e:
    # Raised by load_data when a required column is missing.
    print(f"Error: {e}")
    print("Please check your dataset structure and ensure all required columns are present.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
finally:
    print("Script execution completed.")

ERROR:root:Error opening image https://m.media-amazon.com/images/I/41hO04updoL.jpg: Failed to open image in any mode


In [None]:
# 1. Model Creation
def create_model(img_shape, text_shape, num_entities, vocab_size=10000):
    """Build the two-branch (image + text) entity classifier.

    Args:
        img_shape: Shape of one image, passed to the image ``Input`` layer.
        text_shape: Length of one token-id sequence.
        num_entities: Number of classes, i.e. units in the output layer.
        vocab_size: ``input_dim`` of the embedding; every token id fed to the
            model must be smaller than this. Defaults to the previously
            hard-coded 10000.

    Returns:
        An uncompiled Keras ``Model`` taking ``[image, text]`` inputs.
    """
    # Image input branch
    # NOTE(review): the preprocessing cell scales pixels to [0, 1]; the
    # ImageNet ResNet50 weights are documented for inputs passed through
    # keras.applications.resnet50.preprocess_input - confirm this mismatch
    # is intended.
    img_input = layers.Input(shape=img_shape)
    base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=img_input)
    x = layers.GlobalAveragePooling2D()(base_model.output)
    x = layers.Dense(256, activation='relu')(x)

    # Text input branch
    text_input = layers.Input(shape=(text_shape,))
    y = layers.Embedding(input_dim=vocab_size, output_dim=100)(text_input)
    y = layers.LSTM(128)(y)
    y = layers.Dense(256, activation='relu')(y)

    # Combine branches
    combined = layers.concatenate([x, y])
    z = layers.Dense(128, activation='relu')(combined)
    z = layers.Dropout(0.5)(z)
    # Each sample has exactly one entity (labels are one-hot), so the output
    # is a softmax distribution over classes rather than independent
    # per-class sigmoids.
    output = layers.Dense(num_entities, activation='softmax')(z)

    model = Model(inputs=[img_input, text_input], outputs=output)
    return model

# 2. Model Compilation
def compile_model(model):
    """Compile ``model`` for single-label, one-hot-encoded classification.

    Uses the Adam optimizer and tracks ``accuracy``. The loss is categorical
    cross-entropy because every sample carries exactly one class in its
    one-hot label row; binary cross-entropy would score each class as an
    independent yes/no decision.

    Returns:
        The same ``model`` object, compiled.
    """
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# 3. Model Training
def train_model(model, X_img_train, X_text_train, y_train, X_img_val, X_text_val, y_val, epochs=50, batch_size=32):
    """Fit ``model`` on the training arrays, validating after every epoch.

    The learning rate is halved (down to ``1e-6``) whenever ``val_loss`` has
    not improved for 3 epochs.

    Args:
        model: Compiled Keras model taking ``[image, text]`` inputs.
        X_img_train, X_text_train, y_train: Training inputs and labels.
        X_img_val, X_text_val, y_val: Validation inputs and labels.
        epochs: Number of epochs to run.
        batch_size: Samples per gradient update.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

    history = model.fit(
        [X_img_train, X_text_train], y_train,
        validation_data=([X_img_val, X_text_val], y_val),
        epochs=epochs,
        batch_size=batch_size,
        # The scheduler was previously created but never handed to fit(),
        # so it had no effect.
        callbacks=[lr_scheduler],
    )
    return history

# Main execution: rebuild the arrays, train the model and save it.
try:
    df = load_data(file_path)
    df, num_entities = encode_entities(df)
    X_img, X_text, y = prepare_data(df)

    # Same split parameters as the preprocessing cell, so the held-out test
    # rows match the X_*_test.npy files saved there.
    X_img_train, X_img_test, X_text_train, X_text_test, y_train, y_test = train_test_split(
        X_img, X_text, y, test_size=0.2, random_state=42
    )

    # Carve a validation set out of the training rows. Validating on the
    # test split would let it influence training (val_loss is what training
    # callbacks monitor), making the later test-set evaluation optimistic.
    X_img_train, X_img_val, X_text_train, X_text_val, y_train, y_val = train_test_split(
        X_img_train, X_text_train, y_train, test_size=0.1, random_state=42
    )

    img_shape = X_img_train.shape[1:]
    text_shape = X_text_train.shape[1]

    model = create_model(img_shape, text_shape, num_entities)
    model = compile_model(model)

    history = train_model(model, X_img_train, X_text_train, y_train, X_img_val, X_text_val, y_val)

    model.save('entity_extraction_model.h5')
    print("Model training completed and saved.")

except KeyError as e:
    # Raised by load_data when a required column is missing.
    print(f"Error: {e}")
    print("Please check your dataset structure and ensure all required columns are present.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")


In [None]:
def evaluate_model(model, X_img_test, X_text_test, y_test):
    """Predict on the test inputs and print a per-class classification report.

    Predicted and true class indices are the argmax along axis 1 of the
    model output and of ``y_test`` respectively.

    Returns:
        ``(y_pred_classes, y_true_classes)`` as arrays of class indices.
    """
    scores = model.predict([X_img_test, X_text_test])
    predicted_idx, actual_idx = (np.argmax(matrix, axis=1) for matrix in (scores, y_test))

    report = classification_report(actual_idx, predicted_idx)
    print(report)
    return predicted_idx, actual_idx

def plot_confusion_matrix(y_true, y_pred, class_names):
    """Draw the confusion matrix of ``y_true`` vs ``y_pred`` as a heatmap.

    Args:
        y_true: True class indices.
        y_pred: Predicted class indices.
        class_names: Display name for each class index, in index order.
    """
    # Pin the matrix to one row/column per known class. Without `labels`,
    # a class missing from both y_true and y_pred is dropped and the matrix
    # no longer lines up with class_names.
    cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

def plot_training_history(history):
    """Plot train/validation accuracy and loss per epoch, side by side.

    Args:
        history: Object whose ``history`` mapping has the keys ``accuracy``,
            ``val_accuracy``, ``loss`` and ``val_loss``.
    """
    # (subplot position, train key, validation key,
    #  train legend, validation legend, title, y-axis label)
    panels = (
        (121, 'accuracy', 'val_accuracy',
         'Train Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
        (122, 'loss', 'val_loss',
         'Train Loss', 'Validation Loss', 'Model Loss', 'Loss'),
    )

    plt.figure(figsize=(12, 4))
    for position, train_key, val_key, train_label, val_label, title, y_label in panels:
        plt.subplot(position)
        plt.plot(history.history[train_key], label=train_label)
        plt.plot(history.history[val_key], label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Main execution: evaluate the saved model on the saved test split.
try:
    # Load the model and test data written to the working directory by the
    # training and preprocessing cells.
    model = tf.keras.models.load_model('entity_extraction_model.h5')
    X_img_test = np.load('X_img_test.npy')
    X_text_test = np.load('X_text_test.npy')
    y_test = np.load('y_test.npy')

    # Load original dataframe to get entity names.
    # unique() is the same call encode_entities uses to number the classes,
    # so position i here is the name of class index i.
    df = pd.read_csv('/content/updated_dataset_with_ocr_results (1).csv', sep=',')
    class_names = df['entity_name'].unique()

    y_pred_classes, y_true_classes = evaluate_model(model, X_img_test, X_text_test, y_test)
    plot_confusion_matrix(y_true_classes, y_pred_classes, class_names)

    # Note: We can't plot training history here as it's not saved.
    # If you want to plot it, you need to save the history in the training step.

    print("Model evaluation completed.")

except Exception as e:
    # Any failure (missing files, shape mismatch, ...) is reported, not raised.
    print(f"An error occurred during model evaluation: {e}")