<a href="https://colab.research.google.com/github/shivansh9604/Amazon_Hackathon/blob/main/amazon_hack.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install pillow requests



In [None]:
import requests
from PIL import Image
from io import BytesIO
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence

class ImageURLDataGenerator(Sequence):
    """Keras ``Sequence`` that downloads images from URLs and serves them in batches.

    Every batch is a float32 array of shape ``(n, height, width, 3)`` with pixel
    values scaled to ``[0, 1]``.  When ``labels`` is given, the matching label
    rows are returned alongside the images.  An image that cannot be downloaded
    or decoded is replaced by an all-zero image, so one bad URL does not abort
    the whole run.

    Args:
        image_urls: Image URLs, indexable by integer position.
        labels: Optional per-image labels aligned with ``image_urls``.  When
            ``None`` (inference), batches contain images only.
        batch_size: Maximum number of images per batch; must be at least 1.
        target_size: ``(height, width)`` every image is resized to.
        shuffle: Whether to shuffle the sample order at construction time and
            again after every epoch.
        augmentation: Stored on the instance; this class does not apply it.
        timeout: Seconds to wait on each HTTP request before giving up.
        **kwargs: Forwarded unchanged to the ``Sequence`` base constructor.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if ``labels`` is
            given with a length different from ``image_urls``.
    """

    def __init__(self, image_urls, labels=None, batch_size=32, target_size=(224, 224),
                 shuffle=True, augmentation=None, timeout=10, **kwargs):
        # The base constructor must run: recent Keras versions warn when it is
        # skipped and rely on it to set up the dataset's loading options.
        super().__init__(**kwargs)

        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        if labels is not None and len(labels) != len(image_urls):
            raise ValueError(
                f"labels has {len(labels)} entries but image_urls has {len(image_urls)}"
            )

        self.image_urls = image_urls
        self.labels = labels
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.augmentation = augmentation
        self.timeout = timeout
        self.indexes = np.arange(len(self.image_urls))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch, including a final partial batch."""
        # Round up so trailing samples are not silently dropped (important at
        # inference time, where every row needs a prediction).
        return int(np.ceil(len(self.image_urls) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)``, or just ``X`` when there are no labels."""
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_image_urls = [self.image_urls[k] for k in batch_indexes]
        X = self._generate_X(batch_image_urls)

        if self.labels is None:
            return X

        batch_labels = [self.labels[k] for k in batch_indexes]
        return X, self._generate_y(batch_labels)

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _generate_X(self, batch_image_urls):
        """Download every URL in the batch and stack the images into one array."""
        # Size the array by the actual number of URLs so a short final batch
        # carries no uninitialised rows.
        X = np.empty((len(batch_image_urls), *self.target_size, 3), dtype=np.float32)
        for i, url in enumerate(batch_image_urls):
            X[i] = self._load_image_from_url(url)
        return X

    def _generate_y(self, batch_labels):
        """Stack the batch labels into a NumPy array."""
        return np.array(batch_labels)

    def _load_image_from_url(self, url):
        """Fetch one image and return it as a ``(height, width, 3)`` array in ``[0, 1]``.

        Any download or decoding failure is reported on stdout and replaced by
        an all-zero image of the target size.
        """
        height, width = self.target_size
        try:
            response = requests.get(url, timeout=self.timeout)
            # Fail fast on HTTP errors instead of trying to decode an error page.
            response.raise_for_status()
            img = Image.open(BytesIO(response.content)).convert('RGB')
            # PIL expects (width, height), the reverse of the array layout.
            img = img.resize((width, height))
            img = np.asarray(img, dtype=np.float32) / 255.0  # Normalize pixel values to [0, 1]
        except Exception as e:
            # Deliberate best-effort: keep the batch shape intact with a blank image.
            print(f"Error loading image: {url} - {e}")
            img = np.zeros((height, width, 3), dtype=np.float32)
        return img


In [None]:
import pandas as pd
from sklearn.preprocessing import LabelBinarizer

# Batch/image settings shared by both generators
batch_size = 32
target_size = (224, 224)

# Read the competition CSVs (adjust the paths if the files live elsewhere)
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Training inputs: one URL and one label per row.
# 'entity_name' is the target here; 'entity_value' is the alternative column.
train_image_urls = train_df['image_link'].to_list()
train_labels = train_df['entity_name'].to_list()

# The test set only provides URLs, no labels
test_image_urls = test_df['image_link'].to_list()

# Fit the binarizer on the training labels, then encode them as indicator rows
label_binarizer = LabelBinarizer().fit(train_labels)
train_labels_one_hot = label_binarizer.transform(train_labels)

# Shuffled generator for training; ordered, label-free generator for inference
train_generator = ImageURLDataGenerator(
    train_image_urls,
    train_labels_one_hot,
    batch_size=batch_size,
    target_size=target_size,
)
test_generator = ImageURLDataGenerator(
    test_image_urls,
    batch_size=batch_size,
    target_size=target_size,
    shuffle=False,
)


In [None]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model

# Load ResNet50 pre-trained on ImageNet, without its classification head.
# The input shape is derived from `target_size` so it always matches the generators.
# NOTE(review): the Keras docs describe a dedicated `preprocess_input` for ResNet50,
# while the generator feeds pixels rescaled to [0, 1] — confirm this is intended.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(*target_size, 3))

# Freeze the backbone so only the new classification head is trained
base_model.trainable = False

# Add custom layers for classification
x = Flatten()(base_model.output)
x = Dense(1024, activation='relu')(x)

# Read the class count from the encoded label matrix. Indexing the generator
# (`train_generator[0]`) would download a full batch of images just for one number.
num_classes = train_labels_one_hot.shape[1]
predictions = Dense(num_classes, activation='softmax')(x)

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [None]:
# Fit the classification head on the URL-backed training batches.
# One epoch walks through every batch the generator reports; no validation data is used.
history = model.fit(train_generator, epochs=10, steps_per_epoch=len(train_generator))


Epoch 1/10


  self._warn_if_super_not_called()


[1m  61/8245[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m22:35:28[0m 10s/step - accuracy: 0.2332 - loss: 31.1769

KeyboardInterrupt: 