In [1]:
import os

import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image, UnidentifiedImageError
from tqdm import tqdm

In [2]:
# Source folder of raw images and destination folder for the resized copies.
# NOTE(review): both are absolute, machine-specific paths; anyone running this
# notebook elsewhere has to edit them first.
IMAGES_FOLDER = 'E:/AI ML DL/Amazon_ML_Challenge/data/raw/images/'
PROCESSED_FOLDER = 'E:/AI ML DL/Amazon_ML_Challenge/data/processed/images/'
# Create the destination (and any missing parents); a no-op if it already exists.
os.makedirs(PROCESSED_FOLDER, exist_ok=True)

In [3]:
def preprocess_image(image_path, output_path, size=(224, 224)):
    """Resize one image and write the result to ``output_path``.

    The output format is chosen by Pillow from the extension of
    ``output_path``. When that extension selects JPEG and the image is in a
    mode the JPEG encoder cannot write (for example ``RGBA``, ``LA`` or ``P``),
    the image is converted to ``RGB`` first instead of failing with
    ``OSError: cannot write mode ... as JPEG``. Images that could already be
    saved are processed exactly as before.

    Args:
        image_path: Path of the image to read.
        output_path: Path the resized image is written to.
        size: Target ``(width, height)`` in pixels; the aspect ratio is not
            preserved.

    Raises:
        PIL.UnidentifiedImageError: If ``image_path`` is not a readable image.
        OSError: If the file cannot be opened or the result cannot be written.
    """
    # Extensions Pillow maps to its JPEG writer, and the modes that writer accepts.
    jpeg_extensions = {'.jpg', '.jpeg', '.jpe', '.jfif'}
    jpeg_writable_modes = {'1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'}
    saves_as_jpeg = os.path.splitext(output_path)[1].lower() in jpeg_extensions

    with Image.open(image_path) as img:
        if saves_as_jpeg and img.mode not in jpeg_writable_modes:
            # Drops alpha / expands the palette so the JPEG encoder accepts it.
            img = img.convert('RGB')
        img.resize(size).save(output_path)

In [4]:
# Resize every regular file found directly in IMAGES_FOLDER into PROCESSED_FOLDER,
# keeping the original file name.
for image_file in tqdm(os.listdir(IMAGES_FOLDER), desc="Resizing images"):
    input_image_path = os.path.join(IMAGES_FOLDER, image_file)
    # os.listdir also returns sub-directories, which Image.open cannot read.
    if not os.path.isfile(input_image_path):
        continue
    output_image_path = os.path.join(PROCESSED_FOLDER, image_file)
    try:
        preprocess_image(input_image_path, output_image_path)
    except (UnidentifiedImageError, OSError) as exc:
        # One corrupt or non-image file should not abort the whole run;
        # report it and carry on with the rest.
        print(f"Skipping {input_image_path}: {exc}")

In [None]:
def extract_image_features(image_path):
    """Return the raw pixel values of an image as a flat 1-D NumPy array.

    The image is opened with Pillow, decoded into an array and flattened, so
    the result length is the product of the decoded array's dimensions.
    No resizing or normalisation is applied here.

    Args:
        image_path: Path of the image file to read.

    Returns:
        numpy.ndarray: One-dimensional copy of the pixel data.
    """
    with Image.open(image_path) as img:
        # np.array() decodes and copies the pixels while the file is still open.
        pixels = np.array(img)
    return pixels.flatten()


# Quick sanity check on one already-resized image: show the first 100 values.
sample_image_path = 'E:/AI ML DL/Amazon_ML_Challenge/data/processed/images/41-NCxNuBxL.jpg'
features = extract_image_features(sample_image_path)
print(features[:100])

In [7]:
import pandas as pd

# Load the two sample prediction CSVs from the raw dataset folder.
sample_test_out_df = pd.read_csv(
    'E:/AI ML DL/Amazon_ML_Challenge/data/raw/dataset/sample_test_out.csv'
)
sample_test_out_fail_df = pd.read_csv(
    'E:/AI ML DL/Amazon_ML_Challenge/data/raw/dataset/sample_test_out_fail.csv'
)

In [None]:
# Preview the first rows of the sample output file.
sample_test_out_df.head()

In [None]:
# Preview the first rows of the "fail" sample output file for comparison.
sample_test_out_fail_df.head()

In [None]:
# Summarise columns, dtypes and non-null counts of the sample output file.
sample_test_out_df.info()

In [None]:
# Rows of the sample output whose 'prediction' value is missing (NaN/None).
missing_predictions = sample_test_out_df.loc[
    sample_test_out_df['prediction'].isna()
]
print(missing_predictions)

In [2]:
import os
from PIL import Image, ImageFile, UnidentifiedImageError
import torch
import torchvision.transforms as transforms
from tqdm import tqdm

In [3]:
# Input folders holding the raw train/test images, and the output folders
# that receive one saved tensor file per converted image.
# NOTE(review): absolute, machine-specific Windows paths (raw strings so the
# backslashes are kept literally); adjust before running on another machine.
TRAIN_IMAGES_FOLDER = r'E:\AI ML DL\Amazon_ML_Challenge\data\raw\images\train'
TEST_IMAGES_FOLDER = r'E:\AI ML DL\Amazon_ML_Challenge\data\raw\images\test'
OUTPUT_TRAIN_TENSOR_FOLDER = r'E:\AI ML DL\Amazon_ML_Challenge\data\processed\tensors\train'
OUTPUT_TEST_TENSOR_FOLDER = r'E:\AI ML DL\Amazon_ML_Challenge\data\processed\tensors\test'

In [7]:
# Ask Pillow to load truncated image files instead of raising on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Preprocessing pipeline: force a 224x224 size, then convert the PIL image
# to a torch tensor.
transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

In [8]:
def save_batch_tensors_to_disk(batch_tensors, output_folder, batch_idx):
    """Save every tensor of a batch to its own ``.pt`` file.

    Files are named ``tensor_batch_<batch_idx>_img_<idx>.pt`` where ``idx`` is
    the position of the tensor inside ``batch_tensors``.

    Args:
        batch_tensors: Iterable of tensors, e.g. a stacked batch tensor
            (iterated along its first dimension) or a list of tensors.
        output_folder: Existing directory the files are written to.
        batch_idx: Batch number used in the file names.
    """
    for idx, tensor in enumerate(batch_tensors):
        tensor_path = os.path.join(output_folder, f'tensor_batch_{batch_idx}_img_{idx}.pt')
        # Items taken from a stacked batch are views that share the batch's
        # storage, and torch.save serializes the whole storage of a view.
        # Cloning gives each tensor its own storage so each file holds a
        # single image instead of the entire batch.
        payload = tensor.clone() if torch.is_tensor(tensor) else tensor
        torch.save(payload, tensor_path)
        print(f"Saved {tensor_path}")

In [9]:
def convert_and_save_images_to_tensors(images_folder, output_folder, batch_size=32):
    """Convert every image in ``images_folder`` to a tensor and save it to disk.

    The folder listing is processed in groups of ``batch_size`` files. Each
    file is passed to ``image_to_tensor``; results that are ``None`` are
    skipped, and the remaining tensors of the group are handed to
    ``save_batch_tensors_to_disk`` together with the zero-based batch number.

    Because skipped files do not consume a position, the per-batch index in
    the saved file names does not necessarily match the file's position in
    the folder listing.

    NOTE(review): ``image_to_tensor`` is not defined in the visible notebook
    cells; it presumably returns ``None`` for unreadable images. Confirm that
    its defining cell still exists.

    Args:
        images_folder: Directory containing the input images.
        output_folder: Directory the tensors are written to; created if
            missing.
        batch_size: Number of files handled per batch; must be at least 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    os.makedirs(output_folder, exist_ok=True)  # Ensure the output folder exists
    # os.listdir returns entries in arbitrary order; sorting makes the
    # batch/position numbering reproducible between runs.
    image_files = sorted(os.listdir(images_folder))

    for start in range(0, len(image_files), batch_size):
        batch_idx = start // batch_size
        batch_files = image_files[start:start + batch_size]
        batch_tensors = []

        for image_file in tqdm(batch_files, desc=f"Converting batch {batch_idx + 1}"):
            tensor_image = image_to_tensor(os.path.join(images_folder, image_file))
            if tensor_image is not None:
                batch_tensors.append(tensor_image)

        if batch_tensors:
            # The tensors are saved one file each, so they are passed as a
            # plain list. Stacking them first would fail on differing shapes
            # and turn every item into a view of the whole batch's storage.
            save_batch_tensors_to_disk(batch_tensors, output_folder, batch_idx)

In [None]:
# Convert the training images to tensors, 32 files per batch.
convert_and_save_images_to_tensors(
    images_folder=TRAIN_IMAGES_FOLDER,
    output_folder=OUTPUT_TRAIN_TENSOR_FOLDER,
    batch_size=32,
)
print("Finished processing train images.")

In [None]:
# Convert the test images to tensors, 32 files per batch.
convert_and_save_images_to_tensors(
    images_folder=TEST_IMAGES_FOLDER,
    output_folder=OUTPUT_TEST_TENSOR_FOLDER,
    batch_size=32,
)
print("Finished processing test images.")