In [None]:
# Delete the sample_data folder and everything inside it.
# -r recurses into the folder; -f suppresses the error if it does not exist,
# so re-running this cell is harmless.

!rm -rf sample_data

In [None]:
# Constants
# Dataset file locations: the raw CSV and the processed CSV read by the training cell.
RAW_DATASET_PATH = 'myntra_products.csv'
PROCESSED_DATASET_PATH = 'processed_dataset.csv'
# Directory where the model will be saved.
# The user's files live under /content/drive/MyDrive once Drive is mounted;
# the mount root itself is not writable, so creating a folder directly under
# /content/drive fails with "OSError: [Errno 95] Operation not supported".
MODEL_DIR = '/content/drive/MyDrive/machine-learning/vit_model'


In [None]:
import os
import torch
import pandas as pd
import requests
from PIL import Image
from io import BytesIO
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import AutoImageProcessor, AutoModelForImageClassification
from sklearn.preprocessing import LabelEncoder
from torch.optim.adamw import AdamW
from torch.optim.lr_scheduler import StepLR


In [None]:
# preprocess the dataset
import cv2
import numpy as np
from sklearn.cluster import KMeans
from tqdm import tqdm

# Parameters
BATCH_SIZE = 32
IMAGE_SIZE = (100, 100)  # Resize images to 100x100 for faster processing
CACHE_DIR = "cached_images"
os.makedirs(CACHE_DIR, exist_ok=True)

# Load the raw dataset. Use the RAW_DATASET_PATH constant instead of repeating
# the file name, so the path only has to be changed in one place.
df = pd.read_csv(RAW_DATASET_PATH)

def download_and_cache_image(image_url, cache_dir=CACHE_DIR, timeout=10):
    """Download an image, resize it to IMAGE_SIZE and cache it locally.

    Args:
        image_url: URL of the image. Non-string or empty values (for example
            the NaN pandas yields for a missing cell) count as a failure.
        cache_dir: Directory in which the cached file is stored.
        timeout: Seconds to wait for the server before giving up, so a single
            stalled connection cannot hang the whole run.

    Returns:
        The path of the cached file, or None if the image could not be
        downloaded, decoded or saved.
    """
    from urllib.parse import urlparse

    if not isinstance(image_url, str) or not image_url:
        print(f"Error downloading {image_url}: not a valid URL")
        return None

    # Name the cache file after the last path segment of the URL; parsing the
    # URL keeps query strings and fragments out of the file name.
    basename = os.path.basename(urlparse(image_url).path)
    if not basename:
        # Without a file name the cache path would be the directory itself.
        print(f"Error downloading {image_url}: URL has no file name")
        return None

    filename = os.path.join(cache_dir, basename)
    if not os.path.exists(filename):
        try:
            response = requests.get(image_url, timeout=timeout)
            # Fail on 4xx/5xx instead of trying to decode an error page.
            response.raise_for_status()
            image = Image.open(BytesIO(response.content)).convert('RGB')
            image = image.resize(IMAGE_SIZE)
            image.save(filename)
        except Exception as e:
            # Best effort: report the failure and let the caller skip this image.
            print(f"Error downloading {image_url}: {e}")
            return None
    return filename

def get_dominant_color(image_path, k=4, random_state=0):
    """Find the dominant color of an image with KMeans clustering.

    Args:
        image_path: Path of the image file to analyse.
        k: Number of color clusters to fit.
        random_state: Seed for KMeans, so the same image always yields the
            same color across runs.

    Returns:
        The center of the most populated cluster as a '#rrggbb' hex string,
        or None if the image could not be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        return None

    # OpenCV loads pixels as BGR; convert to RGB and flatten to one row per pixel.
    pixels = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).reshape(-1, 3)

    # n_init is set explicitly so the result does not depend on the
    # scikit-learn version's default.
    clt = KMeans(n_clusters=k, n_init=10, random_state=random_state)
    clt.fit(pixels)

    # The dominant color is the center of the cluster with the most pixels.
    counts = np.bincount(clt.labels_)
    dominant_color = clt.cluster_centers_[counts.argmax()]

    # Convert to hex
    red, green, blue = (int(channel) for channel in dominant_color)
    return "#{:02x}{:02x}{:02x}".format(red, green, blue)

def batch_process_images(df, batch_size=BATCH_SIZE):
    """Extract the dominant color of every image referenced in the dataset.

    Args:
        df: DataFrame with an 'image_url' column.
        batch_size: Number of rows handled per progress-bar step.

    Returns:
        A list with one entry per row of df, in row order: a '#rrggbb' hex
        string, or "N/A" when the image could not be downloaded or read.
    """
    colors = []
    for start in tqdm(range(0, len(df), batch_size), desc="Processing Batches"):
        # Only the URL column is needed, so iterate over it directly instead
        # of materialising every row with iterrows().
        batch_urls = df['image_url'].iloc[start:start + batch_size]
        for image_url in batch_urls:
            # Download and cache image
            cached_image_path = download_and_cache_image(image_url)
            color = get_dominant_color(cached_image_path) if cached_image_path else None
            # Use one sentinel for every kind of failure (download or decode).
            colors.append(color if color is not None else "N/A")
    return colors

# Add colors to the dataset
df['colors'] = batch_process_images(df)

# Save the updated dataset to PROCESSED_DATASET_PATH: that is the file the
# training cell loads, so writing under any other name leaves it without input.
df.to_csv(PROCESSED_DATASET_PATH, index=False)
print("Dataset updated with colors and saved successfully!")


In [None]:

# Mount Google Drive at /content/drive (needed before anything can be
# written to MODEL_DIR, which lives on the mounted drive)
from google.colab import drive
drive.mount('/content/drive')


# Load the dataset from PROCESSED_DATASET_PATH; the FashionDataset class below
# reads its 'image_url', 'gender' and 'colors' columns
df = pd.read_csv(PROCESSED_DATASET_PATH)

# Data Preparation
import torch
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.nn import functional as F

# Update Dataset class to include color feature
class FashionDataset(Dataset):
    """Dataset yielding image pixel values, scaled color features and a label.

    Each item is built from one row of the dataframe, which must provide the
    columns 'image_url', 'gender' and 'colors'.

    Args:
        dataframe: Source rows. It is copied, so the caller's frame is left
            untouched.
        feature_extractor: Image processor, called as
            feature_extractor(images=..., return_tensors="pt"), whose result
            exposes `.pixel_values`.
        transform: Optional callable applied to the PIL image first.
    """

    # Seconds to wait for an image download before failing instead of hanging.
    REQUEST_TIMEOUT = 10
    # Characters allowed in the six-digit part of a '#rrggbb' color.
    _HEX_DIGITS = frozenset('0123456789abcdefABCDEF')

    def __init__(self, dataframe, feature_extractor, transform=None):
        # Work on a private copy: adding 'gender_encoded' must not modify the
        # frame the caller passed in.
        self.dataframe = dataframe.copy()
        self.feature_extractor = feature_extractor
        self.transform = transform
        self.label_encoder = LabelEncoder()
        self.dataframe['gender_encoded'] = self.label_encoder.fit_transform(self.dataframe['gender'])

        # Encode color feature
        self.scaler = StandardScaler()
        self.color_features = self.extract_color_features(self.dataframe['colors'])

    @classmethod
    def _hex_to_rgb(cls, value):
        """Return [r, g, b] for a '#rrggbb' string, or [0, 0, 0] for anything else."""
        is_hex_color = (
            isinstance(value, str)
            and len(value) == 7
            and value[0] == '#'
            and cls._HEX_DIGITS.issuperset(value[1:])
        )
        if not is_hex_color:
            # Missing or malformed colors (NaN, "N/A", bad hex) fall back to
            # zeros rather than aborting dataset construction.
            return [0, 0, 0]
        packed = int(value[1:], 16)
        return [(packed >> 16) & 255, (packed >> 8) & 255, packed & 255]

    def extract_color_features(self, color_column):
        """Convert hex colors to RGB triples and scale them with self.scaler.

        Args:
            color_column: Iterable of color values, one per row.

        Returns:
            The result of self.scaler.fit_transform on the list of
            [r, g, b] rows.
        """
        rgb_values = [self._hex_to_rgb(value) for value in color_column]
        return self.scaler.fit_transform(rgb_values)

    def __len__(self):
        """Return the number of rows in the dataset."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Download the image of row `idx` and return the model inputs.

        Returns:
            A dict with 'pixel_values' (feature extractor output without its
            leading batch dimension), 'color_features' (float tensor) and
            'labels' (long tensor holding the encoded gender).

        Raises:
            requests.RequestException: If the download fails, times out or
                returns an HTTP error status.
        """
        row = self.dataframe.iloc[idx]
        color_features = torch.tensor(self.color_features[idx], dtype=torch.float)

        # Download and decode the image; fail loudly on HTTP errors instead of
        # handing an error page to PIL.
        response = requests.get(row['image_url'], timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert('RGB')

        # Apply transformations
        # NOTE(review): if `transform` already yields a [0, 1] tensor, the
        # feature extractor's own rescaling is presumably applied on top of
        # it - confirm against the processor configuration.
        if self.transform:
            image = self.transform(image)

        # Extract pixel values using feature extractor
        pixel_values = self.feature_extractor(images=image, return_tensors="pt").pixel_values

        return {
            # Drop only the leading batch dimension added by the extractor.
            'pixel_values': pixel_values.squeeze(0),
            'color_features': color_features,
            'labels': torch.tensor(int(row['gender_encoded']), dtype=torch.long),
        }



model_name_or_path = 'google/vit-base-patch16-224-in21k'

# Initialize the feature extractor
feature_extractor = AutoImageProcessor.from_pretrained(model_name_or_path, use_fast=True)

# Define image transformations.
# Only resize here and keep the image as a PIL image: the image processor
# rescales and normalizes pixel values itself, so converting to a [0, 1]
# tensor first would rescale the pixels twice.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
])

# Create dataset and dataloader
dataset = FashionDataset(df, feature_extractor, transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

# Model Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForImageClassification.from_pretrained(
    model_name_or_path,
    num_labels=len(df['gender'].unique())
)
model.to(device)

# Training setup
optimizer = AdamW(model.parameters(), lr=5e-5)
# Learning rate is multiplied by 0.1 after every epoch.
scheduler = StepLR(optimizer, step_size=1, gamma=0.1)


# Training Loop
epochs = 3
for epoch in range(epochs):
    model.train()
    total_loss = 0.0

    for batch in dataloader:
        pixel_values = batch['pixel_values'].to(device)
        # Class indices must stay integer (long) for the classification loss.
        labels = batch['labels'].to(device)

        # batch['color_features'] is deliberately not fed to the model: the
        # ViT classifier only accepts a (batch, channels, height, width)
        # pixel_values tensor, so flattening the image and appending the
        # colors breaks the forward pass.
        # TODO: add a fusion head if the color features should influence
        # the prediction.

        # Forward pass; passing labels makes the model compute the loss itself.
        outputs = model(pixel_values=pixel_values, labels=labels)
        loss = outputs.loss

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Step the scheduler
    scheduler.step()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(dataloader)}")

# Save the model and feature extractor
# exist_ok avoids a race between an existence check and the directory creation.
os.makedirs(MODEL_DIR, exist_ok=True)

model.save_pretrained(MODEL_DIR)
feature_extractor.save_pretrained(MODEL_DIR)
print(f"Model saved successfully to {MODEL_DIR}!")

# To reload the model and feature extractor for further training
# model = AutoModelForImageClassification.from_pretrained(MODEL_DIR).to(device)
# feature_extractor = AutoImageProcessor.from_pretrained(MODEL_DIR)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3, Loss: 0.00016135239695124696
Epoch 2/3, Loss: 1.992046260662011e-08
Epoch 3/3, Loss: 1.5682986936882415e-08


OSError: [Errno 95] Operation not supported: '/content/drive/machine-learning'