In [None]:
from google.colab import files
# Ask the user to upload a file through Colab. The first name yielded by
# iterating the result is used below as the path of the dataset zip archive.
uploaded = files.upload()

import zipfile
import os

# Unpack the uploaded archive into a fixed directory that the loader reads from.
if not uploaded:
    # An empty result (presumably a cancelled upload) would otherwise surface
    # as a bare StopIteration from next(iter(...)) with no hint of the cause.
    raise RuntimeError(
        "No file was uploaded; re-run this cell and select the dataset .zip archive."
    )

# Only the first uploaded file is used; any additional uploads are ignored.
zip_filename = next(iter(uploaded))
extract_dir = '/content/dataset'
# A non-zip upload raises zipfile.BadZipFile here.
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)
print(f"Extracted to {extract_dir}")

import cv2
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.utils import shuffle

# Class names. Each one is also the name of the sub-folder that holds that
# class's images, and its position in this list is the integer label.
classes = [
    'clear skin',
    'dark spot',
    'puffy eyes',
    'wrinkles',
]
# Side length, in pixels, that every image is resized to (square).
IMG_SIZE = 224
# Seed passed to the shuffle so the sample order is repeatable.
RANDOM_STATE = 42

def load_images(data_dir, classes):
    """Load all readable images from ``data_dir/<class name>/`` folders.

    Each image is read with OpenCV, converted from BGR to RGB, resized to
    ``IMG_SIZE x IMG_SIZE`` and stored as ``float32`` (pixel values are NOT
    rescaled; they stay in the 0-255 range).

    Args:
        data_dir: Directory that contains one sub-folder per class.
        classes: Sequence of class (sub-folder) names. The position of a
            name in this sequence is the integer label of its images.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays. ``data`` stacks the
        loaded images and ``labels`` holds the matching class indices.
        Both arrays are empty (shape ``(0,)``) when nothing could be loaded.

    Missing class folders and unreadable files are reported with ``print``
    and skipped; they do not raise.
    """
    data, labels = [], []
    print("Loading images...")
    for idx, label in enumerate(classes):
        path = os.path.join(data_dir, label)
        # isdir rather than exists: a plain file that happens to carry the
        # class name would pass an existence check and then crash os.listdir.
        if not os.path.isdir(path):
            print(f"ERROR: Folder not found: {path}")
            continue
        print(f"Processing class: {label}")
        # os.listdir returns entries in arbitrary order. Sorting makes the
        # loaded sequence (and therefore any seeded shuffle applied to it)
        # reproducible across runs and machines.
        for img_name in sorted(os.listdir(path)):
            img_path = os.path.join(path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # imread signals failure by returning None, not by raising.
                print(f"Warning: Could not read {img_name}")
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            data.append(img.astype('float32'))
            labels.append(idx)
    return np.array(data), np.array(labels)

# Load the dataset, shuffle images and labels in unison with a fixed seed,
# and one-hot encode the integer labels.
X, y = load_images(extract_dir, classes)
if len(X) == 0:
    # Stop here with an actionable message; otherwise the script only fails
    # later with an opaque IndexError when it indexes X[0].
    raise RuntimeError(
        f"No images were loaded from {extract_dir}. Check that the archive "
        f"contains these folders directly at its top level: {classes}"
    )
X, y = shuffle(X, y, random_state=RANDOM_STATE)
y_encoded = to_categorical(y, num_classes=len(classes))

print(f"Data shape: {X.shape}")
print(f"Labels shape: {y_encoded.shape}")

# Bar chart of the number of loaded images per class, with the count
# written above each bar.
label_names = [classes[class_idx] for class_idx in y]
plt.figure(figsize=(8, 6))
ax = sns.countplot(x=label_names, palette="viridis")
plt.ylabel("Number of images")
plt.xlabel("Classes")
plt.title("Class Distribution")
for patch in ax.patches:
    height = patch.get_height()
    # Anchor the label at the horizontal centre of the bar, on its top edge.
    center_x = patch.get_x() + patch.get_width()/2
    ax.annotate(int(height), (center_x, height), ha='center', va='bottom')
plt.tight_layout()
plt.show()

# Augmentation preview: show five random variants of the first image.
augmentation_params = dict(
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_params)

# The generator expects a batch, so add a leading axis of length 1.
sample_img = X[0][np.newaxis, ...]

plt.figure(figsize=(12, 3))
augmented_batches = datagen.flow(sample_img, batch_size=1)
# flow() yields batches indefinitely; zipping with range(5) stops after five.
for i, batch in zip(range(5), augmented_batches):
    plt.subplot(1, 5, i+1)
    # Pixels are stored unscaled as float32, so bring them into [0, 1] for imshow.
    plt.imshow(batch[0] / 255.0)
    plt.title(f"Augmented {i+1}")
    plt.axis('off')
plt.suptitle("Sample Augmentations")
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

# Persist the image array and the one-hot labels as .npy files.
for out_path, array in (
    ('/content/X_data.npy', X),
    ('/content/y_labels.npy', y_encoded),
):
    np.save(out_path, array)
print("Preprocessed data saved as '/content/X_data.npy' and '/content/y_labels.npy'")
