In [None]:
pip install opendatasets --upgrade

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl.metadata (9.2 kB)
Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.22


In [None]:
import opendatasets as od
# Kaggle page of the anime-face dataset whose images are tagged with hair and eye colour.
dataset_url = 'https://www.kaggle.com/datasets/mnkbiswas/anime-face-with-eye-and-hair-color-tagged'
# Downloads the archive into a folder named after the dataset under the current
# working directory (see the log printed below the cell).
# NOTE(review): presumably prompts for Kaggle API credentials on first use — confirm.
od.download(dataset_url)

Dataset URL: https://www.kaggle.com/datasets/mnkbiswas/anime-face-with-eye-and-hair-color-tagged
Downloading anime-face-with-eye-and-hair-color-tagged.zip to ./anime-face-with-eye-and-hair-color-tagged


100%|██████████| 457M/457M [00:00<00:00, 494MB/s]





In [None]:
import tarfile
import os

# Archive fetched by the download cell, and the folder it is unpacked into.
file_path = "/content/anime-face-with-eye-and-hair-color-tagged/data.tgz"  # your tgz file
extract_path = "/content/anime_dataset"  # folder to extract into

# Create folder if not exists
os.makedirs(extract_path, exist_ok=True)

# Extract .tgz file.
with tarfile.open(file_path, "r:gz") as tar:
    if hasattr(tarfile, "data_filter"):
        # The archive comes from the internet, so extract with the "data"
        # filter: it refuses members that would land outside extract_path
        # (absolute paths, "..", escaping links) and special files. Passing a
        # filter explicitly also avoids the DeprecationWarning that a bare
        # extractall() emits on recent Python versions.
        tar.extractall(path=extract_path, filter="data")
    else:
        # Older interpreters without extraction filters: keep the old call.
        tar.extractall(path=extract_path)

print("✅ Extraction completed!")
print("Files extracted:", os.listdir(extract_path)[:20])  # show first 20 files


  tar.extractall(path=extract_path)


✅ Extraction completed!
Files extracted: ['data']


In [None]:
# Only needed in Colab
!pip install tensorflow pandas pillow tqdm

import os
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

# Quick sanity check that TensorFlow imported, and a record of the version used.
print("✅ TensorFlow version:", tf.__version__)


✅ TensorFlow version: 2.19.0


In [None]:
# Locations inside the extracted dataset folder.
csv_path = "/content/anime_dataset/data/tags.csv"
image_dir = "/content/anime_dataset/data/images"

# The CSV is read without a header row; its two columns are named here.
df = pd.read_csv(csv_path, header=None).set_axis(["image_id", "tags"], axis=1)
df["tags"] = df["tags"].str.lower()

# Extract hair & eye colors: the two capture groups of the pattern become the
# two new columns; rows whose tag text does not match get NaN and are dropped.
df = df.join(
    df["tags"]
    .str.extract(r"(\w+) hair (\w+) eyes")
    .set_axis(["hair_color", "eye_color"], axis=1)
)
df.dropna(subset=["hair_color", "eye_color"], inplace=True)

# File name of each image: the id followed by the .jpg extension.
df["image_name"] = df["image_id"].astype(str) + ".jpg"

# Map labels: every distinct colour gets an integer code, numbered in order of
# first appearance in the table.
hair2idx = {name: code for code, name in enumerate(df["hair_color"].drop_duplicates())}
eye2idx = {name: code for code, name in enumerate(df["eye_color"].drop_duplicates())}

df = df.assign(
    hair_label=df["hair_color"].map(hair2idx).astype(int),
    eye_label=df["eye_color"].map(eye2idx).astype(int),
)

# The condition vector is a hair one-hot followed by an eye one-hot.
num_hairs, num_eyes = len(hair2idx), len(eye2idx)
cond_dim = num_hairs + num_eyes

print("Hair colors:", hair2idx)
print("Eye colors:", eye2idx)
print("Total condition dim:", cond_dim)
print("Number of images:", len(df))


Hair colors: {'aqua': 0, 'gray': 1, 'green': 2, 'orange': 3, 'red': 4, 'white': 5, 'black': 6, 'blonde': 7, 'blue': 8, 'brown': 9, 'pink': 10, 'purple': 11}
Eye colors: {'aqua': 0, 'black': 1, 'blue': 2, 'brown': 3, 'green': 4, 'orange': 5, 'pink': 6, 'purple': 7, 'red': 8, 'yellow': 9}
Total condition dim: 22
Number of images: 36740


In [None]:
# Side length in pixels that every image is resized to by load_image. The
# discriminator's image input is declared as (64, 64, 3), so keep this at 64
# unless the model definitions are changed too.
IMG_SIZE = 64
# Number of samples per batch produced by the tf.data pipeline.
BATCH_SIZE = 64

def load_image(img_name):
    """Load one dataset image as a float32 array scaled to [-1, 1].

    Args:
        img_name: File name of the image, resolved relative to ``image_dir``.

    Returns:
        A float32 NumPy array of shape ``(IMG_SIZE, IMG_SIZE, 3)`` with values
        in ``[-1, 1]``.

    Raises:
        OSError: If the file is missing or cannot be read as an image.
    """
    path = os.path.join(image_dir, img_name)
    # Open through a context manager so the file handle is released promptly,
    # including when decoding fails part-way. convert() and resize() return new
    # in-memory images, so the result stays valid after the file is closed.
    with Image.open(path) as source:
        img = source.convert("RGB").resize((IMG_SIZE, IMG_SIZE))
    # 0..255 -> -1..1, matching the tanh output range of the generator.
    img = np.array(img).astype("float32") / 127.5 - 1.0  # Normalize [-1,1]
    return img

def make_condition_vector(row):
    """Encode a row's hair and eye labels as one concatenated one-hot vector.

    Args:
        row: Mapping-like object providing integer ``"hair_label"`` and
            ``"eye_label"`` entries.

    Returns:
        A float32 array of length ``num_hairs + num_eyes``: the hair one-hot
        followed by the eye one-hot.
    """
    # Row i of an identity matrix is exactly the one-hot vector for class i.
    hair_onehot = np.eye(num_hairs, dtype=np.float32)[row["hair_label"]]
    eye_onehot = np.eye(num_eyes, dtype=np.float32)[row["eye_label"]]
    return np.hstack((hair_onehot, eye_onehot))

# Load every image listed in df into memory together with its condition vector.
images = []
conditions = []
skipped = 0  # images that could not be read from disk

for _, row in tqdm(df.iterrows(), total=len(df)):
    try:
        img = load_image(row["image_name"])
    except (OSError, ValueError):
        # Best effort: a missing or unreadable image is skipped, not fatal.
        # Only image-reading errors are caught here, so Ctrl-C still works and
        # genuine bugs (e.g. a bad label index) are not silently hidden.
        skipped += 1
        continue
    images.append(img)
    conditions.append(make_condition_vector(row))

if skipped:
    print(f"⚠️ Skipped {skipped} unreadable images")
if not images:
    # Fail here with a clear message instead of building an empty dataset.
    raise RuntimeError(f"No images could be loaded from {image_dir}")

images = np.array(images)
conditions = np.array(conditions)

# Pair each image with its condition vector, shuffle, and cut into fixed-size
# batches (the final partial batch is dropped).
dataset = tf.data.Dataset.from_tensor_slices((images, conditions))
dataset = dataset.shuffle(1000).batch(BATCH_SIZE, drop_remainder=True)
print("✅ Dataset ready:", images.shape, conditions.shape)


100%|██████████| 36740/36740 [00:18<00:00, 2019.55it/s]


✅ Dataset ready: (36740, 64, 64, 3) (36740, 22)


In [None]:
def build_generator(noise_dim, cond_dim):
    """Assemble the conditional generator.

    The condition vector is embedded by a 32-unit dense layer, concatenated
    with the noise vector, projected to an 8x8x256 feature map and upsampled
    by three stride-2 transposed convolutions to a 64x64x3 tanh image.

    Args:
        noise_dim: Length of the noise input vector.
        cond_dim: Length of the condition input vector.

    Returns:
        A ``tf.keras.Model`` named "Generator" taking ``[noise, condition]``.
    """
    cond_input = layers.Input(shape=(cond_dim,))
    cond_emb = layers.Dense(32, activation="relu")(cond_input)
    noise_input = layers.Input(shape=(noise_dim,))

    def bn_relu(tensor):
        # Batch-norm followed by ReLU, applied after every hidden stage.
        normed = layers.BatchNormalization()(tensor)
        return layers.ReLU()(normed)

    merged = layers.Concatenate()([noise_input, cond_emb])

    # Project to the 8x8 seed feature map.
    projected = layers.Dense(8 * 8 * 256, use_bias=False)(merged)
    feat = bn_relu(layers.Reshape((8, 8, 256))(projected))

    # Each stage doubles the spatial size: 8 -> 16 -> 32.
    for filters in (128, 64):
        feat = layers.Conv2DTranspose(
            filters, 4, strides=2, padding="same", use_bias=False
        )(feat)
        feat = bn_relu(feat)

    # Final upsampling 32 -> 64 with three output channels in [-1, 1].
    out = layers.Conv2DTranspose(
        3, 4, strides=2, padding="same", activation="tanh"
    )(feat)

    return tf.keras.Model([noise_input, cond_input], out, name="Generator")


In [None]:
def build_discriminator(cond_dim):
    """Assemble the conditional discriminator.

    The condition vector is expanded by a dense layer into a 64x64 plane and
    stacked onto the image as a fourth channel. Three stride-2 convolutions
    with LeakyReLU follow, then a single sigmoid unit.

    Args:
        cond_dim: Length of the condition input vector.

    Returns:
        A ``tf.keras.Model`` named "Discriminator" taking ``[image, condition]``
        and producing one sigmoid output per sample.
    """
    img_input = layers.Input(shape=(64, 64, 3))
    cond_input = layers.Input(shape=(cond_dim,))

    # Turn the condition into an image-sized single-channel map.
    cond_plane = layers.Dense(64 * 64, activation="relu")(cond_input)
    cond_plane = layers.Reshape((64, 64, 1))(cond_plane)

    feat = layers.Concatenate()([img_input, cond_plane])

    # Downsampling stack: each stage halves the spatial size.
    for filters in (64, 128, 256):
        feat = layers.Conv2D(filters, 4, strides=2, padding="same")(feat)
        feat = layers.LeakyReLU(0.2)(feat)

    flat = layers.Flatten()(feat)
    out = layers.Dense(1, activation="sigmoid")(flat)

    return tf.keras.Model([img_input, cond_input], out, name="Discriminator")


In [None]:
# Length of the random noise vector fed to the generator.
noise_dim = 100

# Both networks are conditioned on the same hair/eye condition vector.
generator = build_generator(noise_dim, cond_dim)
discriminator = build_discriminator(cond_dim)

# The discriminator ends in a sigmoid, so the loss works on probabilities.
bce = tf.keras.losses.BinaryCrossentropy()

# One Adam optimizer per network, with identical settings.
opt_G = Adam(learning_rate=0.0002, beta_1=0.5)
opt_D = Adam(learning_rate=0.0002, beta_1=0.5)

@tf.function
def train_step(real_img, cond):
    """Run one adversarial update: discriminator first, then generator.

    Args:
        real_img: Batch of real images fed to the discriminator.
        cond: Condition vectors paired with ``real_img``; the same vectors are
            used to condition the generated images.

    Returns:
        Tuple ``(d_loss, g_loss)`` with the scalar discriminator and generator
        losses computed for this batch.
    """
    # Size the noise from the batch actually received rather than from a
    # global constant, so the step also works for batches of any other size.
    batch_size = tf.shape(real_img)[0]
    noise = tf.random.normal([batch_size, noise_dim])

    # Train Discriminator: real images should score 1, generated images 0.
    with tf.GradientTape() as tape_D:
        fake_img = generator([noise, cond], training=True)
        real_out = discriminator([real_img, cond], training=True)
        fake_out = discriminator([fake_img, cond], training=True)

        d_loss_real = bce(tf.ones_like(real_out), real_out)
        d_loss_fake = bce(tf.zeros_like(fake_out), fake_out)
        d_loss = d_loss_real + d_loss_fake

    # Only the discriminator's weights are updated in this half of the step.
    grads_D = tape_D.gradient(d_loss, discriminator.trainable_variables)
    opt_D.apply_gradients(zip(grads_D, discriminator.trainable_variables))

    # Train Generator: draw fresh noise and reward fakes that score as real.
    noise = tf.random.normal([batch_size, noise_dim])
    with tf.GradientTape() as tape_G:
        fake_img = generator([noise, cond], training=True)
        fake_out = discriminator([fake_img, cond], training=True)
        g_loss = bce(tf.ones_like(fake_out), fake_out)

    # Only the generator's weights are updated in this half of the step.
    grads_G = tape_G.gradient(g_loss, generator.trainable_variables)
    opt_G.apply_gradients(zip(grads_G, generator.trainable_variables))

    return d_loss, g_loss

def train(dataset, epochs=5):
    """Train the GAN for a number of passes over ``dataset``.

    Args:
        dataset: Iterable yielding ``(real_img, cond)`` batches.
        epochs: Number of full passes over ``dataset``.

    Returns:
        A list with one ``(d_loss, g_loss)`` tuple of floats per epoch, holding
        the losses of the last batch of that epoch (the values that are also
        printed).

    Raises:
        ValueError: If ``dataset`` yields no batches.
    """
    history = []
    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")
        # Reset per epoch so an empty pass is detected instead of hitting an
        # unbound variable (or silently re-reporting the previous epoch).
        d_loss = g_loss = None
        for real_img, cond in tqdm(dataset):
            d_loss, g_loss = train_step(real_img, cond)
        if d_loss is None:
            raise ValueError("dataset yielded no batches; nothing to train on")
        d_value, g_value = float(d_loss.numpy()), float(g_loss.numpy())
        print(f"✅ Epoch {epoch+1} | D_loss: {d_value:.4f}, G_loss: {g_value:.4f}")
        history.append((d_value, g_value))
    return history


In [None]:
# Run the full training: 20 passes over the dataset.
train(dataset, epochs=20)

# Persist the trained generator as an HDF5 (.h5) file under ./models,
# creating the folder first if needed.
os.makedirs("models", exist_ok=True)
generator.save("models/generator_CGAN.h5")
print("✅ Generator saved at models/generator_CGAN.h5")



Epoch 1/20


100%|██████████| 574/574 [00:35<00:00, 16.01it/s]


✅ Epoch 1 | D_loss: 1.0509, G_loss: 1.4792

Epoch 2/20


100%|██████████| 574/574 [00:30<00:00, 18.59it/s]


✅ Epoch 2 | D_loss: 0.9579, G_loss: 1.2410

Epoch 3/20


100%|██████████| 574/574 [00:30<00:00, 18.72it/s]


✅ Epoch 3 | D_loss: 1.1683, G_loss: 1.7398

Epoch 4/20


100%|██████████| 574/574 [00:30<00:00, 18.55it/s]


✅ Epoch 4 | D_loss: 0.9149, G_loss: 1.1144

Epoch 5/20


100%|██████████| 574/574 [00:30<00:00, 18.62it/s]


✅ Epoch 5 | D_loss: 1.2779, G_loss: 1.5369

Epoch 6/20


100%|██████████| 574/574 [00:30<00:00, 18.65it/s]


✅ Epoch 6 | D_loss: 0.9644, G_loss: 1.2400

Epoch 7/20


100%|██████████| 574/574 [00:40<00:00, 14.02it/s]


✅ Epoch 7 | D_loss: 0.9352, G_loss: 1.6072

Epoch 8/20


100%|██████████| 574/574 [00:31<00:00, 18.50it/s]


✅ Epoch 8 | D_loss: 1.2687, G_loss: 0.7346

Epoch 9/20


100%|██████████| 574/574 [00:31<00:00, 18.49it/s]


✅ Epoch 9 | D_loss: 1.2503, G_loss: 1.2792

Epoch 10/20


100%|██████████| 574/574 [00:30<00:00, 18.72it/s]


✅ Epoch 10 | D_loss: 0.8905, G_loss: 1.1855

Epoch 11/20


100%|██████████| 574/574 [00:30<00:00, 18.62it/s]


✅ Epoch 11 | D_loss: 1.0071, G_loss: 1.1824

Epoch 12/20


100%|██████████| 574/574 [00:30<00:00, 18.60it/s]


✅ Epoch 12 | D_loss: 1.4253, G_loss: 1.7229

Epoch 13/20


100%|██████████| 574/574 [00:30<00:00, 18.65it/s]


✅ Epoch 13 | D_loss: 1.0394, G_loss: 1.3815

Epoch 14/20


100%|██████████| 574/574 [00:30<00:00, 18.67it/s]


✅ Epoch 14 | D_loss: 1.2304, G_loss: 0.8940

Epoch 15/20


100%|██████████| 574/574 [00:30<00:00, 18.65it/s]


✅ Epoch 15 | D_loss: 1.0907, G_loss: 1.3877

Epoch 16/20


100%|██████████| 574/574 [00:30<00:00, 18.63it/s]


✅ Epoch 16 | D_loss: 0.9336, G_loss: 1.4606

Epoch 17/20


100%|██████████| 574/574 [00:30<00:00, 18.63it/s]


✅ Epoch 17 | D_loss: 0.9894, G_loss: 0.6598

Epoch 18/20


100%|██████████| 574/574 [00:30<00:00, 18.64it/s]


✅ Epoch 18 | D_loss: 0.8725, G_loss: 1.2998

Epoch 19/20


100%|██████████| 574/574 [00:30<00:00, 18.63it/s]


✅ Epoch 19 | D_loss: 0.9725, G_loss: 0.7978

Epoch 20/20


100%|██████████| 574/574 [00:30<00:00, 18.63it/s]


✅ Epoch 20 | D_loss: 0.8701, G_loss: 0.8582
✅ Generator saved at models/generator_CGAN.h5


In [None]:
# Writes the generator again under a second file name, also as HDF5 (.h5).
# Relies on the models/ folder created by the previous cell.
# NOTE(review): the "_tf" name suggests another format may have been intended — confirm.
generator.save("models/generator_tf.h5")

