# Hair Type Classification using VGG16

In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2
import os
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing import image_dataset_from_directory

## Load Cleaned Dataset

In [5]:
# Read the cleaned image manifest from the CSV in the working directory,
# then preview the first five rows.
df = pd.read_csv(filepath_or_buffer="cleaned_dataset_paths.csv")
df.head(n=5)

Unnamed: 0,image_path,label,height,width,channels,aspect_ratio,brightness
0,C:\Users\nisa2\.cache\kagglehub\datasets\kavya...,straight,591,473,3,0.800338,114.276347
1,C:\Users\nisa2\.cache\kagglehub\datasets\kavya...,straight,177,284,3,1.60452,200.348731
2,C:\Users\nisa2\.cache\kagglehub\datasets\kavya...,straight,279,181,3,0.648746,145.681063
3,C:\Users\nisa2\.cache\kagglehub\datasets\kavya...,straight,202,249,3,1.232673,157.705575
4,C:\Users\nisa2\.cache\kagglehub\datasets\kavya...,straight,172,151,3,0.877907,148.798552


## Prepare Dataset

In [10]:
# Encode labels
# The position of each class in this list is its integer code.
class_order = ["straight", "wavy", "curly", "kinky"]
# Normalise case and surrounding whitespace so labels match class_order exactly.
df["label"] = df["label"].str.lower().str.strip()
label_to_int = {label: idx for idx, label in enumerate(class_order)}

# map encoded labels
# Series.map leaves NaN for any label that is not a key of the mapping, which
# would silently turn the column into floats; fail loudly instead.
encoded_labels = df["label"].map(label_to_int)
unmapped_rows = encoded_labels.isna()
if unmapped_rows.any():
    raise ValueError(
        "Labels not present in class_order: "
        f"{df.loc[unmapped_rows, 'label'].unique().tolist()}"
    )
df["label_int"] = encoded_labels.astype("int64")
df.head()

Unnamed: 0,image_path,label,height,width,channels,aspect_ratio,brightness,label_int
0,C:\Users\nisa2\.cache\kagglehub\datasets\kavya...,straight,591,473,3,0.800338,114.276347,0
1,C:\Users\nisa2\.cache\kagglehub\datasets\kavya...,straight,177,284,3,1.60452,200.348731,0
2,C:\Users\nisa2\.cache\kagglehub\datasets\kavya...,straight,279,181,3,0.648746,145.681063,0
3,C:\Users\nisa2\.cache\kagglehub\datasets\kavya...,straight,202,249,3,1.232673,157.705575,0
4,C:\Users\nisa2\.cache\kagglehub\datasets\kavya...,straight,172,151,3,0.877907,148.798552,0


In [24]:
# Input geometry and batching
# Every image is resized to IMG_HEIGHT x IMG_WIDTH pixels before batching.
IMG_HEIGHT = IMG_WIDTH = 224
# Number of images per batch in the tf.data pipelines.
BATCH_SIZE = 32

In [25]:
# train / val / test split
# Step 1: hold out 30% of the rows, stratified on the integer label.
train_df, temp_df = train_test_split(
    df,
    test_size=0.30,
    random_state=17,
    stratify=df["label_int"],
)

# Step 2: cut the held-out rows in half to get validation and test sets,
# again stratified and with the same seed.
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=17,
    stratify=temp_df["label_int"],
)

# Row counts per split, in (train, val, test) order.
tuple(len(split) for split in (train_df, val_df, test_df))

(987, 211, 212)

In [26]:
# Preprocessing and pipelines
def load_and_preprocess(path, label):
    """Read one image file and convert it into a VGG16-ready tensor.

    Args:
        path: Path of the image file to read.
        label: Label associated with the image; passed through unchanged.

    Returns:
        A tuple ``(img, label)`` where ``img`` is the decoded 3-channel image,
        resized to ``IMG_HEIGHT`` x ``IMG_WIDTH`` and passed through the VGG16
        ``preprocess_input`` function.
    """
    # load img
    raw_bytes = tf.io.read_file(path)
    img = tf.image.decode_jpeg(raw_bytes, channels=3)
    # resize image
    # tf.image.resize takes its size as (new_height, new_width); the previous
    # (width, height) order only worked because both constants are equal.
    img = tf.image.resize(img, (IMG_HEIGHT, IMG_WIDTH))
    # preprocess
    img = preprocess_input(img)

    return img, label

In [27]:
# Light augmentation
# Four mild random transforms chained in one Sequential model, applied in the
# order they are added: horizontal flip, rotation, zoom, brightness.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.RandomFlip("horizontal"))
data_augmentation.add(tf.keras.layers.RandomRotation(0.05))
data_augmentation.add(tf.keras.layers.RandomZoom(0.1))
# value_range is left at the layer's default here.
data_augmentation.add(tf.keras.layers.RandomBrightness(factor=0.05))

In [28]:
# tf.data
def build_dataset(df, augment=False, shuffle=None):
    """Build a batched, prefetched ``tf.data.Dataset`` of (image, label) pairs.

    Args:
        df: DataFrame with an ``image_path`` column and a ``label_int`` column.
        augment: If True, run ``data_augmentation`` on every image. The
            augmentation is applied to the resized raw pixels *before*
            ``preprocess_input``: per the Keras documentation, RandomBrightness
            clips its output to ``value_range`` (default [0, 255]), which would
            wipe out the negative values of already zero-centred VGG16 inputs.
        shuffle: Whether to shuffle the examples. ``None`` (the default) means
            "shuffle only when ``augment`` is True", so validation and test
            sets keep the row order of ``df`` and predictions stay aligned
            with it.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``BATCH_SIZE`` preprocessed
        images with their integer labels.
    """
    paths = df["image_path"].values
    labels = df["label_int"].values

    def _load_augment_preprocess(path, label):
        # Same decoding as load_and_preprocess, with augmentation inserted
        # between the resize and the VGG16 preprocessing.
        img = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, (IMG_HEIGHT, IMG_WIDTH))
        img = data_augmentation(img, training=True)
        # Re-assert the static shape in case an augmentation layer dropped it.
        img = tf.ensure_shape(img, (IMG_HEIGHT, IMG_WIDTH, 3))
        img = preprocess_input(img)
        return img, label

    if shuffle is None:
        shuffle = augment

    dataset = tf.data.Dataset.from_tensor_slices((paths, labels))

    # Shuffle the (path, label) pairs before decoding so the buffer holds
    # short strings rather than decoded images; a buffer as large as the
    # dataset gives a full shuffle. An empty dataset is left alone because
    # shuffle() needs a positive buffer size.
    if shuffle and len(paths) > 0:
        dataset = dataset.shuffle(len(paths), reshuffle_each_iteration=True)

    map_fn = _load_augment_preprocess if augment else load_and_preprocess
    dataset = dataset.map(map_fn, num_parallel_calls=tf.data.AUTOTUNE)

    dataset = dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    return dataset

In [29]:
# Build one pipeline per split; only the training split is augmented.
train_data = build_dataset(train_df, augment=True)
val_data, test_data = (
    build_dataset(split_df, augment=False) for split_df in (val_df, test_df)
)

## Build VGG16 Model

## Train Model

## Results

## Extract Embeddings