In [11]:
import dlib
import cv2
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

## Functions to extract facial features

In [2]:
def detect_face_shape(landmark_points):
    """Classify a face as "Oval", "Round", "Square" or "Unknown" from landmarks.

    Two ratios are derived from the landmark coordinates: jawline width over
    face height, and (only for ratios between the round and oval thresholds)
    "cheekbone" width over jawline width.

    Parameters
    ----------
    landmark_points : sequence of (x, y)
        Landmark coordinates; indices 0, 8, 16, 19, 27 and 30 are read, so
        any integer-indexable sequence of (x, y) pairs works.

    Returns
    -------
    str
        "Oval", "Round", "Square" or "Unknown". "Unknown" is also returned
        when the measured face height is zero, because the width/height
        ratio is undefined in that case.
    """
    # Jawline end points, chin tip and the point used as the top of the face.
    jaw_start = landmark_points[0]
    jaw_end = landmark_points[16]
    chin_point = landmark_points[8]
    forehead_point = landmark_points[19]

    # NOTE(review): in the usual 68-point annotation, indices 27-30 appear to
    # lie on the nose bridge rather than the cheekbones, which would make this
    # width close to zero for most faces -- confirm the intended landmarks.
    cheek_start = landmark_points[27]
    cheek_end = landmark_points[30]

    jawline_width = abs(jaw_start[0] - jaw_end[0])
    face_height = abs(chin_point[1] - forehead_point[1])
    cheekbone_width = abs(cheek_start[0] - cheek_end[0])

    # Degenerate landmarks: avoid dividing by zero (plain ints would raise,
    # numpy ints would silently yield inf/nan and a misleading label).
    if face_height == 0:
        return "Unknown"

    # Thresholds on the ratios for each face shape.
    oval_threshold = 1.2
    round_threshold = 1.0
    square_threshold = 0.8

    face_ratio = jawline_width / face_height
    if face_ratio > oval_threshold:
        return "Oval"
    if face_ratio < round_threshold:
        return "Round"
    # Here face_ratio >= round_threshold, so jawline_width >= face_height > 0
    # and the division below is safe.
    if cheekbone_width / jawline_width < square_threshold:
        return "Square"
    return "Unknown"
    
def detect_hair_length(image):
    """Return a coarse length category ("Short", "Medium" or "Long") for an image.

    The image is converted to grayscale, binarised with an inverted Otsu
    threshold, and the external contours of the binary image are measured
    with an open arc length. The mean contour length, converted to inches
    with a fixed pixel-to-inch ratio, is compared against two thresholds.

    NOTE(review): every external contour in the binarised image is measured,
    not only hair regions -- confirm that this is the intended proxy.

    Parameters
    ----------
    image : BGR image array as accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.

    Returns
    -------
    str
        "Short" if the mean length is below 2 inches, "Medium" if below
        4.5 inches, otherwise "Long". When no contour is found at all the
        result is "Short" (nothing measurable) instead of relying on a NaN
        mean falling through the comparisons.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # findContours returns (contours, hierarchy) in OpenCV 4.x and
    # (image, contours, hierarchy) in 3.x; [-2] is the contour list in both.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if len(contours) == 0:
        # np.mean([]) would be NaN (with a RuntimeWarning); be explicit instead.
        return "Short"

    # Fixed scale assumption: a 6-inch reference object spans 100 pixels.
    reference_object_size_inches = 6.0
    reference_object_size_pixels = 100
    pixel_to_inch_ratio = reference_object_size_inches / reference_object_size_pixels

    hair_lengths_inches = [
        cv2.arcLength(contour, closed=False) * pixel_to_inch_ratio
        for contour in contours
    ]
    avg_hair_length_inches = np.mean(hair_lengths_inches)

    short_threshold = 2
    medium_threshold = 4.5
    if avg_hair_length_inches < short_threshold:
        return "Short"
    if avg_hair_length_inches < medium_threshold:
        return "Medium"
    return "Long"

def detect_eye_distance(landmark_points):
    """Bucket the distance between the two eye centres into 'Low'/'Average'/'High'.

    Each eye centre is the mean of six landmark rows (36-41 for the left eye,
    42-47 for the right eye); the distance is the Euclidean norm of their
    difference, in the same units as the landmark coordinates.

    Parameters
    ----------
    landmark_points : numpy.ndarray
        2-D array of landmark coordinates, one (x, y) row per landmark.

    Returns
    -------
    str
        'Low' below 30, 'Average' below 38, otherwise 'High'.
    """
    centre_left = np.mean(landmark_points[36:42, :], axis=0)
    centre_right = np.mean(landmark_points[42:48, :], axis=0)
    separation = np.linalg.norm(centre_left - centre_right)

    lower_bound, upper_bound = 30, 38
    if separation < lower_bound:
        return 'Low'
    if separation < upper_bound:
        return 'Average'
    return 'High'

def detect_narrow_eyes(landmark_points):
    """Return "Yes" when the inner eye corners are horizontally close, else "No".

    The measure is the signed x-difference between landmark 42 (taken as the
    right eye's inner corner) and landmark 39 (the left eye's inner corner);
    anything below 15 counts as narrow.

    Parameters
    ----------
    landmark_points : sequence of (x, y)
        Landmark coordinates; only indices 39 and 42 are read.

    Returns
    -------
    str
        "Yes" or "No".
    """
    # Tunable cut-off, in the same units as the landmark coordinates.
    narrow_cutoff = 15
    inner_gap = landmark_points[42][0] - landmark_points[39][0]
    return "Yes" if inner_gap < narrow_cutoff else "No"
def detect_glasses(landmark_points):
    """Return "Glasses" when the two eye outlines differ in size, else "No glasses".

    For each eye (landmarks 36-41 and 42-47) the lengths of the segments
    between consecutive landmarks are summed and divided by the number of
    landmarks in the region. If the two values differ by more than 0.5 the
    result is "Glasses".

    Parameters
    ----------
    landmark_points : sequence of (x, y)
        Landmark coordinates supporting slicing.

    Returns
    -------
    str
        "Glasses" or "No glasses".
    """
    def _outline_size(region):
        # Sum of consecutive-point distances, divided by the point count
        # (not the segment count), exactly as the measure is defined above.
        total = 0
        for (x1, y1), (x2, y2) in zip(region[:-1], region[1:]):
            total += ((x2 - x1)**2 + (y2 - y1)**2)**0.5
        return total / len(region)

    size_left = _outline_size(landmark_points[36:42])
    size_right = _outline_size(landmark_points[42:48])

    size_gap_limit = 0.5
    return "Glasses" if abs(size_left - size_right) > size_gap_limit else "No glasses"
    
def detect_big_nose(landmark_points):
    """Return "Yes" when the nose landmarks span a large box, else "No".

    The bounding extent of landmarks 27-35 is measured along x (width) and
    y (height). The nose counts as big only if the width exceeds 20 and the
    height exceeds 30.

    Parameters
    ----------
    landmark_points : numpy.ndarray
        2-D array of landmark coordinates, one (x, y) row per landmark.

    Returns
    -------
    str
        "Yes" or "No".
    """
    nose = landmark_points[27:36]
    xs, ys = nose[:, 0], nose[:, 1]
    span_x = max(xs) - min(xs)
    span_y = max(ys) - min(ys)

    # Tunable limits, in the same units as the landmark coordinates.
    min_big_width, min_big_height = 20, 30
    is_big = span_x > min_big_width and span_y > min_big_height
    return "Yes" if is_big else "No"
def detect_big_lips(landmark_points):
    """Return "Yes" when the mouth landmarks span a large box, else "No".

    The bounding extent of landmarks 48-67 is measured along x (width) and
    y (height). The lips count as big only if the width exceeds 40 and the
    height exceeds 18.

    Parameters
    ----------
    landmark_points : numpy.ndarray
        2-D array of landmark coordinates, one (x, y) row per landmark.

    Returns
    -------
    str
        "Yes" or "No".
    """
    mouth = landmark_points[48:68]
    xs, ys = mouth[:, 0], mouth[:, 1]
    span_x = max(xs) - min(xs)
    span_y = max(ys) - min(ys)

    # Tunable limits, in the same units as the landmark coordinates.
    min_big_width, min_big_height = 40, 18
    if span_x <= min_big_width or span_y <= min_big_height:
        return "No"
    return "Yes"
    
def detect_mouth_width(landmark_points):
    """Bucket the horizontal mouth extent into 'Small'/'Average'/'Large'.

    The width is the largest x among landmarks 54-59 minus the smallest x
    among landmarks 48-53.

    Parameters
    ----------
    landmark_points : numpy.ndarray
        2-D array of landmark coordinates, one (x, y) row per landmark.

    Returns
    -------
    str
        'Small' below 25.5, 'Average' below 34, otherwise 'Large'.
    """
    left_xs = landmark_points[48:54, 0]
    right_xs = landmark_points[54:60, 0]
    extent = max(right_xs) - min(left_xs)

    lower_bound, upper_bound = 25.5, 34
    if extent < lower_bound:
        return 'Small'
    if extent < upper_bound:
        return 'Average'
    return 'Large'

def detect_head_width(landmark_points):
    """Bucket a horizontal head-width measure into 'Narrow'/'Average'/'Wide'.

    The width is the largest x among landmarks 22-26 minus the smallest x
    among landmarks 17-20.

    Parameters
    ----------
    landmark_points : numpy.ndarray
        2-D array of landmark coordinates, one (x, y) row per landmark.

    Returns
    -------
    str
        'Narrow' below 53.5, 'Average' below 69.5, otherwise 'Wide'.
    """
    left_xs = landmark_points[17:21, 0]
    right_xs = landmark_points[22:27, 0]
    extent = max(right_xs) - min(left_xs)

    lower_bound, upper_bound = 53.5, 69.5
    if extent < lower_bound:
        return 'Narrow'
    if extent < upper_bound:
        return 'Average'
    return 'Wide'
    
def detect_chin_size(landmark_points):
    """Bucket the eye-to-chin vertical distance into 'Small'/'Average'/'Large'.

    The measure is the largest y among landmarks 0-16 minus the mean y of
    landmarks 36-47.

    Parameters
    ----------
    landmark_points : numpy.ndarray
        2-D array of landmark coordinates, one (x, y) row per landmark.

    Returns
    -------
    str
        'Small' below 54, 'Average' below 70, otherwise 'Large'.
    """
    lowest_jaw_y = np.max(landmark_points[:17, 1])
    eyes_mean_y = np.mean(landmark_points[36:48], axis=0)[1]
    drop = lowest_jaw_y - eyes_mean_y

    lower_bound, upper_bound = 54, 70
    if drop < lower_bound:
        return 'Small'
    if drop < upper_bound:
        return 'Average'
    return 'Large'

## Creating a DataFrame from the image dataset

In [3]:
# Face detector plus the landmark predictor used by the feature extractors.
# The predictor weights are read from a relative path, so the .dat file must
# be reachable from the notebook's working directory.
LANDMARK_MODEL_PATH = 'shape_predictor_68_face_landmarks.dat'
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(LANDMARK_MODEL_PATH)

In [4]:
def collect_data(image_path):
    """Extract the categorical facial features of the image at ``image_path``.

    The image is loaded with OpenCV, faces are located with the module-level
    ``detector``, and 68 landmark points are obtained from the module-level
    ``predictor``. Each ``detect_*`` helper then turns the landmarks (or the
    raw image, for hair length) into one categorical value.

    Parameters
    ----------
    image_path : str
        Path of the image file to analyse.

    Returns
    -------
    dict or None
        Mapping of feature name to category string, or ``None`` when the
        file cannot be read as an image or no face is detected in it.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable / non-image file by returning
        # None rather than raising; treat it like "no usable face".
        return None

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = detector(gray)
    if len(faces) == 0:
        return None  # No face detected in the image

    # When several faces are detected, only the last detection is used;
    # landmarks are computed for that face alone.
    face = faces[len(faces) - 1]
    landmarks = predictor(image, face)
    landmark_points = np.array(
        [(landmarks.part(i).x, landmarks.part(i).y) for i in range(68)]
    )

    return {
        'face_shape': detect_face_shape(landmark_points),
        'hair_length': detect_hair_length(image),
        'eye_distance': detect_eye_distance(landmark_points),
        'narrow_eyes': detect_narrow_eyes(landmark_points),
        'big_nose': detect_big_nose(landmark_points),
        'big_lips': detect_big_lips(landmark_points),
        'mouth_width': detect_mouth_width(landmark_points),
        'head_width': detect_head_width(landmark_points),
        'chin_size': detect_chin_size(landmark_points),
    }

In [5]:
# Root of the training images: one sub-folder per hair style.
# Raw string so the Windows backslashes are not parsed as (invalid) escape
# sequences; the resulting path value is unchanged.
root_dir = r'E:\Projects2024\HAir\hairstyle\Train'
folder_names = ['Bangs', 'Blunt', 'Bun', 'Curly', 'Pixie', 'Side', 'Straight Long Hair']

# One feature dict per image in which a face was found.
image_data_list = []
for folder_name in folder_names:
    folder_path = os.path.join(root_dir, folder_name)
    # os.listdir returns entries in arbitrary order; sort so the row order
    # (and therefore the seeded train/test split) is reproducible.
    for filename in sorted(os.listdir(folder_path)):
        image_path = os.path.join(folder_path, filename)
        if not os.path.isfile(image_path):
            continue  # skip sub-directories

        image_data = collect_data(image_path)
        # Images without a detected face yield None and are left out.
        if image_data is not None:
            # The folder name is the hair-style label of the image.
            image_data['hair_style'] = folder_name
            image_data_list.append(image_data)

# Create a DataFrame from the collected data
df = pd.DataFrame(image_data_list)

In [6]:
# Show the assembled feature table: one row per image in which a face was
# detected, with the folder name as the hair_style label.
df

Unnamed: 0,face_shape,hair_length,eye_distance,narrow_eyes,big_nose,big_lips,mouth_width,head_width,chin_size,hair_style
0,Square,Long,High,No,No,No,Large,Wide,Average,Bangs
1,Square,Long,High,No,No,No,Average,Wide,Large,Bangs
2,Square,Long,High,No,No,No,Large,Wide,Large,Bangs
3,Oval,Long,High,No,No,No,Large,Wide,Average,Bangs
4,Round,Medium,High,No,No,No,Average,Average,Large,Bangs
...,...,...,...,...,...,...,...,...,...,...
127,Square,Long,High,No,No,No,Average,Wide,Average,Straight Long Hair
128,Square,Short,Average,No,No,No,Large,Average,Average,Straight Long Hair
129,Square,Medium,Average,No,No,No,Average,Average,Average,Straight Long Hair
130,Square,Medium,Average,No,No,No,Small,Average,Small,Straight Long Hair


## Training the model

In [7]:
# One-hot encode every categorical feature column; the label column is kept
# out of the encoding and re-attached in front of the indicator columns.
df_encoded = pd.get_dummies(df.drop(columns=['hair_style']))
df_final = pd.concat([df['hair_style'], df_encoded], axis=1)

# Labels (y) are the hair-style names; features (X) are all remaining columns.
y = df_final['hair_style']
X = df_final.drop(columns=['hair_style'])


In [10]:
# Split the data into training and testing sets
# Hold out a fixed fraction of the rows for evaluation; the seed keeps the
# split repeatable for a given row order.
TEST_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

In [12]:
# Gaussian Model
# Fit a Gaussian naive Bayes classifier on the training split.
# NOTE(review): X consists of one-hot indicator columns (from pd.get_dummies),
# while GaussianNB models each feature as a continuous Gaussian -- a
# categorical/Bernoulli naive Bayes variant may fit this data better; confirm.
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

In [13]:
# Score the held-out split: share of test rows whose predicted hair style
# equals the true label.
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.14814814814814814


## Predict the hair style for a single image

In [15]:
# Image to classify.
input_image_path = r"E:\Projects2024\HAir\hairstyle\Train\Curly\15.jpeg"
input_image_data = collect_data(input_image_path)
if input_image_data is None:
    # collect_data returns None when it finds no face; without features the
    # encoding below would be empty and predict() would fail with an opaque
    # error, so stop here with a clear message.
    raise ValueError(
        f"No facial features could be extracted from {input_image_path!r}"
    )

input_image_data_list = [input_image_data]
df_input = pd.DataFrame(input_image_data_list)
df_input_encoded = pd.get_dummies(df_input)

# Align with the training feature columns: indicator columns absent from this
# single row are added with value 0, columns unseen during training are
# dropped, and the column order matches df_encoded.
df_input_encoded = df_input_encoded.reindex(columns=df_encoded.columns, fill_value=0)
X_input_test = df_input_encoded

# Predict the hair-style label for the input image
y_pred = nb_classifier.predict(X_input_test)
print("Recommended Hair Style:" , y_pred)

Recommended Hair Style: ['Straight Long Hair']
