Import Libraries

In [99]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

Prepare Data

In [100]:
# Load dataset
df = pd.read_csv("synthetic_face_shape_dataset_2000.csv")

# Normalise header whitespace right after loading, so every column lookup in the
# notebook (features, target, hairstyle map) sees the same clean names. Doing this
# only in a later cell would be too late: the selections below would already have
# raised KeyError on a padded header.
df.columns = df.columns.str.strip()

# Define features and target using correct column names
X = df[['forehead width (cm)', 'jaw width (cm)', 'face length (cm)', 'face width (cm)']]  # Numerical features
y = df['face shape']  # Target label


Train-Test Split and Scaling

In [101]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply them to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


Train Decision Tree with GridSearchCV

In [102]:
# Hyperparameter values to try for the decision tree
param_grid = {
    'max_depth': [3, 5, 10, None],
    'min_samples_split': [2, 5, 10],
    'criterion': ['gini', 'entropy'],
}

# 5-fold cross-validated search over every combination in the grid, scored by accuracy
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

# Keep the best-scoring tree for evaluation and prediction in the cells below
tree_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)


Fitting 5 folds for each of 24 candidates, totalling 120 fits
Best Parameters: {'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 2}


Evaluate Model

In [103]:
# Score the tuned tree on the held-out test split
y_pred = tree_model.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print(test_report)


Accuracy: 0.7675
              precision    recall  f1-score   support

     Diamond       0.57      0.90      0.70        52
       Heart       0.82      0.79      0.80        70
      Oblong       0.95      0.87      0.91        86
        Oval       0.83      0.85      0.84        59
       Round       0.68      0.58      0.63        65
      Square       0.75      0.62      0.68        68

    accuracy                           0.77       400
   macro avg       0.77      0.77      0.76       400
weighted avg       0.78      0.77      0.77       400



Predict Face Shape From Input

In [104]:
# Function to predict face shape
def predict_face_shape(input_features):
    """Predict the face-shape label for a single set of measurements.

    Args:
        input_features: Four values in this order: forehead width, jaw width,
            face length, face width. They are placed under the "(cm)" column
            names used for the training features.

    Returns:
        The label predicted by the module-level ``tree_model`` for that one row.

    Relies on the module-level ``scaler`` and ``tree_model`` created in the
    earlier cells, so those cells must have been run first.
    """
    feature_names = [
        'forehead width (cm)',
        'jaw width (cm)',
        'face length (cm)',
        'face width (cm)',
    ]
    # Wrap the measurements as a one-row frame with the training column names
    sample = pd.DataFrame([input_features], columns=feature_names)
    # Apply the same scaling the training data received before predicting
    prediction = tree_model.predict(scaler.transform(sample))
    return prediction[0]

# Example input: values must be in the same order and units as the training data
# (forehead width, jaw width, face length, face width).
# Another sample to try: [13.8, 13.2, 17.9, 15.0]
new_face = [14.6, 10.2, 17.8, 12.9]
predicted_shape = predict_face_shape(new_face)
print("Predicted Face Shape:", predicted_shape)


Predicted Face Shape: Oval


Recommend Hairstyle Based on Face Shape

In [105]:
# Strip stray whitespace from the column headers
df.columns = df.columns.str.strip()

# Rule-based hairstyle map: the first row seen for each face shape supplies its hairstyle
shape_style_rows = df[['face shape', 'best hairstyle']].drop_duplicates(subset=['face shape'])
face_to_hairstyle = dict(zip(shape_style_rows['face shape'], shape_style_rows['best hairstyle']))

# Look up the predicted label (e.g. 'Oval'); the lookup is case-sensitive and
# falls back to a default message for labels missing from the map
recommended_hairstyle = face_to_hairstyle.get(predicted_shape, "No suggestions")
print("Recommended Hairstyle:", recommended_hairstyle)



Recommended Hairstyle: Long Layers
