In [93]:
from transformers import AutoImageProcessor, SiglipForImageClassification
from pathlib import Path
from tqdm import tqdm
from PIL import Image
import torch
import pandas as pd

# === Model Setup ===
MODEL_NAME = "prithivMLmods/facial-age-detection"

print(f"‚è≥ Loading model: {MODEL_NAME}")
processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
model = SiglipForImageClassification.from_pretrained(MODEL_NAME)
model.eval()
print("‚úÖ Model loaded successfully!")

‚è≥ Loading model: prithivMLmods/facial-age-detection
‚úÖ Model loaded successfully!


In [94]:
id2label = {
    "0": "01‚Äì10",
    "1": "11‚Äì20",
    "2": "21‚Äì30",
    "3": "31‚Äì40", 
    "4": "41‚Äì55",
    "5": "56‚Äì65",
    "6": "66‚Äì80",
    "7": "80+"
}

In [99]:
INPUT_DIR = Path("../data/crops_face/20251105/5_cafe_pos_faces_unique")
CSV_PATH = Path("../data/predictions/cafe_pos/20251105/age_annotated/age_predictions.csv")
OUTPUT_DIR = Path("../data/predictions/cafe_pos/20251105/age_annotated")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


In [100]:
results = []

print(f"üìÇ Scanning folder: {INPUT_DIR}")
image_paths = sorted([p for p in INPUT_DIR.glob("*.*") if p.suffix.lower() in [".jpg", ".png", ".jpeg"]])
print(f"üß† Found {len(image_paths)} face crops.\n")

üìÇ Scanning folder: ..\data\crops_face\20251105\5_cafe_pos_faces_unique
üß† Found 287 face crops.



In [101]:
for img_path in tqdm(image_paths, desc="Predicting age groups"):
    try:
        image = Image.open(img_path).convert("RGB")
        inputs = processor(images=image, return_tensors="pt")

        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            probs = torch.nn.functional.softmax(logits, dim=1).squeeze().tolist()

        top_idx = int(torch.argmax(torch.tensor(probs)))
        predicted_age_group = id2label[str(top_idx)]
        confidence = round(probs[top_idx], 3)

        results.append({
            "file": img_path.name,
            "predicted_age": predicted_age_group,
            "confidence": confidence,
            "probs": {id2label[str(i)]: round(p, 3) for i, p in enumerate(probs)}
        })

    except Exception as e:
        print(f"‚ö†Ô∏è Skipped {img_path.name} ‚Äî error: {e}")
        continue

Predicting age groups: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 287/287 [01:29<00:00,  3.19it/s]


In [102]:
df_results = pd.DataFrame(results)
df_results.to_csv(CSV_PATH, index=False)
print(f"\n‚úÖ Saved predictions to: {CSV_PATH}")
display(df_results.head(70))


‚úÖ Saved predictions to: ..\data\predictions\cafe_pos\20251105\age_annotated\age_predictions.csv


Unnamed: 0,file,predicted_age,confidence,probs
0,cluster_000_AccoID_1169434_20251105_131026.png,21‚Äì30,0.443,"{'01‚Äì10': 0.066, '11‚Äì20': 0.196, '21‚Äì30': 0.44..."
1,cluster_001_AccoID_1168923_20251105_105123.png,66‚Äì80,0.305,"{'01‚Äì10': 0.015, '11‚Äì20': 0.118, '21‚Äì30': 0.01..."
2,cluster_002_AccoID_1169467_20251105_132245.png,56‚Äì65,0.566,"{'01‚Äì10': 0.004, '11‚Äì20': 0.005, '21‚Äì30': 0.00..."
3,cluster_003_AccoID_1168748_20251105_095655.png,66‚Äì80,0.776,"{'01‚Äì10': 0.0, '11‚Äì20': 0.003, '21‚Äì30': 0.001,..."
4,cluster_004_AccoID_1168916_20251105_104927.png,56‚Äì65,0.400,"{'01‚Äì10': 0.001, '11‚Äì20': 0.001, '21‚Äì30': 0.03..."
...,...,...,...,...
65,cluster_066_AccoID_1169213_20251105_121321.png,66‚Äì80,0.696,"{'01‚Äì10': 0.0, '11‚Äì20': 0.0, '21‚Äì30': 0.0, '31..."
66,cluster_067_AccoID_1169220_20251105_121427.png,80+,0.563,"{'01‚Äì10': 0.0, '11‚Äì20': 0.0, '21‚Äì30': 0.0, '31..."
67,cluster_068_AccoID_1168847_20251105_103500.png,56‚Äì65,0.693,"{'01‚Äì10': 0.001, '11‚Äì20': 0.003, '21‚Äì30': 0.00..."
68,cluster_069_AccoID_1169227_20251105_121648.png,80+,0.498,"{'01‚Äì10': 0.0, '11‚Äì20': 0.0, '21‚Äì30': 0.0, '31..."


In [104]:
import os
import cv2
import pandas as pd
from pathlib import Path

# === Paths ===
INPUT_DIR = Path("../data/crops_face/20251105/5_cafe_pos_faces_unique")
CSV_PATH = Path("../data/predictions/cafe_pos/20251105/age_annotated/age_predictions.csv")
OUTPUT_DIR = Path("../data/predictions/cafe_pos/20251105/age_annotated")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# === Load Predictions ===
df_results = pd.read_csv(CSV_PATH)
print(f"üìÑ Loaded {len(df_results)} predictions from {CSV_PATH}")

# === Style Settings ===
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 0.7
thickness = 2
text_color = (255, 255, 255)
bg_color = (0, 0, 0)
opacity = 0.5

# === Annotate and Save ===
for _, row in df_results.iterrows():
    filename = row["file"]
    predicted_age = str(row["predicted_age"])
    confidence = row.get("confidence", None)

    src_path = INPUT_DIR / filename
    dst_path = OUTPUT_DIR / filename

    img = cv2.imread(str(src_path))
    if img is None:
        print(f"‚ö†Ô∏è Skipping missing image: {filename}")
        continue

    # Label text
    if confidence is not None:
        text = f"{predicted_age} ({confidence:.2f})"
    else:
        text = predicted_age

    text = text.replace("‚Äì", "-").encode("ascii", errors="ignore").decode()

    # Draw background box
    (tw, th), _ = cv2.getTextSize(text, font, font_scale, thickness)
    overlay = img.copy()
    cv2.rectangle(overlay, (10, 10), (10 + tw + 20, 10 + th + 20), bg_color, -1)
    img = cv2.addWeighted(overlay, opacity, img, 1 - opacity, 0)

    # Draw text
    cv2.putText(img, text, (20, 10 + th + 8), font, font_scale, text_color, thickness, cv2.LINE_AA)

    # Save annotated image
    cv2.imwrite(str(dst_path), img)

print(f"\n‚úÖ Annotated images saved to: {OUTPUT_DIR}")


üìÑ Loaded 287 predictions from ..\data\predictions\cafe_pos\20251105\age_annotated\age_predictions.csv

‚úÖ Annotated images saved to: ..\data\predictions\cafe_pos\20251105\age_annotated


In [107]:
import pandas as pd

# === Paths ===
age_csv = "../data/predictions/cafe_pos/20251105/age_annotated/age_predictions.csv"
results_stage2 = "../results/20251105/results_faces_accounting_20251105.csv"  # your latest merged CSV
output_csv = "../results/20251105/results_faces_accounting_20251105.csv"

# === Load Data ===
age_df = pd.read_csv(age_csv)
results_df = pd.read_csv(results_stage2)

print("‚úÖ Loaded files:")
print(f" - Age predictions: {len(age_df)} rows")
print(f" - Current results (with gender): {len(results_df)} rows\n")

# === Standardize column names ===
age_df.rename(columns={"file": "image_name"}, inplace=True)
age_df["image_name"] = age_df["image_name"].apply(lambda x: x.split("/")[-1])
results_df["image_name"] = results_df["image_name"].apply(lambda x: x.split("/")[-1])

# === Merge on image name ===
merged_df = results_df.merge(
    age_df[["image_name", "predicted_age", "confidence"]],
    on="image_name",
    how="left",
    suffixes=("", "_age")
)

# Rename confidence columns clearly if both exist
if "confidence_age" in merged_df.columns:
    merged_df.rename(columns={"confidence": "gender_confidence", "confidence_age": "age_confidence"}, inplace=True)
else:
    merged_df.rename(columns={"confidence": "gender_confidence"}, inplace=True)

# === Save Final Output ===
merged_df.to_csv(output_csv, index=False)
print(f"‚úÖ Stage 3 results (with age + gender) saved to: {output_csv}")

# Display preview
merged_df.head()


‚úÖ Loaded files:
 - Age predictions: 287 rows
 - Current results (with gender): 796 rows

‚úÖ Stage 3 results (with age + gender) saved to: ../results/20251105/results_faces_accounting_20251105.csv


Unnamed: 0,image_name,cluster_id,AccoID,AccoDocNo,AccoDate,timestamp,AccoAmount,Quantity,Discount,UnitPrice,StockCateDesc,StockName,StockDesciption,predicted_gender,gender_confidence,predicted_age,age_confidence
0,cluster_000_AccoID_1169434_20251105_131026.png,0,1169434,224673/155,2025-11-05 13:10:00,2025-11-05 13:10:26,-85.0,1.0,0.0,29.0,Mineral cooldrinks,COKE,original,Male,0.946,21‚Äì30,0.443
1,cluster_000_AccoID_1169434_20251105_131026.png,0,1169434,224673/155,2025-11-05 13:10:00,2025-11-05 13:10:26,-85.0,1.0,0.0,20.0,Beverages Cold,WATER,thirsty mineral still,Male,0.946,21‚Äì30,0.443
2,cluster_000_AccoID_1169434_20251105_131026.png,0,1169434,224673/155,2025-11-05 13:10:00,2025-11-05 13:10:26,-85.0,1.0,0.0,36.0,Beverages Coffee,COFFEE,cappuccino,Male,0.946,21‚Äì30,0.443
3,cluster_001_AccoID_1168923_20251105_105123.png,1,1168923,224590/74,2025-11-05 10:52:00,2025-11-05 10:51:23,-256.0,1.0,0.0,29.0,Mineral cooldrinks,COKE,original,Female,0.908,66‚Äì80,0.305
4,cluster_001_AccoID_1168923_20251105_105123.png,1,1168923,224590/74,2025-11-05 10:52:00,2025-11-05 10:51:23,-256.0,1.0,0.0,59.0,Breakfast,BREAKFAST,lite,Female,0.908,66‚Äì80,0.305


In [109]:
# ==============================================
# üîπ PHASE 1: Run the ViT Age Classifier
# ==============================================
import torch
from transformers import ViTFeatureExtractor, ViTForImageClassification
from pathlib import Path
from tqdm import tqdm
from PIL import Image
import pandas as pd
import numpy as np

# --- Model setup ---
MODEL_NAME = "nateraw/vit-age-classifier"
print(f"‚è≥ Loading model: {MODEL_NAME}")

model = ViTForImageClassification.from_pretrained(MODEL_NAME)
feature_extractor = ViTFeatureExtractor.from_pretrained(MODEL_NAME)
model.eval()
print("‚úÖ Model loaded successfully!\n")

# --- Paths ---
INPUT_DIR = Path("../data/crops_face/20251107/5_cafe_pos_faces_unique")     # folder with faces
OUTPUT_DIR = Path("../data/predictions/cafe_pos/20251107/age_vit_raw")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
CSV_PATH = OUTPUT_DIR / "age_predictions_vit_raw.csv"

# --- Model labels (from ViT) ---
AGE_LABELS = [
    "0-2", "3-9", "10-19", "20-29",
    "30-39", "40-49", "50-59", "60-69", "more than 70"
]

# --- Run inference ---
results = []
image_paths = sorted([p for p in INPUT_DIR.glob("*.*") if p.suffix.lower() in [".jpg", ".png", ".jpeg"]])
print(f"üìÇ Found {len(image_paths)} face crops.\n")

for img_path in tqdm(image_paths, desc="Predicting age groups (ViT)"):
    try:
        image = Image.open(img_path).convert("RGB")
        inputs = feature_extractor(images=image, return_tensors="pt")

        with torch.no_grad():
            outputs = model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=1).squeeze().tolist()

        top_idx = int(np.argmax(probs))
        predicted_age_raw = AGE_LABELS[top_idx]
        confidence = round(probs[top_idx], 3)

        results.append({
            "file": img_path.name,
            "predicted_age_raw": predicted_age_raw,
            "confidence_raw": confidence
        })
    except Exception as e:
        print(f"‚ö†Ô∏è Skipped {img_path.name} ‚Äî {e}")
        continue

df_raw = pd.DataFrame(results)
df_raw.to_csv(CSV_PATH, index=False)
print(f"\n‚úÖ Saved raw ViT predictions to: {CSV_PATH}")
display(df_raw.head())


‚è≥ Loading model: nateraw/vit-age-classifier




‚úÖ Model loaded successfully!

üìÇ Found 235 face crops.



Predicting age groups (ViT): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 235/235 [01:10<00:00,  3.33it/s]


‚úÖ Saved raw ViT predictions to: ..\data\predictions\cafe_pos\20251107\age_vit_raw\age_predictions_vit_raw.csv





Unnamed: 0,file,predicted_age_raw,confidence_raw
0,cluster_000_AccoID_1170854_20251107_091623.png,20-29,0.742
1,cluster_001_AccoID_1171262_20251107_112821.png,30-39,0.448
2,cluster_002_AccoID_1171659_20251107_135038.png,20-29,0.543
3,cluster_004_AccoID_1171659_20251107_135011.png,20-29,0.396
4,cluster_006_AccoID_1171574_20251107_132453.png,10-19,0.491


In [2]:
# ==============================================
# üîπ PHASE 2: Re-bin ViT predictions
# ==============================================
import pandas as pd
from pathlib import Path

RAW_PATH = "../data/predictions/cafe_pos/20251107/age_vit_raw/age_predictions_vit_raw.csv"
REBIND_PATH = "../data/predictions/cafe_pos/20251107/age_vit_raw/age_vit_rebinned.csv"

df = pd.read_csv(RAW_PATH)
print(f"‚úÖ Loaded {len(df)} raw predictions from ViT model")

# --- Map ViT labels to thesis-style bins ---
BIN_MAP = {
    "0-2": "01‚Äì10",
    "3-9": "01‚Äì10",
    "10-19": "11‚Äì20",
    "20-29": "21‚Äì30",
    "30-39": "31‚Äì40",
    "40-49": "41‚Äì55",
    "50-59": "41‚Äì55",
    "60-69": "56‚Äì65",
    "more than 70": "66-80"  # You can change to "80+" if preferred
}

df["predicted_age"] = df["predicted_age_raw"].map(BIN_MAP)
df["predicted_age"].fillna("Unknown", inplace=True)
df["age_confidence"] = df["confidence_raw"]

# --- Save re-binned predictions ---
Path(REBIND_PATH).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(REBIND_PATH, index=False)
print(f"‚úÖ Saved rebinned age predictions to: {REBIND_PATH}")

# --- Quick summary ---
print("\nüìä Age Group Distribution:")
print(df["predicted_age"].value_counts())
df.head()


‚úÖ Loaded 235 raw predictions from ViT model
‚úÖ Saved rebinned age predictions to: ../data/predictions/cafe_pos/20251107/age_vit_raw/age_vit_rebinned.csv

üìä Age Group Distribution:
predicted_age
21‚Äì30    67
41‚Äì55    53
56‚Äì65    41
11‚Äì20    40
31‚Äì40    28
66-80     3
01‚Äì10     3
Name: count, dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["predicted_age"].fillna("Unknown", inplace=True)


Unnamed: 0,file,predicted_age_raw,confidence_raw,predicted_age,age_confidence
0,cluster_000_AccoID_1170854_20251107_091623.png,20-29,0.742,21‚Äì30,0.742
1,cluster_001_AccoID_1171262_20251107_112821.png,30-39,0.448,31‚Äì40,0.448
2,cluster_002_AccoID_1171659_20251107_135038.png,20-29,0.543,21‚Äì30,0.543
3,cluster_004_AccoID_1171659_20251107_135011.png,20-29,0.396,21‚Äì30,0.396
4,cluster_006_AccoID_1171574_20251107_132453.png,10-19,0.491,11‚Äì20,0.491


In [4]:
# ==============================================
# üîπ PHASE 3: Merge with existing gender results
# ==============================================
import pandas as pd
from pathlib import Path

# --- Inputs ---
results_in  = "../results/20251105/results_faces_accounting_20251105_vit.csv"         # existing (has gender)
age_rebinned = "../data/predictions/cafe_pos/20251105/age_vit_raw/age_vit_rebinned.csv"
results_out = "../results/20251105/results_faces_accounting_20251105_vit.csv"

# --- Load ---
age_df = pd.read_csv(age_rebinned)
results_df = pd.read_csv(results_in)

print("‚úÖ Loaded files:")
print(f" - ViT age predictions: {len(age_df)} rows")
print(f" - Existing results:    {len(results_df)} rows\n")

# --- Standardize keys ---
age_df.rename(columns={"file": "image_name"}, inplace=True)
age_df["image_name"] = age_df["image_name"].astype(str).str.split("/").str[-1]
results_df["image_name"] = results_df["image_name"].astype(str).str.split("/").str[-1]

# --- Merge ---
merged = results_df.merge(
    age_df[["image_name", "predicted_age", "age_confidence"]],
    on="image_name",
    how="left"
)

# --- Save ---
Path(results_out).parent.mkdir(parents=True, exist_ok=True)
merged.to_csv(results_out, index=False)
print(f"‚úÖ Final merged dataset saved to:\n{results_out}")

# --- Check summary ---
n = len(merged)
filled = merged["predicted_age"].notna().sum()
print(f"‚ÑπÔ∏è Added new age predictions for {filled}/{n} rows ({filled/n*100:.2f}%)")

display(merged.head(10))


‚úÖ Loaded files:
 - ViT age predictions: 287 rows
 - Existing results:    796 rows

‚úÖ Final merged dataset saved to:
../results/20251105/results_faces_accounting_20251105_vit.csv
‚ÑπÔ∏è Added new age predictions for 796/796 rows (100.00%)


Unnamed: 0,image_name,cluster_id,AccoID,AccoDocNo,AccoDate,timestamp,AccoAmount,Quantity,Discount,UnitPrice,StockCateDesc,StockName,StockDesciption,predicted_gender,confidence,predicted_age,age_confidence
0,cluster_000_AccoID_1169434_20251105_131026.png,0,1169434,224673/155,2025-11-05 13:10:00,2025-11-05 13:10:26,-85.0,1.0,0.0,29.0,Mineral cooldrinks,COKE,original,Male,0.946,31‚Äì40,0.685
1,cluster_000_AccoID_1169434_20251105_131026.png,0,1169434,224673/155,2025-11-05 13:10:00,2025-11-05 13:10:26,-85.0,1.0,0.0,20.0,Beverages Cold,WATER,thirsty mineral still,Male,0.946,31‚Äì40,0.685
2,cluster_000_AccoID_1169434_20251105_131026.png,0,1169434,224673/155,2025-11-05 13:10:00,2025-11-05 13:10:26,-85.0,1.0,0.0,36.0,Beverages Coffee,COFFEE,cappuccino,Male,0.946,31‚Äì40,0.685
3,cluster_001_AccoID_1168923_20251105_105123.png,1,1168923,224590/74,2025-11-05 10:52:00,2025-11-05 10:51:23,-256.0,1.0,0.0,29.0,Mineral cooldrinks,COKE,original,Female,0.908,11‚Äì20,0.579
4,cluster_001_AccoID_1168923_20251105_105123.png,1,1168923,224590/74,2025-11-05 10:52:00,2025-11-05 10:51:23,-256.0,1.0,0.0,59.0,Breakfast,BREAKFAST,lite,Female,0.908,11‚Äì20,0.579
5,cluster_001_AccoID_1168923_20251105_105123.png,1,1168923,224590/74,2025-11-05 10:52:00,2025-11-05 10:51:23,-256.0,1.0,0.0,129.0,Breakfast,BREAKFAST,avo full house,Female,0.908,11‚Äì20,0.579
6,cluster_001_AccoID_1168923_20251105_105123.png,1,1168923,224590/74,2025-11-05 10:52:00,2025-11-05 10:51:23,-256.0,1.0,0.0,39.0,Beverages Hot,CHAI LATTE,none,Female,0.908,11‚Äì20,0.579
7,cluster_002_AccoID_1169467_20251105_132245.png,2,1169467,224678/161,2025-11-05 13:24:00,2025-11-05 13:22:45,-104.0,1.0,0.0,36.0,Beverages Coffee,COFFEE,cappuccino,Male,0.935,41‚Äì55,0.3
8,cluster_002_AccoID_1169467_20251105_132245.png,2,1169467,224678/161,2025-11-05 13:24:00,2025-11-05 13:22:45,-104.0,1.0,0.0,29.0,Beverages Coffee,COFFEE,americano,Male,0.935,41‚Äì55,0.3
9,cluster_002_AccoID_1169467_20251105_132245.png,2,1169467,224678/161,2025-11-05 13:24:00,2025-11-05 13:22:45,-104.0,1.0,0.0,39.0,Cakes,LEMON DRIZZLE,none,Male,0.935,41‚Äì55,0.3
