In [1]:
import os
from collections import Counter
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
from tqdm import tqdm



In [2]:
# Emotion class names. The list index is the class id used by the model:
# RafDataset assigns RAF-DB folder "k" the class id k - 1, so folder "1"
# is index 0 ("surprise") and folder "7" is index 6 ("neutral").
LABELS = ["surprise", "fear", "disgust", "happy", "sad", "angry", "neutral"]

# Number of output classes for the classifier head.
NUM_CLASSES = len(LABELS)


In [3]:
class RafDataset(Dataset):
    """
    Image-classification dataset over a RAF-DB style directory tree.

    ``root`` is scanned for sub-directories named "1" .. "7". Every matching
    image file inside folder ``k`` becomes one sample whose class index is
    ``k - 1``.

    RAF-DB folder mapping
    1: surprise
    2: fear
    3: disgust
    4: happy
    5: sad
    6: angry
    7: neutral

    Args:
        root: Directory that holds the numbered class folders.
        transform: Optional callable applied to the RGB PIL image in
            ``__getitem__``.
        verbose: When True, print the image count of each folder and a
            warning for each missing folder.
        extensions: File-name suffix (or iterable of suffixes) to accept,
            compared case-insensitively. The default keeps the original
            ".jpg"-only behaviour.

    Raises:
        RuntimeError: If no image is found under any class folder.
    """
    def __init__(self, root, transform=None, verbose=True, extensions=(".jpg",)):
        self.transform = transform
        self.samples = []

        # Accept a single suffix string as well as an iterable of suffixes.
        if isinstance(extensions, str):
            extensions = (extensions,)
        suffixes = tuple(ext.lower() for ext in extensions)

        for label in range(1, 8):
            label_dir = os.path.join(root, str(label))
            if not os.path.isdir(label_dir):
                if verbose:
                    print(f"[WARN] missing dir: {label_dir}")
                continue

            # os.listdir() returns entries in arbitrary order. Sorting keeps
            # sample indices stable across runs and machines, which matters
            # for any index-based split of this dataset.
            files = sorted(
                f for f in os.listdir(label_dir)
                if f.lower().endswith(suffixes)
            )

            if verbose:
                print(f"label {label} -> {len(files)} images")

            for fname in files:
                # Folder names are 1-based; class indices are 0-based.
                self.samples.append(
                    (os.path.join(label_dir, fname), label - 1)
                )

        if len(self.samples) == 0:
            raise RuntimeError("No images found in dataset")

    def __len__(self):
        """Return the number of (path, label) samples collected."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and passed through ``transform`` when
        one was supplied; ``label`` is the zero-based class index.
        """
        path, label = self.samples[idx]
        # convert() returns a new image, so the source file can be closed
        # right away instead of waiting for garbage collection.
        with Image.open(path) as raw:
            img = raw.convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img, label


In [4]:
# Preprocessing shared by the datasets and by single-image inference:
# resize to 224x224, convert to a tensor, then normalise each channel.
# The mean/std values are the ImageNet channel statistics that torchvision
# documents for its pretrained models.
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor(), _normalize]
)


In [5]:
# Build the train and test datasets from their respective folder trees.
# Both use the same preprocessing pipeline and print per-class counts.
train_dataset = RafDataset(root="DATASET/train", transform=transform, verbose=True)
test_dataset = RafDataset(root="DATASET/test", transform=transform, verbose=True)


label 1 -> 1290 images
label 2 -> 281 images
label 3 -> 717 images
label 4 -> 4772 images
label 5 -> 1982 images
label 6 -> 705 images
label 7 -> 2524 images
label 1 -> 329 images
label 2 -> 74 images
label 3 -> 160 images
label 4 -> 1185 images
label 5 -> 478 images
label 6 -> 162 images
label 7 -> 680 images


In [6]:
# Hold out 20% of the training images for validation.
#
# This cell rebinds `train_dataset`. If it is re-run, `train_dataset` is
# already the Subset produced below, so unwrap it first; otherwise each
# re-run would split an ever-shrinking 80% slice.
full_train_dataset = (
    train_dataset.dataset
    if isinstance(train_dataset, torch.utils.data.Subset)
    else train_dataset
)

train_size = int(0.8 * len(full_train_dataset))
val_size   = len(full_train_dataset) - train_size

# A fixed generator seed makes random_split pick the same indices on
# every run, so train/val metrics are comparable between runs.
SPLIT_SEED = 42

train_dataset, val_dataset = random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)


In [7]:
# One batch size for all three loaders. Only the training loader shuffles;
# validation and test keep dataset order.
BATCH_SIZE = 32

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [8]:
# Start from torchvision's default pretrained ResNet-18.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freeze everything except parameters whose name contains "layer4".
for name, p in model.named_parameters():
    p.requires_grad = "layer4" in name

# Swap the classifier head for one sized to our classes. The new layer is
# created after the freeze above, so its requires_grad flag is not touched.
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)


In [9]:
# Multi-class classification loss applied to the model's raw logits.
criterion = nn.CrossEntropyLoss()

# Hand Adam only the parameters that are still trainable.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-4)


In [10]:
def train_one_epoch(model, loader):
    """
    Run one optimisation pass over ``loader`` and report epoch metrics.

    Uses the notebook-level ``optimizer`` and ``criterion`` objects.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(inputs, targets)`` tensor batches.

    Returns:
        ``(mean_loss, accuracy)``, both averaged over samples.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()

    # Send each batch to wherever the model's parameters live, so the
    # function keeps working if the model is moved off the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    total_loss, correct, total = 0.0, 0, 0

    for x, y in loader:
        if device is not None:
            x, y = x.to(device), y.to(device)

        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        batch_size = y.size(0)
        # Assumes `criterion` returns the batch mean (the default for
        # nn.CrossEntropyLoss). Weighting by batch size gives a per-sample
        # epoch average even when the last batch is shorter.
        total_loss += loss.item() * batch_size
        pred = out.argmax(1)
        correct += (pred == y).sum().item()
        total += batch_size

    return total_loss / total, correct / total


In [11]:
def val_one_epoch(model, loader):
    """
    Evaluate ``model`` on ``loader`` without updating any weights.

    Uses the notebook-level ``criterion`` object.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        loader: Iterable yielding ``(inputs, targets)`` tensor batches.

    Returns:
        ``(mean_loss, accuracy)``, both averaged over samples.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()

    # Send each batch to wherever the model's parameters live, so the
    # function keeps working if the model is moved off the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    total_loss, correct, total = 0.0, 0, 0

    with torch.no_grad():
        for x, y in loader:
            if device is not None:
                x, y = x.to(device), y.to(device)

            out = model(x)
            loss = criterion(out, y)

            batch_size = y.size(0)
            # Assumes `criterion` returns the batch mean (the default for
            # nn.CrossEntropyLoss). Weighting by batch size gives a
            # per-sample average even when the last batch is shorter.
            total_loss += loss.item() * batch_size
            pred = out.argmax(1)
            correct += (pred == y).sum().item()
            total += batch_size

    return total_loss / total, correct / total


In [12]:
# Number of full passes over the training split.
epochs = 5

for epoch in tqdm(range(epochs), desc="Epoch"):
    # One training pass followed by one validation pass per epoch.
    train_loss, train_acc = train_one_epoch(model, train_loader)
    val_loss, val_acc = val_one_epoch(model, val_loader)

    epoch_summary = (
        f"Epoch {epoch+1}/{epochs} | "
        f"loss {train_loss:.3f} acc {train_acc:.3f} | "
        f"val_loss {val_loss:.3f} val_acc {val_acc:.3f}"
    )
    print(epoch_summary)


Epoch:  20%|██        | 1/5 [10:58<43:52, 658.23s/it]

Epoch 1/5 | loss 1.051 acc 0.627 | val_loss 0.855 val_acc 0.683


Epoch:  40%|████      | 2/5 [20:08<29:44, 594.96s/it]

Epoch 2/5 | loss 0.502 acc 0.831 | val_loss 0.821 val_acc 0.705


Epoch:  60%|██████    | 3/5 [30:05<19:51, 595.58s/it]

Epoch 3/5 | loss 0.176 acc 0.956 | val_loss 0.938 val_acc 0.708


Epoch:  80%|████████  | 4/5 [37:40<09:00, 540.23s/it]

Epoch 4/5 | loss 0.053 acc 0.993 | val_loss 0.979 val_acc 0.715


Epoch: 100%|██████████| 5/5 [44:30<00:00, 534.04s/it]

Epoch 5/5 | loss 0.024 acc 0.997 | val_loss 0.937 val_acc 0.727





In [13]:
def predict_image(image_path, model):
    """
    Classify a single image file and return the top emotion.

    Uses the notebook-level ``transform`` for preprocessing and ``LABELS``
    to turn the predicted class index into a name.

    Args:
        image_path: Path of the image to classify.
        model: Classifier producing one logit per entry of ``LABELS``;
            it is switched to eval mode.

    Returns:
        ``(label, confidence)``: the predicted emotion name and its softmax
        probability as a Python float.
    """
    model.eval()

    # convert() returns a new image, so the source file can be closed
    # right away instead of waiting for garbage collection.
    with Image.open(image_path) as raw:
        img = raw.convert("RGB")

    x = transform(img).unsqueeze(0)  # add the batch dimension

    # Run the input on the same device as the model's parameters.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        x = x.to(first_param.device)

    with torch.no_grad():
        logits = model(x)
        probs = F.softmax(logits, dim=1)
        conf, pred = torch.max(probs, dim=1)

    label = LABELS[pred.item()]
    confidence = conf.item()

    return label, confidence


In [18]:
# Folder of face crops to classify.
IMAGE_DIR = "app/facial/tmp/faces"

# os.listdir() returns names in arbitrary order. Sorting visits the frames
# in file-name order, the same order the later `results` cell uses.
for fname in sorted(os.listdir(IMAGE_DIR)):
    if not fname.lower().endswith(".jpg"):
        continue

    path = os.path.join(IMAGE_DIR, fname)
    label, conf = predict_image(path, model)
    print(f"{fname} -> {label} ({conf:.2f})")


face_000s_0.jpg -> neutral (0.54)
face_005s_0.jpg -> neutral (0.55)
face_010s_0.jpg -> happy (0.96)
face_015s_0.jpg -> happy (0.57)
face_020s_0.jpg -> happy (0.92)
face_025s_0.jpg -> happy (0.77)
face_030s_0.jpg -> surprise (0.30)
face_035s_0.jpg -> happy (0.91)
face_040s_0.jpg -> happy (0.78)
face_045s_0.jpg -> neutral (0.85)
Image_20251205_13608_723 PM.jpg -> surprise (0.80)


In [15]:
# Collapses the predicted emotions into three coarse states:
# "happiness", "neutral" and "tension".
STATE_MAP = {
    "happy": "happiness",
    "neutral": "neutral",

    # LABELS spells this class "angry", so that is the string the model's
    # predictions carry. The "anger" spelling is kept as an alias for any
    # caller that already uses it.
    "angry": "tension",
    "anger": "tension",
    "disgust": "tension",
    "fear": "tension",
    "surprise": "tension",
    "sad": "tension",
}



In [22]:
def emotion_to_state(emotion, confidence, th=0.6):
    """
    Map a predicted emotion to a coarse state via ``STATE_MAP``.

    Predictions whose confidence is below ``th`` are reported as "neutral",
    as are emotions that have no entry in ``STATE_MAP``.

    Args:
        emotion: Emotion name to translate.
        confidence: Score attached to the prediction.
        th: Minimum confidence for the emotion to be trusted.

    Returns:
        ``(state, confidence)``; the confidence is passed through unchanged.
    """
    trusted = not (confidence < th)
    state = STATE_MAP.get(emotion, "neutral") if trusted else "neutral"
    return state, confidence


In [23]:
# Classify every .jpg in IMAGE_DIR in file-name order and keep one record
# per image for the later cells.
jpg_names = [
    name for name in sorted(os.listdir(IMAGE_DIR))
    if name.lower().endswith(".jpg")
]

results = []
for fname in jpg_names:
    path = os.path.join(IMAGE_DIR, fname)
    emotion, conf = predict_image(path, model)

    record = {
        "image": fname,
        "emotion": emotion,
        "confidence": conf
    }
    results.append(record)


In [24]:
# Header text reads "3-class conversion results".
print("=== 3クラス変換結果 ===")

# One line per image: file name, raw emotion with confidence, coarse state.
for r in results:
    state, conf = emotion_to_state(r["emotion"], r["confidence"])
    line = (
        f"{r['image']:20s} | "
        f"{r['emotion']:9s} ({r['confidence']:.2f}) -> {state}"
    )
    print(line)


=== 3クラス変換結果 ===
Image_20251205_13608_723 PM.jpg | surprise  (0.80) -> tension
face_000s_0.jpg      | neutral   (0.54) -> neutral
face_005s_0.jpg      | neutral   (0.55) -> neutral
face_010s_0.jpg      | happy     (0.96) -> happiness
face_015s_0.jpg      | happy     (0.57) -> neutral
face_020s_0.jpg      | happy     (0.92) -> happiness
face_025s_0.jpg      | happy     (0.77) -> happiness
face_030s_0.jpg      | surprise  (0.30) -> neutral
face_035s_0.jpg      | happy     (0.91) -> happiness
face_040s_0.jpg      | happy     (0.78) -> happiness
face_045s_0.jpg      | neutral   (0.85) -> neutral


In [25]:
def is_tension_window(emotions, min_count=2):
    """
    Decide whether a window of emotion names counts as "tension".

    Args:
        emotions: Iterable of emotion names.
        min_count: Minimum number of tension-type emotions required.

    Returns:
        True when at least ``min_count`` entries of ``emotions`` are
        tension-type emotions, otherwise False.
    """
    # LABELS spells the class "angry", so that is what predictions contain.
    # "anger" is kept so callers passing the old spelling still match.
    tension_set = {"angry", "anger", "disgust", "fear", "surprise", "sad"}
    return sum(e in tension_set for e in emotions) >= min_count


In [26]:
# Number of consecutive frames examined together.
WINDOW = 3

# Header text reads "time-series tension judgement".
print("\n=== 時系列 緊張判定 ===")

# For each frame, take it plus up to WINDOW - 1 preceding frames and test
# that window for tension. Early frames get shorter windows.
for i in range(len(results)):
    start = max(0, i - WINDOW + 1)
    window = results[start:i + 1]
    emotions = [w["emotion"] for w in window]

    tension = is_tension_window(emotions)
    print(f"{i:02d} | {emotions} -> tension={tension}")



=== 時系列 緊張判定 ===
00 | ['surprise'] -> tension=False
01 | ['surprise', 'neutral'] -> tension=False
02 | ['surprise', 'neutral', 'neutral'] -> tension=False
03 | ['neutral', 'neutral', 'happy'] -> tension=False
04 | ['neutral', 'happy', 'happy'] -> tension=False
05 | ['happy', 'happy', 'happy'] -> tension=False
06 | ['happy', 'happy', 'happy'] -> tension=False
07 | ['happy', 'happy', 'surprise'] -> tension=False
08 | ['happy', 'surprise', 'happy'] -> tension=False
09 | ['surprise', 'happy', 'happy'] -> tension=False
10 | ['happy', 'happy', 'neutral'] -> tension=False


In [28]:
# Save only the model's state_dict rather than the whole model object.
model_weights = model.state_dict()
torch.save(model_weights, "rafdb_resnet18.pth")