#### [머신러닝(CV) 미니 프로젝트]
- 운전자 부주의 분석
- 현우 : 운전자 하품 

[0] img → csv <HR>

In [42]:
# img → csv
# from img_to_csv import *

# preprocess_images_to_csv('./data/img/no_yawn',
#                          './data/csv',
#                          'yawn',
#                          'no_yawn'
# )

[1] 모듈 로딩 및 사용자 정의 함수 로딩<hr>


In [43]:
# -------------------------------------------------------------------------------------
# [1-1] 모듈 로딩
# -------------------------------------------------------------------------------------
# 기본
import os
import glob
import cv2

# 경고 무시
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# 전처리 관련
import numpy as np

# ML학습 관련
from sklearn.base import ClassifierMixin

# ML 데이터셋 및 전처리 관련
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ML CV, Pipeline 관련 
from sklearn.pipeline import Pipeline 

# ML 성능지표 관련
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report 
from sklearn.utils import all_estimators

# ML 모델 저장
import joblib

In [44]:
# -------------------------------------------------------------------------------------
# [1-2] 사용자 정의 함수 (전처리 및 데이터 로딩)
# -------------------------------------------------------------------------------------
DATA_DIR = "./data"
CLASSES = ["no_yawn", "yawn"]  # label 0, 1
IMG_SIZE = (64, 64)

# Haar face detector
face_cascade = cv2.CascadeClassifier("./haarcascade_frontalface_default.xml")

def extract_hog(gray_64):
    hog = cv2.HOGDescriptor(
        _winSize=IMG_SIZE,
        _blockSize=(16,16),
        _blockStride=(8,8),
        _cellSize=(8,8),
        _nbins=9
    )
    return hog.compute(gray_64).flatten()

def detect_largest_face(gray):
    
    # 얼굴 여러개면 가장 큰 것 1개만 사용
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(40,40))
    if len(faces) == 0:
        return None
    x,y,w,h = max(faces, key=lambda b: b[2]*b[3])
    return (x,y,w,h)

def img_to_feature(img_bgr):
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    box = detect_largest_face(gray)
    if box is None:
        return None
    x,y,w,h = box
    face = gray[y:y+h, x:x+w]
    face = cv2.resize(face, IMG_SIZE, interpolation=cv2.INTER_AREA)
    feat = extract_hog(face)
    return feat

def load_dataset():
    x, y = [], []
    for label, cls in enumerate(CLASSES):
        paths = glob.glob(os.path.join(DATA_DIR, cls, "*"))
        for p in paths:
            img = cv2.imread(p)
            if img is None:
                continue
            feat = img_to_feature(img)
            if feat is None:
                continue
            x.append(feat)
            y.append(label)

    x = np.array(x, dtype=np.float32)
    y = np.array(y, dtype=np.int64)
    return x, y

[3] 데이터 로딩 및 피쳐와 타겟 분리 <hr>

In [45]:
# -------------------------------------------------------------------------------------
# [3-1] 데이터 준비
# -------------------------------------------------------------------------------------
dataAR = load_dataset()
print(dataAR)

KeyboardInterrupt: 

In [None]:
# --------------------------------------------------------------
# [3-1] 피쳐, 타겟 설정
# --------------------------------------------------------------
featureDF, targetSR = load_dataset()

print(f"\n최종 데이터 shape: featureDF{featureDF.shape}, targetSR{targetSR.shape}")


최종 데이터 shape: featureDF(826, 1764), targetSR(826,)


[4] 훈련용(feature) / 테스트용(target) 분리 <hr>

In [None]:
# -------------------------------------------------------------------------------------
# [4-1] 데이터 분리
# -------------------------------------------------------------------------------------
x_train, x_test, y_train, y_test = train_test_split(
    featureDF,
    targetSR,
    test_size=0.2,
    random_state=42,
    stratify=targetSR
)

[5] all_estimators <hr>

In [None]:
# -------------------------------------------------------------------------------------
# [5-1] 모델 순회 및 교차 검증 (All Estimators)
# -------------------------------------------------------------------------------------
estimators = all_estimators(type_filter="classifier")
best = {"name": None, "model": None, "acc": -1}

for name, Est in estimators:
    try:
        model = Est()
        if not isinstance(model, ClassifierMixin):
            continue

        pipe = Pipeline([
            ("scaler", StandardScaler()),
            ("clf", model),
        ])

        pipe.fit(x_train, y_train)
        pred = pipe.predict(x_test)
        acc = accuracy_score(y_test, pred)

        if acc > best["acc"]:
            best.update({"name": name, "model": pipe, "acc": acc})
            print(f"★ BEST update: {name} acc={acc:.4f}")

    except Exception:
        # all_estimators는 실패하는 모델이 꽤 있어서 그냥 스킵
        continue

if best["model"] is None:
    raise RuntimeError("학습 가능한 모델을 찾지 못함")

# -------------------------------------------------------------------------------------
# [5-2] 결과 정리 및 최적 모델 선정
# -------------------------------------------------------------------------------------
print("\n=== BEST RESULT ===")
print("Model:", best["name"])
print("Accuracy:", best["acc"])
print(classification_report(y_test, best["model"].predict(x_test), target_names=CLASSES))

★ BEST update: AdaBoostClassifier acc=0.8373
★ BEST update: BaggingClassifier acc=0.8735
★ BEST update: CalibratedClassifierCV acc=0.8855
★ BEST update: ExtraTreesClassifier acc=0.9036
★ BEST update: NuSVC acc=0.9277

=== BEST RESULT ===
Model: NuSVC
Accuracy: 0.927710843373494
              precision    recall  f1-score   support

     no_yawn       0.91      0.95      0.93        82
        yawn       0.95      0.90      0.93        84

    accuracy                           0.93       166
   macro avg       0.93      0.93      0.93       166
weighted avg       0.93      0.93      0.93       166



[6] 최적 모델 저장 <hr>

In [None]:
# -------------------------------------------------------------------------------------
# [6] 결과 정리 및 최적 모델 선정
# -------------------------------------------------------------------------------------
joblib.dump(best["model"], "best_yawn_model.joblib")
print("\nSaved -> best_yawn_model.joblib")


Saved -> best_yawn_model.joblib
