# [이미지 불러와서 데이터 셋 저장]

In [2]:
# 일반적인 포멧의 이미지 처리 모듈
import cv2
# 시각화
import matplotlib.pyplot as plt
# 이미지 데이터가 저장된 타입 관련 모듈
import numpy as np
# 폴더, 파일, 경로 관련 모듈
import os 

import koreanize_matplotlib

import joblib  # 모델 저장용

from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report


In [3]:
#pip install scikit-image

In [3]:
from skimage.feature import hog

In [5]:
# 1. HOG 특징 추출 함수
def extract_hog_features(img, img_size=(128, 128)):
    """Resize an image and return its HOG descriptor.

    Args:
        img: Image array accepted by ``cv2.resize``. The callers in this file
            pass single-channel grayscale arrays.
        img_size: Target size forwarded to ``cv2.resize``; resizing every image
            to the same size keeps all feature vectors the same length.

    Returns:
        The feature array returned by ``skimage.feature.hog``.
    """
    resized = cv2.resize(img, img_size)
    # visualize=False: only the descriptor is needed. The previous
    # visualize=True rendered a HOG image that was immediately discarded.
    return hog(
        resized,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        block_norm='L2-Hys',
        visualize=False,
    )

# 2. 이미지 불러오기 + 라벨링
# File extensions (lower-case) treated as images for BOTH classes.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _load_labeled_dir(directory, label, X, y):
    """Append HOG features and `label` for every readable image in `directory`."""
    # sorted(): os.listdir returns entries in arbitrary order, which would make
    # the sample order (and therefore the seeded train/test split) vary
    # between machines and runs.
    for file in sorted(os.listdir(directory)):
        if not file.lower().endswith(_IMAGE_EXTENSIONS):
            continue
        img = cv2.imread(os.path.join(directory, file), cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals an unreadable file by returning None; skip it.
            continue
        X.append(extract_hog_features(img))
        y.append(label)


def load_target_and_others(target_dir='./data/image/gray', others_dir='./data/image/gray_others'):
    """Build the HOG feature matrix and binary labels from two image folders.

    Images in `target_dir` get label 1 and images in `others_dir` get label 0.
    Both folders are filtered with the same extension list; previously the
    "others" folder silently ignored ``.jpeg`` files that the target folder
    accepted.

    Args:
        target_dir: Folder holding images of the target class.
        others_dir: Folder holding images of everything else.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: one HOG feature vector per loaded
        image and the matching 0/1 labels, target images first.

    Raises:
        OSError: Propagated from ``os.listdir`` if a folder cannot be listed.
    """
    X = []
    y = []
    _load_labeled_dir(target_dir, 1, X, y)
    _load_labeled_dir(others_dir, 0, X, y)
    return np.array(X), np.array(y)

# 3. Load the dataset: X holds one HOG feature vector per image, y the 0/1 labels
X, y = load_target_and_others()

# 4. Train/test split: hold out 20% for testing, stratified on y so both
#    splits keep the class ratio; random_state fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# # 5. 모델 학습
# scv_model = SVC(kernel='linear', probability=True)
# scv_model.fit(X_train, y_train)

# # 6. 예측 및 평가
# y_pred = scv_model.predict(X_test)
# print("✅ 정확도:", accuracy_score(y_test, y_pred))
# print("📊 분류 리포트:\n", classification_report(y_test, y_pred))

In [6]:
def test_model_on_image(image_path, model, target_name="Target"):
    """Detect faces in an image, classify each one with `model`, and display the result.

    Each detected face is cropped from the grayscale image, converted to HOG
    features with ``extract_hog_features`` and passed to ``model.predict``.
    A prediction of 1 is drawn with `target_name` in green; anything else is
    drawn as "Others" in red (colors are BGR tuples). The annotated image is
    shown in an OpenCV window that blocks until a key is pressed.

    Args:
        image_path: Path of the image file to analyse.
        model: Fitted classifier exposing ``predict``.
        target_name: Caption used for faces predicted as class 1.

    Returns:
        None. Prints a message and returns early if the image or the face
        detector cannot be loaded.
    """
    # 1. Face detector (Haar cascade shipped with OpenCV)
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    if face_cascade.empty():
        # A cascade that failed to load would otherwise only fail later,
        # inside detectMultiScale, with a far less helpful error.
        print("❌ 얼굴 검출기 로딩 실패")
        return

    # 2. Read the image
    img = cv2.imread(image_path)
    if img is None:
        print("❌ 이미지 로딩 실패")
        return

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # 3. Detect faces
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
    print(f"🔍 감지된 얼굴 수: {len(faces)}")

    for (x, y, w, h) in faces:
        face_img = gray[y:y+h, x:x+w]
        features = extract_hog_features(face_img)

        # 4. Predict
        prediction = model.predict([features])[0]
        is_target = prediction == 1
        label = target_name if is_target else "Others"

        # 5. Draw the result
        color = (0, 255, 0) if is_target else (0, 0, 255)
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        # Clamp the text baseline so the caption stays inside the image when
        # the face touches the top edge (y - 10 could be negative).
        text_y = int(max(y - 10, 25))
        cv2.putText(img, label, (x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    # 6. Show the result; always tear the window down, even if the wait is interrupted
    try:
        cv2.imshow("검증 결과", img)
        cv2.waitKey(0)
    finally:
        cv2.destroyAllWindows()

In [7]:
#test_model_on_image('./test/IMG_4736.jpg', scv_, target_name="Karina")

In [8]:
from sklearn.metrics import accuracy_score, classification_report

def evaluate_model(model, X_test, y_test, model_name="Model"):
    """Print the accuracy and classification report of `model` on a test set.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features passed to ``model.predict``.
        y_test: True labels compared against the predictions.
        model_name: Name shown in the printed header.
    """
    predicted = model.predict(X_test)
    acc = accuracy_score(y_test, predicted)
    header_lines = (
        f"\n📌 [{model_name}] 평가 결과",
        f"✅ 정확도: {acc:.4f}",
        "📊 분류 리포트:",
    )
    # One print call with a newline separator emits the same three lines.
    print(*header_lines, sep="\n")
    print(classification_report(y_test, predicted))

In [9]:
from sklearn.ensemble import RandomForestClassifier

def train_model_random_forest(X_train, y_train, X_test, y_test):
    """Fit a 100-tree random forest, print its evaluation, and return it.

    Note: this name is defined again later in the file with the same body;
    when the cells run in order, the later definition is the one in effect.
    """
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    forest.fit(X_train, y_train)
    evaluate_model(forest, X_test, y_test, model_name="Random Forest")
    return forest

from sklearn.linear_model import LogisticRegression

def train_model_logistic(X_train, y_train, X_test, y_test):
    """Fit a logistic regression (max_iter=1000), print its evaluation, and return it.

    Note: this name is defined again later in the file with the same body;
    when the cells run in order, the later definition is the one in effect.
    """
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(X_train, y_train)
    evaluate_model(classifier, X_test, y_test, model_name="Logistic Regression")
    return classifier

In [10]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

# 1. SVC (선형)
def train_model_svc_linear(X_train, y_train, X_test, y_test):
    """Fit a linear-kernel SVC with probability estimates, print its evaluation, and return it.

    Args:
        X_train, y_train: Training features and labels passed to ``fit``.
        X_test, y_test: Held-out features and labels passed to ``evaluate_model``.

    Returns:
        The fitted ``SVC`` instance.
    """
    # random_state seeds the shuffling scikit-learn performs for the
    # probability estimates enabled by probability=True, so predict_proba is
    # reproducible across runs (same fixed seed as the tree-based trainers).
    model = SVC(kernel='linear', probability=True, random_state=42)
    model.fit(X_train, y_train)
    evaluate_model(model, X_test, y_test, "SVC (linear)")
    return model

# 2. SVC (RBF)
def train_model_svc_rbf(X_train, y_train, X_test, y_test):
    """Fit an RBF-kernel SVC with probability estimates, print its evaluation, and return it.

    Args:
        X_train, y_train: Training features and labels passed to ``fit``.
        X_test, y_test: Held-out features and labels passed to ``evaluate_model``.

    Returns:
        The fitted ``SVC`` instance.
    """
    # random_state seeds the shuffling scikit-learn performs for the
    # probability estimates enabled by probability=True, so predict_proba is
    # reproducible across runs (same fixed seed as the tree-based trainers).
    model = SVC(kernel='rbf', probability=True, random_state=42)
    model.fit(X_train, y_train)
    evaluate_model(model, X_test, y_test, "SVC (RBF)")
    return model

# 3. Random Forest
def train_model_random_forest(X_train, y_train, X_test, y_test):
    """Fit a seeded 100-tree random forest, print its evaluation, and return it."""
    forest_params = {"n_estimators": 100, "random_state": 42}
    forest = RandomForestClassifier(**forest_params)
    forest.fit(X_train, y_train)
    evaluate_model(forest, X_test, y_test, model_name="Random Forest")
    return forest

# 4. Decision Tree
def train_model_decision_tree(X_train, y_train, X_test, y_test):
    """Fit a seeded decision tree, print its evaluation, and return it."""
    tree = DecisionTreeClassifier(random_state=42)
    tree.fit(X_train, y_train)
    evaluate_model(tree, X_test, y_test, model_name="Decision Tree")
    return tree

# 5. Logistic Regression
def train_model_logistic(X_train, y_train, X_test, y_test):
    """Fit a logistic regression capped at 1000 iterations, print its evaluation, and return it."""
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(X_train, y_train)
    evaluate_model(classifier, X_test, y_test, model_name="Logistic Regression")
    return classifier

# 6. KNN
def train_model_knn(X_train, y_train, X_test, y_test):
    """Fit a 3-nearest-neighbours classifier, print its evaluation, and return it."""
    neighbours = KNeighborsClassifier(n_neighbors=3)
    neighbours.fit(X_train, y_train)
    evaluate_model(neighbours, X_test, y_test, model_name="KNN")
    return neighbours

# 7. Naive Bayes
def train_model_naive_bayes(X_train, y_train, X_test, y_test):
    """Fit a Gaussian naive Bayes classifier, print its evaluation, and return it."""
    bayes = GaussianNB()
    bayes.fit(X_train, y_train)
    evaluate_model(bayes, X_test, y_test, model_name="Naive Bayes")
    return bayes

In [11]:
# Example: train and evaluate the random forest model
rf_model = train_model_random_forest(X_train, y_train, X_test, y_test)

# Example: train and evaluate the KNN model
knn_model = train_model_knn(X_train, y_train, X_test, y_test)

# Linear-kernel SVC
scv_lr_model = train_model_svc_linear(X_train,y_train, X_test,y_test)

# RBF-kernel SVC
scv_RBF_model = train_model_svc_rbf(X_train,y_train, X_test,y_test)

# Decision tree
dt_model = train_model_decision_tree(X_train,y_train, X_test,y_test)

# Logistic regression
log_model = train_model_logistic(X_train,y_train, X_test,y_test)



📌 [Random Forest] 평가 결과
✅ 정확도: 0.7570
📊 분류 리포트:
              precision    recall  f1-score   support

           0       0.74      0.80      0.77       288
           1       0.78      0.71      0.74       280

    accuracy                           0.76       568
   macro avg       0.76      0.76      0.76       568
weighted avg       0.76      0.76      0.76       568


📌 [KNN] 평가 결과
✅ 정확도: 0.7113
📊 분류 리포트:
              precision    recall  f1-score   support

           0       0.76      0.63      0.69       288
           1       0.68      0.79      0.73       280

    accuracy                           0.71       568
   macro avg       0.72      0.71      0.71       568
weighted avg       0.72      0.71      0.71       568


📌 [SVC (linear)] 평가 결과
✅ 정확도: 0.8451
📊 분류 리포트:
              precision    recall  f1-score   support

           0       0.85      0.84      0.85       288
           1       0.84      0.85      0.84       280

    accuracy                           0.85   

In [18]:
# Run face detection + classification on a sample photo using the KNN model.
# The function shows the result in an OpenCV window and waits for a key press.
test_model_on_image('./test/IMG_4728.jpg', knn_model, target_name="Karina") 

🔍 감지된 얼굴 수: 4


In [13]:
# Persist the fitted RBF-kernel SVC to model.pkl in the current working directory
joblib.dump(scv_RBF_model, 'model.pkl')
print("✅ 모델 저장 완료: model.pkl")

✅ 모델 저장 완료: model.pkl


In [1]:
import matplotlib.pyplot as plt

# NOTE: this cell relies on names created by earlier cells (accuracy_score,
# X_test, y_test and the fitted *_model objects). Running it on a fresh kernel
# before those cells raises NameError, so run the notebook top to bottom.

# Test-set accuracy per model. Labels match the names printed by
# evaluate_model (the earlier "SVC_liner" / "scv_RBF_model" / "KNN_model"
# labels were misspelled or inconsistent on the chart).
accuracies = {
    label: accuracy_score(y_test, fitted.predict(X_test))
    for label, fitted in (
        ("SVC (linear)", scv_lr_model),
        ("Random Forest", rf_model),
        ("Logistic Regression", log_model),
        ("SVC (RBF)", scv_RBF_model),
        ("KNN", knn_model),
        ("Decision Tree", dt_model),
    )
}

# Bar chart comparing the models; the y-axis is fixed to the full [0, 1] range.
fig, ax = plt.subplots(figsize=(15, 10))
ax.bar(list(accuracies.keys()), list(accuracies.values()), color='skyblue')
ax.set_ylabel("Accuracy")
ax.set_title("모델별 정확도 비교")
ax.set_ylim(0, 1)
plt.show()

NameError: name 'accuracy_score' is not defined