# 脆性X综合征与正常人脸二分类非端到端模型

In [2]:
from deepface import DeepFace
import os
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.decomposition import PCA
from sklearn.svm import SVC
import math
import joblib
from sklearn.metrics import classification_report

设置数据源路径以及保存模型位置

In [3]:
NORMALIZED_IMG_DIR = './dataset/'
clf_model_name = 'Fragile_X-normal.pkl'
pca_model_name = 'Fragile_X-normal_pca.pkl'

读取所有遗传病类型

In [4]:
class_list = os.listdir(NORMALIZED_IMG_DIR)

引入FaceNet

In [5]:
model = DeepFace.build_model('Facenet')

使用FaceNet获取人脸embedding特征向量

In [6]:
embeddings = {}

In [7]:
for class_name in class_list:
#     embeddings[class_name] = DeepFace.represent(os.listdir(os.path.join(NORMALIZED_IMG_DIR, class_name)), model_name = 'Facenet', enforce_detection=False)
    embeddings[class_name] = []
    for file_name in os.listdir(os.path.join(NORMALIZED_IMG_DIR, class_name)):
        print(f"getting {file_name}'s embedding")
        embeddings[class_name].append(
            DeepFace.represent(os.path.join(NORMALIZED_IMG_DIR, class_name, file_name), model_name = 'Facenet', model=model, enforce_detection=False)
        )

getting 100.jpg's embedding
getting 101.jpg's embedding
getting 102.jpg's embedding
getting 107.jpg's embedding
getting 111.jpg's embedding
getting 115.jpg's embedding
getting 116.jpg's embedding
getting 117.jpg's embedding
getting 118.jpg's embedding
getting 119.jpg's embedding
getting 122.jpg's embedding
getting 124.jpg's embedding
getting 128.jpg's embedding
getting 129.jpg's embedding
getting 130.jpg's embedding
getting 132.jpg's embedding
getting 133.jpg's embedding
getting 134.jpg's embedding
getting 135.jpg's embedding
getting 136.jpg's embedding
getting 137.jpg's embedding
getting 138.jpg's embedding
getting 139.jpg's embedding
getting 14.jpg's embedding
getting 140.jpg's embedding
getting 141.jpg's embedding
getting 142.jpg's embedding
getting 143.jpg's embedding
getting 144.jpg's embedding
getting 145.jpg's embedding
getting 146.jpg's embedding
getting 147.jpg's embedding
getting 15.jpg's embedding
getting 157.jpg's embedding
getting 25.jpg's embedding
getting 26.jpg's embedd

In [8]:
X = []
Y = []
for key, value in embeddings.items():
    X = X + value
    Y = Y + [key] * len(value)

In [10]:
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2)

网格搜索

In [11]:
best_score = -1
best_kernel = ''
best_pca_dim = -1
min_dim = math.inf
for class_name in class_list:
    min_dim = min(min_dim, len(os.listdir(os.path.join(NORMALIZED_IMG_DIR, class_name))))
min_dim = min(min_dim, 129)
print(f'searching pca dim in range 1 t4o {min_dim}')
for n_dim in range(1, min_dim):
    pca = PCA(n_components=n_dim)
    pca = pca.fit(X)
    X_dr = pca.transform(Xtrain)
    kernels = ["linear","poly","rbf","sigmoid"]
    for kernel in kernels:
        clf = SVC(kernel=kernel)
        score = cross_val_score(clf, X_dr, Ytrain, cv=7, scoring='accuracy').mean()
        if score >= best_score:
            best_score = score
            best_pca_dim = n_dim
            best_kernel = kernel
            print("The accuracy under kernel %s and pca dimension %d is %f" % (kernel, n_dim, score))

searching pca dim in range 1 t4o 89
The accuracy under kernel linear and pca dimension 1 is 0.647279
The accuracy under kernel linear and pca dimension 2 is 0.647279
The accuracy under kernel poly and pca dimension 2 is 0.704082
The accuracy under kernel rbf and pca dimension 2 is 0.711224
The accuracy under kernel linear and pca dimension 3 is 0.739116
The accuracy under kernel poly and pca dimension 3 is 0.781633
The accuracy under kernel poly and pca dimension 4 is 0.788435
The accuracy under kernel rbf and pca dimension 12 is 0.802381
The accuracy under kernel rbf and pca dimension 13 is 0.802381
The accuracy under kernel rbf and pca dimension 14 is 0.809524
The accuracy under kernel rbf and pca dimension 21 is 0.816667


In [12]:
pca = PCA(n_components=best_pca_dim)
pca = pca.fit(X)
X_dr = pca.transform(X)
joblib.dump(pca, pca_model_name)

['Fragile_X-normal_pca.pkl']

In [13]:
clf = SVC(kernel=best_kernel, probability = True)
X_train_dr = pca.transform(Xtrain)
X_test_dr = pca.transform(Xtest)
clf.fit(X_train_dr, Ytrain)
Y_test_predict = clf.predict(X_test_dr)
target_names = class_list
print(classification_report(Ytest, Y_test_predict, target_names=target_names))

              precision    recall  f1-score   support

   Fragile_X       0.79      0.95      0.86        20
      normal       0.92      0.69      0.79        16

    accuracy                           0.83        36
   macro avg       0.85      0.82      0.82        36
weighted avg       0.85      0.83      0.83        36



In [14]:
joblib.dump(clf, clf_model_name)

['Fragile_X-normal.pkl']