In [14]:
from __init__ import subject_number, path_data, path_model, model_name

import pandas as pd
import glob

# ファイルパスを取得
file_paths = glob.glob(path_data + '/*.csv')

# 認証したい方のパス
subject_path = 'out_no' + str(subject_number)

# 認証したい方のみのデータを抽出
user_file_paths = [file for file in file_paths if subject_path in file]
dfs_user = [pd.read_csv(file) for file in user_file_paths]
df_user = pd.concat(dfs_user, ignore_index=True)
df_d1 = pd.read_csv(path_data + '/' + subject_path + '_days' + str(1) + '.csv')
df_d2 = pd.read_csv(path_data + '/' + subject_path + '_days' + str(2) + '.csv')
df_d3 = pd.read_csv(path_data + '/' + subject_path + '_days' + str(3) + '.csv')
df_d4 = pd.read_csv(path_data + '/' + subject_path + '_days' + str(4) + '.csv')

# 認証対象以外のデータを抽出
allusers_file_paths = [file for file in file_paths if subject_path not in file]
dfs_all = [pd.read_csv(file) for file in allusers_file_paths]
df_all = pd.concat(dfs_all, ignore_index=True)
df_all['class'] = 99

ユーザの1日目のデータを利用してモデル生成

In [10]:
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Input
import pickle

# 1日目のデータを半分分ける．
df_d1_train, df_d1_test = train_test_split(df_d1, test_size=0.5, random_state=42)
# alldataをテストデータと分ける．
df_all_train, df_all_test = train_test_split(df_all, test_size=0.2, random_state=42)

# df_d1_trainとサンプリングしたdf_allを結合
df_train = pd.concat([df_d1_train, df_all_train], ignore_index=True)

# 訓練データ
X_train = df_train.iloc[:, 1:].apply(lambda x: (x - x.mean()) / x.std(), axis=0)
y_train = df_train['class'].apply(lambda x: 1 if x != subject_number else 0)

# テストデータ
df_test = pd.concat([df_d1_test, df_all_test], ignore_index=True)
X_test = df_test.iloc[:, 1:].apply(lambda x: (x - x.mean()) / x.std(), axis=0)
y_test = df_test['class'].apply(lambda x: 1 if x != subject_number else 0)

# モデル構成
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))  # 入力層
model.add(Dense(64, activation='relu'))  # 隠れ層1
model.add(Dense(32, activation='relu'))  # 隠れ層2
model.add(Dense(1, activation='sigmoid'))  # 出力層

# モデルのコンパイル
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# モデルの訓練
model.fit(X_train, y_train, epochs=50, batch_size=10, validation_split=0.2, verbose=0)

pickle.dump(model, open(path_model, 'wb'))

日付ごとに精度の比較

In [16]:
from sklearn.metrics import classification_report, confusion_matrix

# 1日目のデータを半分分ける．
df_d1_train, df_d1_test = train_test_split(df_d1, test_size=0.5, random_state=42)
# alldataをテストデータと分ける．
df_all_train, df_all_test = train_test_split(df_all, test_size=0.2, random_state=42)

# テストデータの選択
# df_test = pd.concat([df_d1_test, df_all_test], ignore_index=True) # 1日目の場合
# df_test = pd.concat([df_d2, df_all_test], ignore_index=True) # 2日目の場合
# df_test = pd.concat([df_d3, df_all_test], ignore_index=True) # 3日目の場合
df_test = pd.concat([df_d4, df_all_test], ignore_index=True) # 4日目の場合

X_test = df_test.iloc[:, 1:].apply(lambda x: (x - x.mean()) / x.std(), axis=0)
y_test = df_test['class'].apply(lambda x: 1 if x != subject_number else 0)

model = pickle.load(open(path_model, 'rb'))

# モデルの評価
y_pred = (model.predict(X_test) > 0.5).astype("int32")  # 予測値を0または1に変換

# 評価結果の表示
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred)) # ラベルデータ：ユーザの場合0，ユーザ以外を1

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[[16  4]
 [ 0 55]]
              precision    recall  f1-score   support

           0       1.00      0.80      0.89        20
           1       0.93      1.00      0.96        55

    accuracy                           0.95        75
   macro avg       0.97      0.90      0.93        75
weighted avg       0.95      0.95      0.94        75



<br><br><br><br><br><br><br>ユーザデータ全ての認証精度

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Input
from sklearn.metrics import classification_report, confusion_matrix

# df_userとdf_allを縦方向に結合
df = pd.concat([df_user, df_all], ignore_index=True)

# 行列の標準化
dfs = df.iloc[:, 1:].apply(lambda x: (x - x.mean()) / x.std(), axis=0)

# 特徴量とラベルを分ける
X = dfs # 特徴量データ
y = df['class'].apply(lambda x: 1 if x != subject_number else 0)  # ラベルデータ

# データを訓練用とテスト用に分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))  # 入力層
model.add(Dense(64, activation='relu'))  # 隠れ層1
model.add(Dense(32, activation='relu'))  # 隠れ層2
model.add(Dense(1, activation='sigmoid'))  # 出力層

# モデルのコンパイル
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# モデルの訓練
model.fit(X_train, y_train, epochs=50, batch_size=10, validation_split=0.2, verbose=0)

# モデルの評価
y_pred = (model.predict(X_test) > 0.5).astype("int32")  # 予測値を0または1に変換

# 評価結果の表示
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))