## 初期設定

In [1]:
exp_name = '8248_ImageFeature-Effnetb0TorchV_EvalGMM' # keep in sync with this notebook's file name

In [2]:
import os, gc, json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.io as sio
from scipy.signal import resample
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GroupKFold
import lightgbm as lgb
import scipy.signal as signal
from tqdm.notebook import tqdm
from scipy.signal import welch
import pandas as pd
from tqdm import tqdm

import os
import numpy as np
import pandas as pd
import torch
import timm
from torch import nn
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
from sklearn.model_selection import train_test_split, KFold
from torch import nn, optim

In [3]:
# Load the three MATLAB files (train / test / reference) in one pass.
train, test, reference = (
    sio.loadmat(mat_path)
    for mat_path in ('dataset/train.mat', 'dataset/test.mat', 'dataset/reference.mat')
)

In [4]:
# Names of the 16 EMG channels, listed in array-index order.
# The R / L suffix denotes the right / left side of the body.
#   TA   - Tibialis Anterior
#   LG   - Gastrocnemius Lateralis
#   RF   - Quadriceps, Rectus Femoris
#   VL   - Quadriceps, Vastus Lateralis
#   ST   - Semitendinosus
#   GMAX - Gluteus Maximus
#   EMI  - Erector Spinae Iliocostalis
#   DEL  - Deltoideus Medius
# (Kept as comments rather than a bare string literal so the cell does not
#  echo the whole description as its output.)
feature_name = ['TA R', 'TA L', 'LG R', 'LG L', 'RF R', 'RF L', 'VL R', 'VL L', 'ST R',
                'ST L', 'GMAX R', 'GMAX L', 'EMI R', 'EMI L', 'DEL R', 'DEL L']

'\n16個の変数はインデックスの小さい順番から順番に以下のようなデータが配列されている.\n- TibialisAnterior Right(脛骨前筋 右)\n- TibialisAnterior Left(脛骨前筋 左)\n- GastrocnemiusLateralis Right(外側広筋 右)\n- GastrocnemiusLateralis Left(外側広筋 左)\n- QuadricepsRectusFemoris Right(大腿四頭筋 直腿筋 右)\n- QuadricepsRectusFemoris Left(大腿四頭筋 直腿筋 左)\n- QuadricepsVastusLateralis Right(大腿四頭筋 外側広筋 右)\n- QuadricepsVastusLateralis Left(大腿四頭筋 外側広筋 左)\n- Semitendinosus Right(半腱様筋 右)\n- Semitendinosus Left(半腱様筋 左)\n- GluteusMaximus Right(大殿筋 右)\n- GluteusMaximus Left(大殿筋 左)\n- ErectorSpinaeIliocostalis Right(脊柱起立筋 右)\n- ErectorSpinaeIliocostalis Left(脊柱起立筋 左)\n- DeltoideusMedius Right(三角筋 右)\n- DeltoideusMedius Left(三角筋 左)\n'

In [5]:
# Root folder for results, plus a sub-folder dedicated to this experiment.
output_folder = "output/"
for directory in (output_folder, os.path.join(output_folder, exp_name)):
    os.makedirs(directory, exist_ok=True)

In [6]:
import random

# Random seed configuration
def seed_everything(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and put cuDNN in deterministic mode.

    Parameters
    ----------
    seed : int
        Value handed to every seeding function (default 42).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

seed = 42
seed_everything(seed)

In [7]:
import pickle

# Example: persisting data
def save_data_with_pickle(data, file_path):
    """Pickle `data` into the file at `file_path`, opened in binary write mode."""
    with open(file_path, 'wb') as sink:
        pickle.Pickler(sink).dump(data)

# Example: restoring data
def load_data_with_pickle(file_path):
    """Return the object unpickled from the file at `file_path`.

    NOTE(review): unpickling can execute arbitrary code, so only use this on
    files this project wrote itself.
    """
    with open(file_path, 'rb') as source:
        return pickle.Unpickler(source).load()

In [8]:
# NOTE(review): presumably the number of cross-validation folds; no cell visible
# in this notebook reads it — confirm before removing.
fold_num = 10

## 画像データ作成

In [9]:
for user_id in ["0001", "0002", "0003", "0004"]:
    x_array = train[user_id][0][0][0]
    y_array = train[user_id][0][0][1]

    # Swap the last two axes so each trial is laid out as (samples, channels),
    # e.g. x: (319, 16, 1000) -> (319, 1000, 16), y: (319, 3, 30) -> (319, 30, 3).
    x_array = x_array.transpose(0, 2, 1)
    y_array = y_array.transpose(0, 2, 1)
    print(x_array.shape)  # debug output
    print(y_array.shape)  # debug output

    # Stack every trial of this user vertically. Flattening the leading
    # (trial, sample) axes yields the same rows as concatenating one DataFrame
    # per trial, without building each frame column by column.
    # NOTE: both frames are rebuilt on every iteration, so after the loop they
    # hold the last user's data only.
    combined_x_df = pd.DataFrame(
        x_array.reshape(-1, x_array.shape[-1]), columns=feature_name
    )
    combined_y_df = pd.DataFrame(
        y_array.reshape(-1, y_array.shape[-1]), columns=["vel_x", "vel_y", "vel_z"]
    )

(319, 1000, 16)
(319, 30, 3)
(300, 1000, 16)
(300, 30, 3)
(320, 1000, 16)
(320, 30, 3)
(320, 1000, 16)
(320, 30, 3)


In [10]:
# Load the target velocities and flatten them per trial
def load_data_labels(output_folder, exp_name):
    """Collect one flattened velocity vector per trial.

    Reads the module-level `train` (users 0001-0004) and `reference`
    (user 0005) mappings. For each user the array at `[user_id][0][0][1]` has
    its last two axes swapped, and every trial is then flattened to 1-D.

    Parameters
    ----------
    output_folder, exp_name
        Accepted for call compatibility; not used by the body.

    Returns
    -------
    numpy.ndarray
        The flattened trials stacked in user order (train users, then reference).
    """
    sources = [
        (train, ["0001", "0002", "0003", "0004"]),
        (reference, ["0005"]),
    ]

    flattened_trials = []
    for dataset, user_ids in sources:
        for user_id in user_ids:
            trials = dataset[user_id][0][0][1].transpose(0, 2, 1)
            # One 1-D vector per trial (e.g. 30 samples x 3 axes -> 90 values).
            flattened_trials.extend(
                trials[k].reshape(-1) for k in range(trials.shape[0])
            )

    return np.array(flattened_trials)

In [11]:
# Build the ground-truth label matrix (one flattened velocity vector per trial)
labels = load_data_labels(output_folder, exp_name)

In [12]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# Build one feature vector per trial from the velocity labels.
# Every row of `labels` interleaves the axes (x0, y0, z0, x1, ...), so a
# stride-3 slice recovers each axis' time series. The feature vector is the
# three series followed by their three sums.
vel_features = []
print(len(labels))
for trial_label in labels:
    vel_x = trial_label[0::3]
    vel_y = trial_label[1::3]
    vel_z = trial_label[2::3]

    axis_sums = [np.sum(vel_x), np.sum(vel_y), np.sum(vel_z)]

    feature_vector = np.concatenate([vel_x, vel_y, vel_z, axis_sums])
    vel_features.append(feature_vector)

1579


In [13]:
# Reduce the velocity feature vectors to 10 principal components.
# random_state is pinned so that re-running this cell reproduces the same
# projection even if PCA's default 'auto' solver selects the randomized SVD
# (which otherwise draws from the global NumPy RNG state).
pca = PCA(n_components=10, random_state=seed)
principal_components = pca.fit_transform(vel_features)

# Cluster the projected trials into 3 classes with a Gaussian Mixture Model
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3, random_state=seed)
gmm.fit(principal_components)
labels_probs = gmm.predict_proba(principal_components)  # per-trial probability of each class
labels_gmm = gmm.predict(principal_components)  # per-trial class assignment


In [14]:
# Print the size of each of the three clusters, then the total number of
# trials, and finally display the assignment vector itself.
for cluster_id in range(3):
    print(len(principal_components[labels_gmm == cluster_id, 0]))
print(len(labels_gmm))
labels_gmm

636
431
512
1579


array([1, 0, 0, ..., 2, 0, 1])

In [15]:
# Display the GMM class probabilities (one row per trial, one column per class)
labels_probs

array([[2.86865937e-250, 1.00000000e+000, 4.08046809e-027],
       [1.00000000e+000, 3.94527460e-286, 3.62326933e-047],
       [1.00000000e+000, 5.63792605e-207, 1.21654599e-075],
       ...,
       [7.84428548e-112, 1.61659361e-045, 1.00000000e+000],
       [1.00000000e+000, 2.77156674e-231, 3.64891571e-100],
       [1.71210341e-310, 1.00000000e+000, 1.31253177e-010]])

## 判定結果からクラス分類

In [16]:
# Name of the experiment folder under `output_folder` whose train_/test_ prediction CSVs are read below
exp_name_woCatg = '8227_ImageFeature-Effnetb0TorchV'

In [17]:
def extract_features(vel_x, vel_y, vel_z):
    """Build the clustering feature vector for a single trial.

    Parameters
    ----------
    vel_x, vel_y, vel_z : 1-D array-like
        Velocity time series of one trial along each axis.

    Returns
    -------
    numpy.ndarray
        `vel_x`, `vel_y` and `vel_z` concatenated in that order, followed by
        the sum of each series (three extra values).
    """
    axis_sums = [np.sum(vel_x), np.sum(vel_y), np.sum(vel_z)]
    return np.concatenate([vel_x, vel_y, vel_z, axis_sums])

# Number of consecutive CSV rows (velocity samples) that form one trial.
POINTS_PER_TRIAL = 30


def _classify_trials(velocity_df, flip_xy=False):
    """Assign a GMM class to every trial of a per-sample velocity table.

    Parameters
    ----------
    velocity_df : pandas.DataFrame
        Consecutive samples, POINTS_PER_TRIAL rows per trial. The first three
        columns are read as the x, y and z velocity. Trailing rows that do not
        fill a whole trial are ignored.
    flip_xy : bool, default False
        Negate the x and y series before extracting features.

    Returns
    -------
    (classes, probs) : tuple of list
        One entry per trial: the `gmm.predict` result and the
        `gmm.predict_proba` result for that single trial.
    """
    classes, probs = [], []
    num_trials = len(velocity_df) // POINTS_PER_TRIAL

    for trial_index in range(num_trials):
        start_idx = trial_index * POINTS_PER_TRIAL
        trial = velocity_df.iloc[start_idx:start_idx + POINTS_PER_TRIAL].to_numpy()

        vel_x, vel_y, vel_z = trial[:, 0], trial[:, 1], trial[:, 2]
        if flip_xy:
            vel_x, vel_y = -vel_x, -vel_y

        # Project with the already-fitted PCA, then classify with the fitted GMM.
        # A local name is used so the global `principal_components` produced by
        # the fitting cell is not overwritten.
        trial_components = pca.transform([extract_features(vel_x, vel_y, vel_z)])
        classes.append(gmm.predict(trial_components))
        probs.append(gmm.predict_proba(trial_components))

    return classes, probs


# Process each user in turn
all_train_classes = []
all_train_classes_true = []
all_test_classes = []

all_train_probs = []
all_train_probs_true = []
all_test_probs = []
for user_id in ["0001", "0002", "0003", "0004", "0005"]:

    # Locate this user's prediction files (os.path.join avoids the doubled
    # separator that string formatting produced with output_folder = "output/")
    result_dir = os.path.join(output_folder, exp_name_woCatg)
    train_path_woCatg = os.path.join(result_dir, f"train_{user_id}.csv")
    test_path_woCatg = os.path.join(result_dir, f"test_{user_id}.csv")

    # Read the train / test prediction tables
    train_data_woCatg = pd.read_csv(train_path_woCatg)
    test_data_woCatg = pd.read_csv(test_path_woCatg)

    # Split into predicted columns ('_predicted' in the name) and the remaining
    # columns, which are treated as the true values for the train set
    train_predicted_woCatg = train_data_woCatg.filter(regex='_predicted')
    train_predicted_woCatg_true = train_data_woCatg.loc[:, ~train_data_woCatg.columns.str.contains('_predicted')]
    test_predicted_woCatg = test_data_woCatg.filter(regex='_predicted')

    # Classify every trial. Each table is walked with its own trial count, so
    # a test set whose size differs from the train set is handled in full
    # instead of being truncated or sliced past its end.
    train_classes, train_probs = _classify_trials(train_predicted_woCatg)
    train_classes_true, train_probs_true = _classify_trials(train_predicted_woCatg_true)
    # x and y are sign-flipped for the test predictions (kept from the original;
    # NOTE(review): the reason is not visible in this notebook — confirm)
    test_classes, test_probs = _classify_trials(test_predicted_woCatg, flip_xy=True)

    # Accumulate the per-user results
    all_train_classes.extend(train_classes)
    all_train_classes_true.extend(train_classes_true)
    all_test_classes.extend(test_classes)

    all_train_probs.extend(train_probs)
    all_train_probs_true.extend(train_probs_true)
    all_test_probs.extend(test_probs)


FileNotFoundError: [Errno 2] No such file or directory: 'output//8227_ImageFeature-Effnetb0TorchV/train_0001.csv'

### train:正解速度による事前分類と、予測速度による分類の一致率

In [None]:
# Percentage of train trials whose class from the predicted velocities equals
# the class from the true velocities
class_pairs = zip(all_train_classes, all_train_classes_true)
matching_count = sum(1 for predicted_cls, true_cls in class_pairs if predicted_cls == true_cls)
total_count = len(all_train_classes)
match_percentage = (matching_count / total_count) * 100

match_percentage

### train の予測速度による分類と、test の予測速度による分類の一致率

In [None]:
# Percentage of positions at which the train-prediction class equals the
# test-prediction class (pairs are formed positionally; the denominator is the
# number of train trials)
class_pairs = zip(all_train_classes, all_test_classes)
matching_count = sum(1 for train_cls, test_cls in class_pairs if train_cls == test_cls)
total_count = len(all_train_classes)
match_percentage = (matching_count / total_count) * 100

match_percentage

## クラス分類結果をCSVに出力

In [None]:
# Write each list of per-trial class assignments to its own CSV in the
# experiment's output folder
for csv_name, class_list in (
    ('all_train_classes.csv', all_train_classes),
    ('all_train_classes_true.csv', all_train_classes_true),
    ('all_test_classes.csv', all_test_classes),
):
    np.savetxt(os.path.join(output_folder, exp_name, csv_name), class_list, fmt="%s")


In [18]:
# Stack the single-row probability arrays of every trial into one 2-D array
# per data set and save each as a comma-separated file.
probs_dir = os.path.join(output_folder, exp_name)

all_train_probs_array = np.vstack([trial_prob[0] for trial_prob in all_train_probs])
np.savetxt(os.path.join(probs_dir, 'all_train_probs.csv'),
           all_train_probs_array, delimiter=",", fmt="%.18e")

all_train_probs_true_array = np.vstack([trial_prob[0] for trial_prob in all_train_probs_true])
np.savetxt(os.path.join(probs_dir, 'all_train_probs_true.csv'),
           all_train_probs_true_array, delimiter=",", fmt="%.18e")

all_test_probs_array = np.vstack([trial_prob[0] for trial_prob in all_test_probs])
np.savetxt(os.path.join(probs_dir, 'all_test_probs.csv'),
           all_test_probs_array, delimiter=",", fmt="%.18e")

ValueError: need at least one array to concatenate

In [19]:
# Display the stacked class probabilities obtained from the true train velocities
all_train_probs_true_array

NameError: name 'all_train_probs_true_array' is not defined

ここまで