In [11]:
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:2"

In [12]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.utils.data as utils


import pandas as pd
import glob
import os
import sys
from tqdm.notebook import tqdm

import random
import pickle


import numpy as np

from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
import warnings
from sklearn.model_selection import train_test_split

from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.model_selection import GroupKFold

# from scipy.spatial import distance
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from fastdtw import fastdtw
from tslearn.neighbors import KNeighborsTimeSeriesClassifier
from sklearn.ensemble import RandomForestClassifier

# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.linear_model import LogisticRegression
# from sklearn.naive_bayes import GaussianNB
# from sklearn.neighbors import KNeighborsClassifier

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score

import copy
# import datetime
import itertools

from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import KFold
import matplotlib.ticker as ticker

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

from natsort import natsorted



In [13]:
SENSORS_NUM = 16

# 関数

In [14]:
def fix_seed(seed):
    # random
    random.seed(seed)
    # Numpy
    np.random.seed(seed)
    # Pytorch
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Tensorflow
    torch.backends.cudnn.benchmark = False
#     torch.backends.cudnn.benchmark = True
    os.environ['PYTHONHASHSEED'] = str(seed)
    
    torch.use_deterministic_algorithms = True

# SEED = 0
# fix_seed(SEED)

## ファインチューニング

### CNN

In [15]:
def pipeline_cnn_finetuning(train_list_df, test_list_df, DIR_OUT, count, pre_best_eval_acc, conv_size, model_path, num_epochs):

    over_sensors_num = 4
    image_size_height = 16+over_sensors_num
    image_size_width = len([col for col in test_list_df[0].columns if str(col).isdecimal()])

    # 画像に直す
    # dataframeに直してるので時間かかる
    def sensor_to_image(list_df):
        data_list_df = []
        label_list = []
        for i in tqdm(range(len(list_df[0]))):
            temp_list_df = []
            label_list.append(list_df[0].iloc[i]['Label'])

            for df in list_df:
                temp_list_df.append(pd.DataFrame(df.iloc[i]).T.drop(['Label', 'Trial', 'Label_Trial'], axis=1))

            data_list_df.append(pd.concat(temp_list_df, sort=False))
            
        # センサ順を一周するよう入れ替え、over_sensors_num分上のセンサを下に延長
        for i in range(len(list_df[0])):
            data_list_df[i] = data_list_df[i].reset_index(drop=True).reindex(index=[0, 1, 2,3,4,5,6,15,14,13,12,11,10,9,8,7]).reset_index(drop=True)
            data_list_df[i] = pd.concat([data_list_df[i], data_list_df[i].iloc[:over_sensors_num]]).reset_index(drop=True)
 
        return data_list_df, label_list

    train_image_list, train_label_list = sensor_to_image(train_list_df)
    test_image_list, test_label_list = sensor_to_image(test_list_df)


    def df_to_nd(image_list):
        for i in range(len(image_list)):
            image_list[i] = image_list[i].values

        image_list = np.array(image_list)

        return image_list
    
    train_image_list = df_to_nd(train_image_list)
    train_image_list = np.reshape(train_image_list, (len(train_list_df[0]), 1, image_size_height, image_size_width), order='F')

    test_image_list = df_to_nd(test_image_list)
    test_image_list = np.reshape(test_image_list, (len(test_list_df[0]), 1, image_size_height, image_size_width), order='F')



    def make_label(label_list):
        label_list = label_list.astype(object)
        label_dict = {}
        for i, label in enumerate(sorted(set(label_list), key=list(label_list).index)):
            label_dict[i] = label
            np.putmask(label_list, label_list == label, i)
        label_list = label_list.astype(int)

        return label_list, label_dict

    train_label_list, train_label_dict = make_label(np.array(train_label_list))
    test_label_list, test_label_dict = make_label(np.array(test_label_list))


    x_train_tensor = torch.from_numpy(train_image_list.astype(float))
    x_test_tensor = torch.from_numpy(test_image_list.astype(float))
    y_train_tensor = torch.from_numpy(train_label_list).to(dtype=torch.long)
    y_test_tensor = torch.from_numpy(test_label_list).to(dtype=torch.long)


    train_dataset = utils.TensorDataset(x_train_tensor,y_train_tensor)
    test_dataset = utils.TensorDataset(x_test_tensor,y_test_tensor)


    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=60, 
                                               shuffle=True,
                                              num_workers=0)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                               batch_size=60, 
                                               shuffle=False,
                                             num_workers=0)



    
    num_classes = len(test_list_df[0]['Label'].unique())

    class CNN(nn.Module):

        def __init__(self, num_classes, size_check, conv_size):
            super(CNN, self).__init__()
            self.features = nn.Sequential(
                nn.Conv2d(1, 512, kernel_size=(5, conv_size), padding=0),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
                # nn.MaxPool2d(kernel_size=(1,2), stride=2),
#                 nn.Dropout(0.2),
                
#                 nn.Conv2d(512, 1024, kernel_size=(3, 5), padding=0),
#                 nn.ReLU(inplace=True),
#                 nn.MaxPool2d(kernel_size=2, stride=2),
            )
            self.liniear_input_size = self.features(size_check).size()
            self.liniear_input_size = self.liniear_input_size[1]*self.liniear_input_size[2]*self.liniear_input_size[3]
            self.classifier = nn.Sequential(
                nn.Linear(self.liniear_input_size, num_classes),
    #             nn.Softmax(dim=1),
            )

        def forward(self, x):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            return x

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = CNN(num_classes, torch.FloatTensor(60, 1, image_size_height, image_size_width), conv_size).to(device)
    
    # モデルロード
    net.load_state_dict(torch.load(model_path))
    # 重み固定
    for i, param in enumerate(net.parameters()):
        if i <= 1:
            param.requires_grad=False
    # 0:Conv2dのパラメータ 1:Conv2dのバイアス 2:classifierのパラメータ 3:classifierのバイアス

    
    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(net.parameters(), lr=10**-5, momentum=0.9, weight_decay=5e-4)
    optimizer = optim.SGD(net.parameters(), lr=10**-6, momentum=0.9)
    # optimizer = optim.Adam(net.parameters(), lr=10**-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)




    train_loss_list = []
    train_acc_list = []
    train_f_list = []
    val_loss_list = []
    val_acc_list = []
    val_f_list = []
    
    best_eval_acc = 0
    not_up_counter = 0 

    for epoch in range(num_epochs):
        train_loss = 0
        train_acc = 0
        train_f = 0    
        val_loss = 0
        val_acc = 0
        val_f = 0
        tmp_pred_list_by_subject = []
        tmp_label_list_by_subject = []  
        
        #train
        net.train()
        for i, (images, labels) in enumerate(train_loader):
            #view()での変換をしない
            images, labels = images.to(device, dtype=torch.float), labels.to(device)

            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            train_loss += loss.item()
            train_acc += (outputs.max(1)[1] == labels).sum().item()
            train_f += f1_score(outputs.max(1)[1].cpu(), labels.cpu(), average='macro') * len(labels)
            #   print(outputs.max(1)[1])
            #   print(labels)
            loss.backward()
            optimizer.step()

        avg_train_loss = train_loss / len(train_loader.dataset)
        avg_train_acc = train_acc / len(train_loader.dataset)
        avg_train_f = train_f / len(train_loader.dataset)

        #val
        net.eval()
        with torch.no_grad():
          for images, labels in test_loader:
            #view()での変換をしない
            images = images.to(device, dtype=torch.float)
            labels = labels.to(device)
            outputs = net(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            val_acc += (outputs.max(1)[1] == labels).sum().item()
            val_f += f1_score(outputs.max(1)[1].cpu(), labels.cpu(), average='macro') * len(labels)

#             if epoch == (num_epochs-1):
            tmp_pred_list_by_subject += [test_label_dict.get(x,x) for x in outputs.max(1)[1].tolist()]
            tmp_label_list_by_subject += [test_label_dict.get(x,x) for x in labels.tolist()]

        avg_val_loss = val_loss / len(test_loader.dataset)
        avg_val_acc = val_acc / len(test_loader.dataset)
        avg_val_f = val_f / len(test_loader.dataset)

        if (epoch % 10) == 0:
            print ('Epoch [{}/{}], Loss: {loss:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}, val_f: {val_f:.4f}' 
                        .format(epoch+1, num_epochs, i+1, loss=avg_train_loss, val_loss=avg_val_loss, val_acc=avg_val_acc, val_f=avg_val_f))
            
        train_loss_list.append(avg_train_loss)
        train_acc_list.append(avg_train_acc)
        train_f_list.append(avg_train_f)
        val_loss_list.append(avg_val_loss)
        val_acc_list.append(avg_val_acc)
        val_f_list.append(avg_val_f)
        
        if avg_val_acc > best_eval_acc:
            best_eval_acc = avg_val_acc
            pred_list_by_subject = copy.copy(tmp_pred_list_by_subject)
            label_list_by_subject = copy.copy(tmp_label_list_by_subject)
            not_up_counter = 0  
        else:
            not_up_counter += 1
        # if not_up_counter == 100:
        #     break

    #     scheduler.step()



    if best_eval_acc > pre_best_eval_acc:

        plt.figure()
        plt.plot(range(epoch+1), train_loss_list, color='blue', linestyle='-', label='train_loss')
        plt.plot(range(epoch+1), val_loss_list, color='green', linestyle='--', label='val_loss')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.title('Training and validation loss')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_loss.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1-100, epoch+1), train_loss_list[epoch+1-100:], color='blue', linestyle='-', label='train_loss')
        plt.plot(range(epoch+1-100, epoch+1), val_loss_list[epoch+1-100:], color='green', linestyle='--', label='val_loss')
        plt.legend()
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.title('Training and validation loss (last 100 epock)')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_miniloss.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1), train_f_list, color='blue', linestyle='-', label='train_f')
        plt.plot(range(epoch+1), val_f_list, color='green', linestyle='--', label='val_f')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.ylim(0, 1)
        plt.xlabel('epoch')
        plt.ylabel('f1-score')
        plt.title('Training and validation f1-score')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_f1.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1), train_acc_list, color='blue', linestyle='-', label='train_acc')
        plt.plot(range(epoch+1), val_acc_list, color='green', linestyle='--', label='val_acc')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.ylim(0, 1)
        plt.xlabel('epoch')
        plt.ylabel('acc')
        plt.title('Training and validation accuracy')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_acc.png")
        plt.close()




    # return pred_list_by_subject, label_list_by_subject, best_eval_acc
    return tmp_pred_list_by_subject, tmp_label_list_by_subject, avg_val_acc

### LSTM

In [16]:
def pipeline_lstm_finetuning(train_list_df, test_list_df, DIR_OUT, count, pre_best_eval_acc, num_epochs, hidden_size, batch_size, num_layers, is_bi, mabiki_interval, model_path):
    
    num_classes = len(test_list_df[0]['Label'].unique())

#     hidden_size = 500 #lstmの出力次元

    input_dim = SENSORS_NUM
#     batch_size = 40 #バッチサイズ
    output_size = num_classes #fcの出力次元

    # 画像に直す
    # dataframeに直してるので時間かかる
    def sensor_to_image(list_df):
        data_list = []
        label_list = []
        for i in tqdm(range(len(list_df[0]))):
            time_series_list = []
            label_list.append(list_df[0].iloc[i]['Label'])

    #         for one_sensor_df in list_df:
    #             time_series_list.append(list(one_sensor_df.drop(['Label', 'Trial', 'Label_Trial'], axis=1).iloc[i]))
            # センサ順を直しながらデータ作成
            for j in [0, 1, 2,3,4,5,6,15,14,13,12,11,10,9,8,7]:
                one_sensor_df = list_df[j].copy()
                time_series_list.append(list(one_sensor_df.drop(['Label', 'Trial', 'Label_Trial'], axis=1).iloc[i]))

            data_list.append(np.array(time_series_list).T)       

        return np.array(data_list), np.array(label_list)

    train_image_list, train_label_list = sensor_to_image(train_list_df)
    test_image_list, test_label_list = sensor_to_image(test_list_df)



    def make_label(label_list):
        label_list = label_list.astype(object)
        label_dict = {}
        for i, label in enumerate(sorted(set(label_list), key=list(label_list).index)):
            label_dict[i] = label
            np.putmask(label_list, label_list == label, i)
        label_list = label_list.astype(int)

        return label_list, label_dict

    train_label_list, train_label_dict = make_label(np.array(train_label_list))
    test_label_list, test_label_dict = make_label(np.array(test_label_list))

    # 間引く
    train_image_list = train_image_list[:,::mabiki_interval,:]
    test_image_list = test_image_list[:,::mabiki_interval,:]

    x_train_tensor = torch.from_numpy(train_image_list.astype(float))
    x_test_tensor = torch.from_numpy(test_image_list.astype(float))
    y_train_tensor = torch.from_numpy(train_label_list).to(dtype=torch.long)
    y_test_tensor = torch.from_numpy(test_label_list).to(dtype=torch.long)


    train_dataset = utils.TensorDataset(x_train_tensor,y_train_tensor)
    test_dataset = utils.TensorDataset(x_test_tensor,y_test_tensor)


    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size, 
                                               shuffle=True,
                                              num_workers=0)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                               batch_size=batch_size, 
                                               shuffle=False,
                                             num_workers=0)


#     g = torch.Generator()
#     g.manual_seed(0)

# #     DataLoader(
# #         train_dataset,
# #         batch_size=batch_size,
# #         num_workers=num_workers,
# #         worker_init_fn=seed_worker,
# #         generator=g,
# #     )

#     train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
#                                                batch_size=batch_size, 
#                                                shuffle=True,
#                                               num_workers=0,
#                                                 worker_init_fn=seed_worker,
#                                                 generator=g,)
#     test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
#                                                batch_size=batch_size, 
#                                                shuffle=False,
#                                              num_workers=0,
#                                                 worker_init_fn=seed_worker,
#                                                 generator=g,)



    class LstmClassifier(nn.Module):
        def __init__(self, input_dim, batch_size, hidden_size, output_size, num_layers, is_bi):
            super(LstmClassifier, self).__init__()
            self.input_dim = input_dim
            self.batch_size = batch_size
            self.hidden_size = hidden_size
            self.output_size = output_size
            self.num_layers = num_layers
            self.is_bi = is_bi

            self.lstm = nn.LSTM(input_dim, hidden_size, batch_first=True, num_layers=num_layers, bidirectional=is_bi) #batch_first=Trueにしてる
#             self.lstm = nn.GRU(input_dim, hidden_size, batch_first=True, num_layers=num_layers, bidirectional=is_bi) #batch_first=Trueにしてる
#         self.relu = nn.ReLU(inplace=True)
            self.fc = nn.Linear(hidden_size, output_size)

        def forward(self, x):
    #         x = self.embed(x)
            # 初期隠れ状態とセル状態を設定
            h0 = torch.zeros(1, self.batch_size, self.hidden_size).to(device)
            c0 = torch.zeros(1, self.batch_size, self.hidden_size).to(device)
            # LSTMを伝播する
            # output_seqの出力形状：（バッチサイズ、シーケンス長、出力次元）
    #         output_seq, (h_n, c_n) = self.lstm(x, (h0, c0)) 
            output_seq, (h_n, c_n) = self.lstm(x, None)        
#             output_seq, h_n = self.lstm(x, None) # GRU    

            # 最後のタイムステップの隠れ状態をデコード
    #         out = self.fc(self.relu(h_n[-1]))
            out = self.fc(h_n[-1])
            return out

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = LstmClassifier(input_dim, batch_size, hidden_size, output_size, num_layers, is_bi).to(device)

    # モデルロード
    net.load_state_dict(torch.load(model_path))
    # 重み固定
    for i, param in enumerate(net.parameters()):
        # 最後の２層（全結合の重みとバイアス）以外固定
        if i <= len(net.state_dict())-3:
            param.requires_grad=False
    
    
    
    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(net.parameters(), lr=10**-5, momentum=0.9, weight_decay=5e-4)
#     optimizer = optim.SGD(net.parameters(), lr=10**-5, momentum=0.9)
#     optimizer = optim.SGD(net.parameters(), lr=10**-5)
    optimizer = optim.Adam(net.parameters(), lr=10**-5)
    # optimizer = optim.Adam(net.parameters(), lr=100)
    # optimizer = torch.optim.RMSprop(net.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)



#     num_epochs = 500

    train_loss_list = []
    train_acc_list = []
    train_f_list = []
    val_loss_list = []
    val_acc_list = []
    val_f_list = []

    best_eval_acc = 0
    not_up_counter = 0 

    for epoch in range(num_epochs):
        train_loss = 0
        train_acc = 0
        train_f = 0    
        val_loss = 0
        val_acc = 0
        val_f = 0
        tmp_pred_list_by_subject = []
        tmp_label_list_by_subject = []  

        #train
        net.train()
        for i, (images, labels) in enumerate(train_loader):
            #view()での変換をしない
            images, labels = images.to(device, dtype=torch.float), labels.to(device)

            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            train_loss += loss.item()
            train_acc += (outputs.max(1)[1] == labels).sum().item()
            train_f += f1_score(outputs.max(1)[1].cpu(), labels.cpu(), average='macro') * len(labels)
            #   print(outputs.max(1)[1])
            #   print(labels)
            loss.backward()
            optimizer.step()

        avg_train_loss = train_loss / len(train_loader.dataset)
        avg_train_acc = train_acc / len(train_loader.dataset)
        avg_train_f = train_f / len(train_loader.dataset)

        #val
        net.eval()
        with torch.no_grad():
          for images, labels in test_loader:
            #view()での変換をしない
            images = images.to(device, dtype=torch.float)
            labels = labels.to(device)
            outputs = net(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            val_acc += (outputs.max(1)[1] == labels).sum().item()
            val_f += f1_score(outputs.max(1)[1].cpu(), labels.cpu(), average='macro') * len(labels)

    #             if epoch == (num_epochs-1):
            tmp_pred_list_by_subject += [test_label_dict.get(x,x) for x in outputs.max(1)[1].tolist()]
            tmp_label_list_by_subject += [test_label_dict.get(x,x) for x in labels.tolist()]

        avg_val_loss = val_loss / len(test_loader.dataset)
        avg_val_acc = val_acc / len(test_loader.dataset)
        avg_val_f = val_f / len(test_loader.dataset)

        if (epoch % 10) == 0:
            print ('Epoch [{}/{}], Loss: {loss:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}, val_f: {val_f:.4f}' 
                        .format(epoch+1, num_epochs, i+1, loss=avg_train_loss, val_loss=avg_val_loss, val_acc=avg_val_acc, val_f=avg_val_f))

        train_loss_list.append(avg_train_loss)
        train_acc_list.append(avg_train_acc)
        train_f_list.append(avg_train_f)
        val_loss_list.append(avg_val_loss)
        val_acc_list.append(avg_val_acc)
        val_f_list.append(avg_val_f)

        if avg_val_acc > best_eval_acc:
            best_eval_acc = avg_val_acc
            pred_list_by_subject = copy.copy(tmp_pred_list_by_subject)
            label_list_by_subject = copy.copy(tmp_label_list_by_subject)
            not_up_counter = 0  
        else:
            not_up_counter += 1
        # if not_up_counter == 100:
        #     break

    #     scheduler.step()



    if best_eval_acc > pre_best_eval_acc:

        plt.figure()
        plt.plot(range(epoch+1), train_loss_list, color='blue', linestyle='-', label='train_loss')
        plt.plot(range(epoch+1), val_loss_list, color='green', linestyle='--', label='val_loss')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.title('Training and validation loss')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_loss.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1-100, epoch+1), train_loss_list[epoch+1-100:], color='blue', linestyle='-', label='train_loss')
        plt.plot(range(epoch+1-100, epoch+1), val_loss_list[epoch+1-100:], color='green', linestyle='--', label='val_loss')
        plt.legend()
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.title('Training and validation loss (last 100 epock)')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_miniloss.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1), train_f_list, color='blue', linestyle='-', label='train_f')
        plt.plot(range(epoch+1), val_f_list, color='green', linestyle='--', label='val_f')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.ylim(0, 1)
        plt.xlabel('epoch')
        plt.ylabel('f1-score')
        plt.title('Training and validation f1-score')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_f1.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1), train_acc_list, color='blue', linestyle='-', label='train_acc')
        plt.plot(range(epoch+1), val_acc_list, color='green', linestyle='--', label='val_acc')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.ylim(0, 1)
        plt.xlabel('epoch')
        plt.ylabel('acc')
        plt.title('Training and validation accuracy')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_acc.png")
        plt.close()




    # return pred_list_by_subject, label_list_by_subject, best_eval_acc
    return tmp_pred_list_by_subject, tmp_label_list_by_subject, avg_val_acc

## 事前学習

### CNN

In [17]:
def pipeline_cnn(train_list_df, test_list_df, DIR_OUT, count, pre_best_eval_acc, conv_size, num_epochs):

    over_sensors_num = 4
    image_size_height = 16+over_sensors_num
    image_size_width = len([col for col in test_list_df[0].columns if str(col).isdecimal()])

    # 画像に直す
    # dataframeに直してるので時間かかる
    def sensor_to_image(list_df):
        data_list_df = []
        label_list = []
        for i in tqdm(range(len(list_df[0]))):
            temp_list_df = []
            label_list.append(list_df[0].iloc[i]['Label'])

            for df in list_df:
                temp_list_df.append(pd.DataFrame(df.iloc[i]).T.drop(['Label', 'Trial', 'Label_Trial'], axis=1))

            data_list_df.append(pd.concat(temp_list_df, sort=False))
            
        # センサ順を一周するよう入れ替え、over_sensors_num分上のセンサを下に延長
        for i in range(len(list_df[0])):
            data_list_df[i] = data_list_df[i].reset_index(drop=True).reindex(index=[0, 1, 2,3,4,5,6,15,14,13,12,11,10,9,8,7]).reset_index(drop=True)
            data_list_df[i] = pd.concat([data_list_df[i], data_list_df[i].iloc[:over_sensors_num]]).reset_index(drop=True)
 
        return data_list_df, label_list

    train_image_list, train_label_list = sensor_to_image(train_list_df)
    test_image_list, test_label_list = sensor_to_image(test_list_df)


    def df_to_nd(image_list):
        for i in range(len(image_list)):
            image_list[i] = image_list[i].values

        image_list = np.array(image_list)

        return image_list
    
    train_image_list = df_to_nd(train_image_list)
    train_image_list = np.reshape(train_image_list, (len(train_list_df[0]), 1, image_size_height, image_size_width), order='F')

    test_image_list = df_to_nd(test_image_list)
    test_image_list = np.reshape(test_image_list, (len(test_list_df[0]), 1, image_size_height, image_size_width), order='F')



    def make_label(label_list):
        label_list = label_list.astype(object)
        label_dict = {}
        for i, label in enumerate(sorted(set(label_list), key=list(label_list).index)):
            label_dict[i] = label
            np.putmask(label_list, label_list == label, i)
        label_list = label_list.astype(int)

        return label_list, label_dict

    train_label_list, train_label_dict = make_label(np.array(train_label_list))
    test_label_list, test_label_dict = make_label(np.array(test_label_list))


    x_train_tensor = torch.from_numpy(train_image_list.astype(float))
    x_test_tensor = torch.from_numpy(test_image_list.astype(float))
    y_train_tensor = torch.from_numpy(train_label_list).to(dtype=torch.long)
    y_test_tensor = torch.from_numpy(test_label_list).to(dtype=torch.long)


    train_dataset = utils.TensorDataset(x_train_tensor,y_train_tensor)
    test_dataset = utils.TensorDataset(x_test_tensor,y_test_tensor)


    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=60, 
                                               shuffle=True,
                                              num_workers=0)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                               batch_size=60, 
                                               shuffle=False,
                                             num_workers=0)



    
    num_classes = len(test_list_df[0]['Label'].unique())

    class CNN(nn.Module):

        def __init__(self, num_classes, size_check, conv_size):
            super(CNN, self).__init__()
            self.features = nn.Sequential(
                nn.Conv2d(1, 512, kernel_size=(5, conv_size), padding=0),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
                # nn.MaxPool2d(kernel_size=(1,2), stride=2),
#                 nn.Dropout(0.2),
                
#                 nn.Conv2d(512, 1024, kernel_size=(3, 5), padding=0),
#                 nn.ReLU(inplace=True),
#                 nn.MaxPool2d(kernel_size=2, stride=2),
            )
            self.liniear_input_size = self.features(size_check).size()
            self.liniear_input_size = self.liniear_input_size[1]*self.liniear_input_size[2]*self.liniear_input_size[3]
            self.classifier = nn.Sequential(
                nn.Linear(self.liniear_input_size, num_classes),
    #             nn.Softmax(dim=1),
            )

        def forward(self, x):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            return x

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = CNN(num_classes, torch.FloatTensor(60, 1, image_size_height, image_size_width), conv_size).to(device)

    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(net.parameters(), lr=10**-5, momentum=0.9, weight_decay=5e-4)
    optimizer = optim.SGD(net.parameters(), lr=10**-6, momentum=0.9)
    # optimizer = optim.Adam(net.parameters(), lr=10**-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)






    train_loss_list = []
    train_acc_list = []
    train_f_list = []
    val_loss_list = []
    val_acc_list = []
    val_f_list = []
    
    best_eval_acc = 0
    not_up_counter = 0 

    for epoch in range(num_epochs):
        train_loss = 0
        train_acc = 0
        train_f = 0    
        val_loss = 0
        val_acc = 0
        val_f = 0
        tmp_pred_list_by_subject = []
        tmp_label_list_by_subject = []  
        
        #train
        net.train()
        for i, (images, labels) in enumerate(train_loader):
            #view()での変換をしない
            images, labels = images.to(device, dtype=torch.float), labels.to(device)

            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            train_loss += loss.item()
            train_acc += (outputs.max(1)[1] == labels).sum().item()
            train_f += f1_score(outputs.max(1)[1].cpu(), labels.cpu(), average='macro') * len(labels)
            #   print(outputs.max(1)[1])
            #   print(labels)
            loss.backward()
            optimizer.step()

        avg_train_loss = train_loss / len(train_loader.dataset)
        avg_train_acc = train_acc / len(train_loader.dataset)
        avg_train_f = train_f / len(train_loader.dataset)

        #val
        net.eval()
        with torch.no_grad():
          for images, labels in test_loader:
            #view()での変換をしない
            images = images.to(device, dtype=torch.float)
            labels = labels.to(device)
            outputs = net(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            val_acc += (outputs.max(1)[1] == labels).sum().item()
            val_f += f1_score(outputs.max(1)[1].cpu(), labels.cpu(), average='macro') * len(labels)

#             if epoch == (num_epochs-1):
            tmp_pred_list_by_subject += [test_label_dict.get(x,x) for x in outputs.max(1)[1].tolist()]
            tmp_label_list_by_subject += [test_label_dict.get(x,x) for x in labels.tolist()]

        avg_val_loss = val_loss / len(test_loader.dataset)
        avg_val_acc = val_acc / len(test_loader.dataset)
        avg_val_f = val_f / len(test_loader.dataset)

        if (epoch % 10) == 0:
            print ('Epoch [{}/{}], Loss: {loss:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}, val_f: {val_f:.4f}' 
                        .format(epoch+1, num_epochs, i+1, loss=avg_train_loss, val_loss=avg_val_loss, val_acc=avg_val_acc, val_f=avg_val_f))
            
        train_loss_list.append(avg_train_loss)
        train_acc_list.append(avg_train_acc)
        train_f_list.append(avg_train_f)
        val_loss_list.append(avg_val_loss)
        val_acc_list.append(avg_val_acc)
        val_f_list.append(avg_val_f)
        
        if avg_val_acc > best_eval_acc:
            best_eval_acc = avg_val_acc
            pred_list_by_subject = copy.copy(tmp_pred_list_by_subject)
            label_list_by_subject = copy.copy(tmp_label_list_by_subject)
            not_up_counter = 0  
        else:
            not_up_counter += 1
        # if not_up_counter == 100:
        #     break

    #     scheduler.step()



    if best_eval_acc > pre_best_eval_acc:

        plt.figure()
        plt.plot(range(epoch+1), train_loss_list, color='blue', linestyle='-', label='train_loss')
        plt.plot(range(epoch+1), val_loss_list, color='green', linestyle='--', label='val_loss')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.title('Training and validation loss')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_loss.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1-100, epoch+1), train_loss_list[epoch+1-100:], color='blue', linestyle='-', label='train_loss')
        plt.plot(range(epoch+1-100, epoch+1), val_loss_list[epoch+1-100:], color='green', linestyle='--', label='val_loss')
        plt.legend()
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.title('Training and validation loss (last 100 epock)')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_miniloss.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1), train_f_list, color='blue', linestyle='-', label='train_f')
        plt.plot(range(epoch+1), val_f_list, color='green', linestyle='--', label='val_f')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.ylim(0, 1)
        plt.xlabel('epoch')
        plt.ylabel('f1-score')
        plt.title('Training and validation f1-score')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_f1.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1), train_acc_list, color='blue', linestyle='-', label='train_acc')
        plt.plot(range(epoch+1), val_acc_list, color='green', linestyle='--', label='val_acc')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.ylim(0, 1)
        plt.xlabel('epoch')
        plt.ylabel('acc')
        plt.title('Training and validation accuracy')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_acc.png")
        plt.close()




    # return pred_list_by_subject, label_list_by_subject, best_eval_acc
    return tmp_pred_list_by_subject, tmp_label_list_by_subject, avg_val_acc, net

### LSTM

In [21]:
def pipeline_lstm(train_list_df, test_list_df, DIR_OUT, count, pre_best_eval_acc, num_epochs, hidden_size, batch_size, num_layers, is_bi, mabiki_interval):
    
    num_classes = len(test_list_df[0]['Label'].unique())

#     hidden_size = 500 #lstmの出力次元

    input_dim = SENSORS_NUM
#     batch_size = 40 #バッチサイズ
    output_size = num_classes #fcの出力次元

    # 画像に直す
    # dataframeに直してるので時間かかる
    def sensor_to_image(list_df):
        data_list = []
        label_list = []
        for i in tqdm(range(len(list_df[0]))):
            time_series_list = []
            label_list.append(list_df[0].iloc[i]['Label'])

    #         for one_sensor_df in list_df:
    #             time_series_list.append(list(one_sensor_df.drop(['Label', 'Trial', 'Label_Trial'], axis=1).iloc[i]))
            # センサ順を直しながらデータ作成
            for j in [0, 1, 2,3,4,5,6,15,14,13,12,11,10,9,8,7]:
                one_sensor_df = list_df[j].copy()
                time_series_list.append(list(one_sensor_df.drop(['Label', 'Trial', 'Label_Trial'], axis=1).iloc[i]))

            data_list.append(np.array(time_series_list).T)       

        return np.array(data_list), np.array(label_list)

    train_image_list, train_label_list = sensor_to_image(train_list_df)
    test_image_list, test_label_list = sensor_to_image(test_list_df)



    def make_label(label_list):
        label_list = label_list.astype(object)
        label_dict = {}
        for i, label in enumerate(sorted(set(label_list), key=list(label_list).index)):
            label_dict[i] = label
            np.putmask(label_list, label_list == label, i)
        label_list = label_list.astype(int)

        return label_list, label_dict

    train_label_list, train_label_dict = make_label(np.array(train_label_list))
    test_label_list, test_label_dict = make_label(np.array(test_label_list))

    # 間引く
    train_image_list = train_image_list[:,::mabiki_interval,:]
    test_image_list = test_image_list[:,::mabiki_interval,:]

    x_train_tensor = torch.from_numpy(train_image_list.astype(float))
    x_test_tensor = torch.from_numpy(test_image_list.astype(float))
    y_train_tensor = torch.from_numpy(train_label_list).to(dtype=torch.long)
    y_test_tensor = torch.from_numpy(test_label_list).to(dtype=torch.long)
    print(x_test_tensor.size)
    
    train_dataset = utils.TensorDataset(x_train_tensor,y_train_tensor)
    test_dataset = utils.TensorDataset(x_test_tensor,y_test_tensor)


    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size, 
                                               shuffle=True,
                                              num_workers=0)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                               batch_size=batch_size, 
                                               shuffle=False,
                                             num_workers=0)

    class LstmClassifier(nn.Module):
        def __init__(self, input_dim, batch_size, hidden_size, output_size, num_layers, is_bi):
            super(LstmClassifier, self).__init__()
            self.input_dim = input_dim
            self.batch_size = batch_size
            self.hidden_size = hidden_size
            self.output_size = output_size
            self.num_layers = num_layers
            self.is_bi = is_bi

            self.lstm = nn.LSTM(input_dim, hidden_size, batch_first=True, num_layers=num_layers, bidirectional=is_bi) #batch_first=Trueにしてる
#             self.lstm = nn.GRU(input_dim, hidden_size, batch_first=True, num_layers=num_layers, bidirectional=is_bi) #batch_first=Trueにしてる
#         self.relu = nn.ReLU(inplace=True)
            self.fc = nn.Linear(hidden_size, output_size)

        def forward(self, x):
    #         x = self.embed(x)
            # 初期隠れ状態とセル状態を設定
            h0 = torch.zeros(1, self.batch_size, self.hidden_size).to(device)
            c0 = torch.zeros(1, self.batch_size, self.hidden_size).to(device)
            # LSTMを伝播する
            # output_seqの出力形状：（バッチサイズ、シーケンス長、出力次元）
    #         output_seq, (h_n, c_n) = self.lstm(x, (h0, c0)) 
            output_seq, (h_n, c_n) = self.lstm(x, None)        
#             output_seq, h_n = self.lstm(x, None) # GRU    

            # 最後のタイムステップの隠れ状態をデコード
    #         out = self.fc(self.relu(h_n[-1]))
            out = self.fc(h_n[-1])
            return out

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = LstmClassifier(input_dim, batch_size, hidden_size, output_size, num_layers, is_bi)
    net = net.to(device)

    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(net.parameters(), lr=10**-5, momentum=0.9, weight_decay=5e-4)
#     optimizer = optim.SGD(net.parameters(), lr=10**-5, momentum=0.9)
#     optimizer = optim.SGD(net.parameters(), lr=10**-5)
    optimizer = optim.Adam(net.parameters(), lr=10**-5)
    # optimizer = optim.Adam(net.parameters(), lr=100)
    # optimizer = torch.optim.RMSprop(net.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)



#     num_epochs = 500

    train_loss_list = []
    train_acc_list = []
    train_f_list = []
    val_loss_list = []
    val_acc_list = []
    val_f_list = []

    best_eval_acc = 0
    not_up_counter = 0 

    for epoch in range(num_epochs):
        train_loss = 0
        train_acc = 0
        train_f = 0    
        val_loss = 0
        val_acc = 0
        val_f = 0
        tmp_pred_list_by_subject = []
        tmp_label_list_by_subject = []  

        #train
        net.train()
        for i, (images, labels) in enumerate(train_loader):
            #view()での変換をしない
            images, labels = images.to(device, dtype=torch.float), labels.to(device)

            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            train_loss += loss.item()
            train_acc += (outputs.max(1)[1] == labels).sum().item()
            train_f += f1_score(outputs.max(1)[1].cpu(), labels.cpu(), average='macro') * len(labels)
            #   print(outputs.max(1)[1])
            #   print(labels)
            loss.backward()
            optimizer.step()

        avg_train_loss = train_loss / len(train_loader.dataset)
        avg_train_acc = train_acc / len(train_loader.dataset)
        avg_train_f = train_f / len(train_loader.dataset)

        #val
        net.eval()
        with torch.no_grad():
          for images, labels in test_loader:
            #view()での変換をしない
            images = images.to(device, dtype=torch.float)
            labels = labels.to(device)
            outputs = net(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            val_acc += (outputs.max(1)[1] == labels).sum().item()
            val_f += f1_score(outputs.max(1)[1].cpu(), labels.cpu(), average='macro') * len(labels)

    #             if epoch == (num_epochs-1):
            tmp_pred_list_by_subject += [test_label_dict.get(x,x) for x in outputs.max(1)[1].tolist()]
            tmp_label_list_by_subject += [test_label_dict.get(x,x) for x in labels.tolist()]

        avg_val_loss = val_loss / len(test_loader.dataset)
        avg_val_acc = val_acc / len(test_loader.dataset)
        avg_val_f = val_f / len(test_loader.dataset)

        if (epoch % 10) == 0:
            print ('Epoch [{}/{}], Loss: {loss:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}, val_f: {val_f:.4f}' 
                        .format(epoch+1, num_epochs, i+1, loss=avg_train_loss, val_loss=avg_val_loss, val_acc=avg_val_acc, val_f=avg_val_f))

        train_loss_list.append(avg_train_loss)
        train_acc_list.append(avg_train_acc)
        train_f_list.append(avg_train_f)
        val_loss_list.append(avg_val_loss)
        val_acc_list.append(avg_val_acc)
        val_f_list.append(avg_val_f)

        if avg_val_acc > best_eval_acc:
            best_eval_acc = avg_val_acc
            pred_list_by_subject = copy.copy(tmp_pred_list_by_subject)
            label_list_by_subject = copy.copy(tmp_label_list_by_subject)
            not_up_counter = 0  
        else:
            not_up_counter += 1
        # if not_up_counter == 100:
        #     break

    #     scheduler.step()



    if best_eval_acc > pre_best_eval_acc:

        plt.figure()
        plt.plot(range(epoch+1), train_loss_list, color='blue', linestyle='-', label='train_loss')
        plt.plot(range(epoch+1), val_loss_list, color='green', linestyle='--', label='val_loss')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.title('Training and validation loss')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_loss.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1-100, epoch+1), train_loss_list[epoch+1-100:], color='blue', linestyle='-', label='train_loss')
        plt.plot(range(epoch+1-100, epoch+1), val_loss_list[epoch+1-100:], color='green', linestyle='--', label='val_loss')
        plt.legend()
        plt.xlabel('epoch')
        plt.ylabel('loss')
        plt.title('Training and validation loss (last 100 epock)')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_miniloss.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1), train_f_list, color='blue', linestyle='-', label='train_f')
        plt.plot(range(epoch+1), val_f_list, color='green', linestyle='--', label='val_f')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.ylim(0, 1)
        plt.xlabel('epoch')
        plt.ylabel('f1-score')
        plt.title('Training and validation f1-score')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_f1.png")
        plt.close()

        plt.figure()
        plt.plot(range(epoch+1), train_acc_list, color='blue', linestyle='-', label='train_acc')
        plt.plot(range(epoch+1), val_acc_list, color='green', linestyle='--', label='val_acc')
        plt.legend()
        plt.xlim(0, num_epochs)
        plt.ylim(0, 1)
        plt.xlabel('epoch')
        plt.ylabel('acc')
        plt.title('Training and validation accuracy')
        plt.grid()
        plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_acc.png")
        plt.close()




    # return pred_list_by_subject, label_list_by_subject, best_eval_acc
    return tmp_pred_list_by_subject, tmp_label_list_by_subject, avg_val_acc, net

## make_input_data

In [19]:
def make_input_data(DATA_PATH, moving_average_size, metric):

    All_Files = natsorted(glob.glob('{}/**/*.csv'.format(DATA_PATH),recursive=True))
    del_no_list = [] # センサ番号昇順
    #7,9,10


    list_df = [0]*SENSORS_NUM
    list_list_df = [[] for col in range(SENSORS_NUM)]

    # with open("rand_indexs_multi_dir.pkl", "rb") as f:
    #     rand_indexs = pickle.load(f) #読み出し
    # if len(All_Files) != (len(rand_indexs)*10):
    #     print(rand_indexsの長さがおかしいです)
    #     sys.exit()

    # df最長長探索    
    df_len = []
    for file in tqdm(All_Files):
        df = pd.read_csv(file, header=0)
        df = df.rolling(window=moving_average_size, min_periods=1).mean().dropna(how='all')
        df_len.append(len(df))
        max_len = np.max(df_len)

    # ファイル名順でtrial順
    for i, file in enumerate(tqdm(All_Files)):
        file_name = os.path.basename(file)
    #     print(file_name)
        df = pd.read_csv(file, header=0)
    #             print(file_name)


        # 移動平均
        df = df.rolling(window=moving_average_size, min_periods=1).mean().dropna(how='all')
    #     df.drop(['day', 'time'], axis=1, inplace=True)
    #             df.to_csv(file.replace('.csv', '') + '_MA' + '.csv', encoding='utf-8', index=False)

        # ウィンドウ作成
        for col in range(1, SENSORS_NUM+1): # sec除くため

#             pad_width = (0, max_len-len(df))
            # 16ジェスチャのためパディングをしないよう設定
            pad_width = (0, 0)

            if metric == "eq":
                list_df[col-1] = pd.DataFrame(np.pad(df.iloc[:, col], pad_width, 'edge')).T
            elif metric == "dtw":
                list_df[col-1] = pd.DataFrame(np.pad(df.iloc[:, col], pad_width, 'constant', constant_values=np.nan)).T
            else:
                print(metricがおかしいです)
                sys.exit()


        for col in range(len(list_df)):
            # ファイル名からジェスチャ列追加
            splited_name = os.path.basename(file).split('_')

            if len(splited_name) == 2:
                gesture = splited_name[0]
            else:
                print('ファイル名がおかしい')
                sys.exit()
            list_df[col].insert(0, 'Label', gesture)

            # ファイル名からTrial数列追加
            trial_num = int(splited_name[-1].replace('T', '').replace('.csv', ''))
            list_df[col].insert(1, 'Trial', trial_num)

    #         group_id = rand_indexs[int(i/10)][trial_num-1]
    #         list_df[col].insert(1, 'Trial', group_id)

        #     # label_trial追加
        #     gesture_trial = gesture + '_' + str(trial_num)
        #     df.insert(2, 'Label_Trial', gesture_trial)

            # ファイル名追加
            list_df[col].insert(2, 'Label_Trial', os.path.basename(file).replace('.csv', ''))

        # df追加
        for col in range(len(list_list_df)):
            list_list_df[col].append(list_df[col])

    for col in range(len(list_list_df)):
        list_df[col] = pd.concat(list_list_df[col], sort=False)


    # センサ削除
    if len(del_no_list) > 0:
        del_no_list.reverse()

        for del_no in del_no_list:
            del list_df[del_no-1]



    # センサ番号入れる
    if feats_num == 40:

        adj1_list = list(range(1, SENSORS_NUM))+[SENSORS_NUM]
        adj2_list = list(range(2, SENSORS_NUM+1))+[1]
        diag1_list = list(range(1, int((SENSORS_NUM/2))+1))
        diag2_list = list(range(int(SENSORS_NUM/2)+1, SENSORS_NUM+1))

        if (len(adj1_list) != len(adj2_list)) or (len(diag1_list) != len(diag2_list)):
            print(計算用リストの長さがおかしいです)
            sys.exit() 

        column_0_num = list_df[0].columns.get_loc(0)
        # label_df = list_df[0].iloc[:,:column_0_num] # 時間かかればこっち使う
        for adj1, adj2 in zip(adj1_list, adj2_list):
            label_df = list_df[adj1-1].iloc[:,:column_0_num]
            df = pd.concat([label_df, list_df[adj1-1].iloc[:,column_0_num:] - list_df[adj2-1].iloc[:,column_0_num:]], axis=1)
            list_df.append(df)

        for diag1, diag2 in zip(diag1_list, diag2_list):
            label_df = list_df[diag1-1].iloc[:,:column_0_num]
            df = pd.concat([label_df, list_df[diag1-1].iloc[:,column_0_num:] - list_df[diag2-1].iloc[:,column_0_num:]], axis=1)
            list_df.append(df)



    if gesutures_num == 5:
        # ジェスチャ絞る
        for i, df in enumerate(list_df):
            list_df[i] = df[df['Label'].str.contains('G1S|G3S|G5S|G7S|G10S')]
    elif gesutures_num == 10:
        for i, df in enumerate(list_df):
            list_df[i] = df[df['Label'].str.contains('S')]
    elif gesutures_num == 'houkou16':
        for i, df in enumerate(list_df):
            df = df[~df['Label'].str.contains('9') & ~df['Label'].str.contains('10')]
            df = df.dropna(how='all', axis=1)
            list_df[i] = df



    # 前処理
    if prep != '生':
    #     for sensor_num in tqdm(range(1, SENSORS_NUM+1)):
        for sensor_num in tqdm(range(1, len(list_df)+1)):
            df = list_df[sensor_num-1]
            trial_list = df['Trial'].unique()
            scaled_feature_names = [col for col in df.columns if type(col) == int]

            df_list_by_trial = []
            for trial_num in trial_list:

                one_trial_df = df.query('Trial==@trial_num')

                scaled_features = one_trial_df.copy()
                features = scaled_features[scaled_feature_names]

                if prep == '標準化':
    #                             features = features - features.mean().mean()
                    features = (features - np.nanmean(features.values))/np.nanstd(features.values)

                elif prep == '最初0':
                    features = (features.T-features.iloc[:,0].values).T
                elif prep == '差':
                    first_df = features.T[::window_size-overlap_size].reset_index(drop=True)
                    last_df = features.T[window_size-1::window_size-overlap_size].reset_index(drop=True)
                    diff_df = (last_df - first_df).dropna(how='all')
                    features = diff_df.T
                else:
                    print('prepがおかしいです')
                    sys.exit()

    #             scaled_features[scaled_feature_names] = features
                scaled_features = pd.concat([scaled_features.drop(scaled_feature_names, axis=1), features], axis=1)
                df_list_by_trial.append(scaled_features)

            list_df[sensor_num-1] = pd.concat(df_list_by_trial, sort=False)

    return list_df

# 実行

## 事前学習

In [35]:
all_subs = ['isobe_split', 'yamashita_split', 'tabuchi_split', 'Sub.5_split', 
            'hirayama_split', 'nagasima_split', 'okamotomasa_split', 'okamotomarina_split', 
            'igarashi_split', 'yosida_split', 'watanabe_split', 'okuda_split', 
            'hotta_split', 'takayama_split']
# all_subs = ['Sub.5_split', 'hirayama_split']
subs_n = len(all_subs)
# INPUT_FOLDER_train_list = [[sub] for sub in ['takayama_split']*len(all_subs)]
INPUT_FOLDER_test_list = copy.copy(all_subs)

In [36]:
subs_num = 3

SEED = 0
fix_seed(SEED)
INPUT_FOLDER_train_list = []
for sub in INPUT_FOLDER_test_list:
    learn_subs = copy.copy(INPUT_FOLDER_test_list)
    learn_subs.remove(sub)
    
    INPUT_FOLDER_train_list.append(random.sample(learn_subs, subs_num))

In [37]:
%%time

SEED = 0
fix_seed(SEED)

test_trial_list = [8, 9, 10]
model_name = 'lstm'

# LSTM
num_epochs = 500
hidden_size = 512
batch_size = 60
num_layers = 3
is_bi = True
mabiki_interval = 8

# CNN
conv_size = 60
num_epochs = 300

# kNN
metric = "dtw" # "dtw" or "eq"


prep_list = ['生', '最初0', '標準化']
#     prep_list = ['生', '最初0', '差', '標準化']
gesutures_num_list = ['houkou16'] # 5or10or20
feats_num_list = [16]

# option_name = ''
option_name = '_'+str(conv_size)+'size' +'_'+str(num_epochs)+'epock'

# moving_average_size_list = [1, 5, 10, 15, 20]
moving_average_size = 10

window_size = 20
overlap_size = 19
for prep in prep_list:
    
#     option_name_top = '_事前'+str(len(INPUT_FOLDER_train_list[0])) +'_'+model_name +'_'+str(conv_size)+'size' +'_'+str(num_epochs)+'epock' +'_'+prep

    if model_name == 'lstm':
        option_name_top = '_事前'+str(len(INPUT_FOLDER_train_list[0])) +'_'+model_name +'_'+str(num_epochs)+'epock' +'_'+prep
    elif model_name == 'cnn':
        option_name_top = '_事前'+str(len(INPUT_FOLDER_train_list[0])) +'_'+model_name +'_'+str(conv_size)+'size' +'_'+str(num_epochs)+'epock' +'_'+prep  
    
    # prep_list = ['生', '最初0', '差', '標準化']
    # feats_num_list = [16, 40]
    # INPUT_FOLDER_list = ['nagasima_split', 'Sub.5_split', 'hirayama_split', 'tabuchi_split', 'okamotomasa_split', 'yosida_split', 'okuda_split', 'okamotomarina_split']



    # INPUT_FOLDER_list = ['nagasima_split', 'hirayama_split', 'tabuchi_split', 'okamotomasa_split', 'yosida_split', 'okuda_split', 'okamotomarina_split']

#     INPUT_FOLDER_train_list = [['Sub.5_split', 'hirayama_split', 'tabuchi_split', 
#                          'okamotomasa_split', 'yosida_split', 'okuda_split', 
#                          'okamotomarina_split', 'nagasima_split', 'isobe_split', 
#                         'igarashi_split', 'hotta_split', 'watanabe_split', 
#                          'takayama_split', 'yamashita_split']]
#     INPUT_FOLDER_train_list = [['Sub.5_split', 'hirayama_split', 'tabuchi_split', 
#                          'okamotomasa_split', 'yosida_split', 
#                           'nagasima_split', 'isobe_split', 
#                         'igarashi_split', 'hotta_split', 'watanabe_split', 
#                          'takayama_split']]
#     INPUT_FOLDER_train_list = [['isobe_split', 'hirayama_split', 'nagasima_split', 'okamotomasa_split']] #2Dlist
#     INPUT_FOLDER_train_list = [['isobe_split']] #2Dlist
#     INPUT_FOLDER_test_list = ['yamashita_split']

#     INPUT_FOLDER_train_list = [['Sub.5_split', 'hirayama_split', 'tabuchi_split', 
#                      'okamotomasa_split', 'yosida_split', 'okuda_split', 
#                      'yamashita_split', 'nagasima_split', 'yamashita_split', 
#                     'igarashi_split', 'hotta_split', 'watanabe_split', 
#                      'takayama_split']]
#     INPUT_FOLDER_train_list = [['yamashita_split', 'hirayama_split', 'nagasima_split', 'okamotomasa_split']] #2Dlist
#     INPUT_FOLDER_test_list = ['isobe_split']

#     nSplit = 10
    all_acc_list = []
    for INPUT_FOLDER_train, INPUT_FOLDER_test in tqdm(zip(INPUT_FOLDER_train_list, INPUT_FOLDER_test_list), total=len(INPUT_FOLDER_test_list)):
        for gesutures_num in gesutures_num_list:
#             for prep in prep_list:
            for feats_num in feats_num_list:

                train_subs_name = []
                for i, sub in enumerate(INPUT_FOLDER_train):
                    if i == 0:
                        DATA_PATH_train = '.' + "./data_split/" + sub + "/3. STEP3/3-1 条件(照明：普通、表情：無、振動：静止)、タスク：ジェスチャ"
                        train_list_df = make_input_data(DATA_PATH_train, moving_average_size, metric)
                    else:
                        DATA_PATH_train = '.' + "./data_split/" + sub + "/3. STEP3/3-1 条件(照明：普通、表情：無、振動：静止)、タスク：ジェスチャ"
                        train_list_df_temp = make_input_data(DATA_PATH_train, moving_average_size, metric)
                        for j in range(len(train_list_df)):
                            train_list_df[j] = pd.concat([train_list_df[j], train_list_df_temp[j]],0)
                    train_subs_name.append(sub.replace('_split', ''))
                train_subs_name = 'train-'+','.join(sorted(train_subs_name))


                DATA_PATH_test = '.' + "./data_split/" + INPUT_FOLDER_test + "/3. STEP3/3-1 条件(照明：普通、表情：無、振動：静止)、タスク：ジェスチャ"
                test_list_df = make_input_data(DATA_PATH_test, moving_average_size, metric)

                # 使うtrial数分抽出
                for i in range(SENSORS_NUM):
                    test_list_df[i] = test_list_df[i][test_list_df[i]['Trial'].isin(test_trial_list)]

                if model_name=='knn':
                    DIR_OUT = './' + 'ジェスチャ通常環境'+option_name_top + '/' + INPUT_FOLDER_test.replace('_split', '') + '_makemodel' +'/'+ train_subs_name + '/１普通状態_' + str(gesutures_num) + 'G_' + str(feats_num) + 'feats_' + prep + '_' + str(moving_average_size)+'sma' + '_'+model_name+metric
                else:
                    DIR_OUT = './' + 'ジェスチャ通常環境'+option_name_top + '/' + INPUT_FOLDER_test.replace('_split', '') + '_makemodel' +'/'+ train_subs_name + '/１普通状態_' + str(gesutures_num) + 'G_' + str(feats_num) + 'feats_' + prep + '_' + str(moving_average_size)+'sma' + '_'+model_name

                os.makedirs(DIR_OUT, exist_ok=True)


                df = train_list_df[0]
                class_num = len(set(df["Label"]))


                if model_name == 'knn':
                    def DTW(a, b):
                        return fastdtw(a, b)[0]

                #     clf = Pipeline([("scaler", StandardScaler()), ("svc", SVC())])
                #     clf = Pipeline([("scaler", StandardScaler()), ("knn", KNeighborsClassifier(n_neighbors=5, n_jobs=-1))])

                    if metric == "eq":
                        clf = KNeighborsClassifier(n_neighbors=7, n_jobs=-1)
                #         , weights='distance'
                    elif metric == "dtw":
                        clf = KNeighborsTimeSeriesClassifier(n_neighbors=7, metric="dtw", n_jobs=-1)
                #         , weights='distance'
                #         clf = KNeighborsClassifier(metric=DTW, n_neighbors=11, n_jobs=-1)
                    else:
                        print('metricがおかしいです')
                        sys.exit()

                #     param_grid = [{'knn__n_neighbors': [3]}]


                # kf = KFold(n_splits=nSplit, shuffle=False)
                # kf_grid = KFold(n_splits=nSplit_grid, shuffle=True)
                # kf_grid = KFold(n_splits=nSplit-1)

#                     kf = GroupKFold(n_splits=nSplit)
#                     kf_grid = GroupKFold(n_splits=nSplit-1)


                param_list = []
                accuracy_list = []
                test_df_list = []
                # テスト
#                     for count, (train_index, test_index) in enumerate(tqdm(kf.split(df.drop(['Label', 'Trial', 'Label_Trial'], axis=1), df['Label'], df['Trial']), total=nSplit)):                    
                count = 0

                if model_name != 'knn':
#                         # train, test分割   
#                         train_list_df = []
#                         test_list_df = []
#                         for df in list_df:
#                             train_list_df.append(df.iloc[train_index])
#                             test_list_df.append(df.iloc[test_index])

#                       # l_p_df用、エラー出さないため
                    test_df = test_list_df[0].copy()

#                     tmp_Y_pred, tmp_Y_test, tmp_best_eval_acc = pipeline(train_list_df, test_list_df, DIR_OUT, count)
                    pre_best_eval_acc = 0
                    best_eval_acc_list = []
                    for random_counter in range(1):
                        if model_name == 'cnn':
                            tmp_Y_pred, tmp_Y_test, tmp_best_eval_acc, net = pipeline_cnn(train_list_df, test_list_df, 
                                                                                 DIR_OUT, count, pre_best_eval_acc, 
                                                                                conv_size, num_epochs)
                        elif model_name == 'lstm':
                            tmp_Y_pred, tmp_Y_test, tmp_best_eval_acc, net = pipeline_lstm(train_list_df, test_list_df, 
                                                                                 DIR_OUT, count, pre_best_eval_acc, 
                                                                                num_epochs, hidden_size, batch_size, 
                                                                                num_layers, is_bi, mabiki_interval)
                            aaa
                        if tmp_best_eval_acc > pre_best_eval_acc:
                            best_eval_acc = tmp_best_eval_acc
                            Y_pred = copy.copy(tmp_Y_pred)
                            Y_test = copy.copy(tmp_Y_test)
                        best_eval_acc_list.append(tmp_best_eval_acc)
                        pre_best_eval_acc = tmp_best_eval_acc
                    print(best_eval_acc_list) 

                    test_df_list.append(test_df)            

                    model_path = model_name+option_name+'_'+prep+'_'+train_subs_name.replace('train-', '')+'.pth'
                    torch.save(net.state_dict(), model_path)

                elif model_name == 'knn':
                    Y_proba = np.zeros((len(test_list_df[0]), class_num))
                    for train_df, test_df in tqdm(zip(train_list_df, test_list_df), total=len(test_list_df)):
#                         train_df = df.iloc[train_index]
#                         test_df = df.iloc[test_index]
                        data_train = train_df.drop(['Label', 'Trial', 'Label_Trial'], axis=1)
                        label_train = train_df.loc[:, 'Label']
                        group_train = train_df.loc[:, 'Trial']
                        data_test = test_df.drop(['Label', 'Trial', 'Label_Trial'], axis=1)
                        label_test = test_df.loc[:, 'Label']

                        if model_name == 'knn':
                            clf.fit(data_train, label_train)
                #             Y_pred = clf.predict(data_test)
                            Y_proba += clf.predict_proba(data_test.values)

                    Y_pred = Y_proba.argmax(axis = 1).astype(object)
                    for class_i in range(class_num):
                        np.putmask(Y_pred, Y_pred == class_i, clf.classes_[class_i])
                    Y_test = label_test
                    test_df_list.append(test_df)



                warnings.filterwarnings('ignore')
                recall = recall_score(Y_test, Y_pred, average="weighted")
                precision = precision_score(Y_test, Y_pred, average="weighted")
                fMeasure = f1_score(Y_test, Y_pred, average="weighted")
                labels = natsorted(list(set(Y_test)))
                cmx_data = confusion_matrix(Y_test, Y_pred, labels=labels)
                df_cmx = pd.DataFrame(cmx_data, index=labels, columns=labels)
                report = classification_report(Y_test, Y_pred, target_names=labels, labels=labels, output_dict=True)
                df_report = pd.DataFrame(report)

                if count == 0:
                    mean_df_report = df_report
                    sum_df_cmx = df_cmx
                    all_Y_pred = Y_pred
                else:
                    mean_df_report += df_report
                    sum_df_cmx += df_cmx
                    all_Y_pred = np.append(all_Y_pred, Y_pred)

                os.makedirs(DIR_OUT, exist_ok=True)
#                     df_report.to_csv(DIR_OUT + "/" + "trial" + str(count+1) + ".csv", encoding='utf-8')
#                     df_cmx.to_csv(DIR_OUT + "/" + "trial" + str(count+1) + "_cmx.csv", encoding='utf-8')
                plt.figure(figsize=(10, 7))
            #     sns.set(font='MS Gothic') #ラベルが日本語の場合
                sns.heatmap(df_cmx, annot=True, cmap='Blues', fmt='g')
                #plt.show()
#                     plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_cmx.png", bbox_inches='tight')
                plt.close()
                #df_report.to_csv(DIR_OUT + "/" + "trial" + str(count+1) + ".csv", encoding='utf-8',mode='x')




                mean_df_report /= count+1
                mean_df_report.to_csv(DIR_OUT + "/" + "trialAll.csv", encoding='utf-8')

#                     with open(DIR_OUT + '/F1score.txt', mode='w') as f:
#                         s = str(mean_df_report.loc['f1-score', 'macro avg'])
#                         f.write(s)
#                     with open("./ジェスチャ通常環境" + '/' + 'ジェスチャ通常環境' + 'F1score.txt', mode='a') as f:
#                         s = DIR_OUT + '\n'
#                         s += str(mean_df_report.loc['f1-score', 'macro avg']) + '\n\n'
#                         f.write(s)
                with open(DIR_OUT + '/accuracy.txt', mode='w') as f:
                    s = str(mean_df_report.at['f1-score', 'accuracy'])
                    f.write(s)
                all_acc_list.append(mean_df_report.loc['f1-score', 'accuracy'])
                with open("./ジェスチャ通常環境"+option_name_top + '/' + 'ジェスチャ通常環境' + 'accuracy.txt', mode='a') as f:
                    s = DIR_OUT + '\n'
                    s += str(mean_df_report.at['f1-score', 'accuracy']) + '\n\n'
                    f.write(s)


#                     ss_table.at[train_subs_name.replace('train-', ''), INPUT_FOLDER_test.replace('_split', '')] = mean_df_report.at['f1-score', 'accuracy']

                # result_list.append(mean_df_report['macro avg'].loc['precision'])
                # result_list.append(mean_df_report['macro avg'].loc['recall'])
                # result_list.append(mean_df_report['macro avg'].loc['f1-score'])

                plt.figure(figsize=(10, 7))
                sns.heatmap(sum_df_cmx, annot=True, cmap='Blues', fmt='g')
                plt.savefig(DIR_OUT + "/" + "trialAll_cmx.png", bbox_inches='tight')
                plt.close()
                sum_df_cmx.to_csv(DIR_OUT + "/" + "trialAll_cmx.csv", encoding='utf-8')

#                     if model_name != 'knn':
#                         with open(DIR_OUT + '/param.txt', mode='w') as f:
#                             for i in range(nSplit):
#                                 s = "trial" +str(i+1) + '\n' + 'param : ' + str(param_list[i]) + '\n' + 'accuracy : ' + str(accuracy_list[i]) + '\n\n'
#                                 f.write(s)

                l_p_df = pd.concat(test_df_list, sort=False)
                label = l_p_df['Label']
                l_p_df = l_p_df.drop('Label', axis=1)
                l_p_df.insert(2, 'Label', label)
                l_p_df.insert(3, 'prediction_label', all_Y_pred)
                l_p_df.to_csv(DIR_OUT + "/" + "prediction.csv", encoding='utf-8')



    with open("./ジェスチャ通常環境"+option_name_top + '/' + 'ジェスチャ通常環境' + 'accuracy.txt', mode='a') as f:
        s = 'all_mean' + '\n'
        s += str(np.mean(all_acc_list)) + '\n\n'
        f.write(s)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=14.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=480.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=48.0), HTML(value='')))


<built-in method size of Tensor object at 0x000002168FEE0B40>
Epoch [1/300], Loss: 0.0462, val_loss: 0.0578, val_acc: 0.0417, val_f: 0.0050
Epoch [11/300], Loss: 0.0458, val_loss: 0.0578, val_acc: 0.0833, val_f: 0.0360
Epoch [21/300], Loss: 0.0443, val_loss: 0.0575, val_acc: 0.0625, val_f: 0.0121
Epoch [31/300], Loss: 0.0307, val_loss: 0.0487, val_acc: 0.2083, val_f: 0.0982
Epoch [41/300], Loss: 0.0215, val_loss: 0.0461, val_acc: 0.2083, val_f: 0.1223
Epoch [51/300], Loss: 0.0148, val_loss: 0.0503, val_acc: 0.1042, val_f: 0.0607
Epoch [61/300], Loss: 0.0110, val_loss: 0.0533, val_acc: 0.0833, val_f: 0.0290
Epoch [71/300], Loss: 0.0080, val_loss: 0.0536, val_acc: 0.1042, val_f: 0.0607
Epoch [81/300], Loss: 0.0072, val_loss: 0.0585, val_acc: 0.0833, val_f: 0.0280
Epoch [91/300], Loss: 0.0048, val_loss: 0.0565, val_acc: 0.0833, val_f: 0.0294
Epoch [101/300], Loss: 0.0040, val_loss: 0.0583, val_acc: 0.0833, val_f: 0.0290
Epoch [111/300], Loss: 0.0038, val_loss: 0.0597, val_acc: 0.0833, va

NameError: name 'aaa' is not defined

## ファインチューニング

In [17]:
all_subs = ['isobe_split', 'yamashita_split', 'tabuchi_split', 'Sub.5_split', 
            'hirayama_split', 'nagasima_split', 'okamotomasa_split', 'okamotomarina_split', 
            'igarashi_split', 'yosida_split', 'watanabe_split', 'okuda_split', 
            'hotta_split', 'takayama_split']
# all_subs = ['Sub.5_split', 'hirayama_split', 'isobe_split']
subs_n = len(all_subs)
INPUT_FOLDER_train_list = [[sub] for sub in all_subs]
INPUT_FOLDER_test_list = copy.copy(all_subs)

In [20]:
model_path_list = ['lstm_60batch_500epock_標準化4人.pth']

In [12]:
%%time

SEED = 0
fix_seed(SEED)

# train_trial_list = [1, 2,3,4,5,6,7]
train_trial_list = [1,2,3,4]
# train_trial_list = [1]
test_trial_list = [8, 9, 10]


model_name = 'cnn'

# LSTM
num_epochs = 500
hidden_size = 512
batch_size = 20
num_layers = 3
is_bi = True
mabiki_interval = 8

# CNN
conv_size = 60
num_epochs = 500

# kNN
metric = "dtw" # "dtw" or "eq"
n_neighbors = 1

# moving_average_size_list = [1, 5, 10, 15, 20]
moving_average_size_list = [10]

window_size = 20
overlap_size = 19
for moving_average_size in tqdm(moving_average_size_list):

    # prep_list = ['生', '最初0', '差', '標準化']
    # feats_num_list = [16, 40]
    # INPUT_FOLDER_list = ['nagasima_split', 'Sub.5_split', 'hirayama_split', 'tabuchi_split', 'okamotomasa_split', 'yosida_split', 'okuda_split', 'okamotomarina_split']

    prep_list = ['標準化']
#     prep_list = ['生', '最初0', '差', '標準化']
    gesutures_num_list = ['houkou16'] # 5or10or20
    feats_num_list = [16]

    # INPUT_FOLDER_list = ['nagasima_split', 'hirayama_split', 'tabuchi_split', 'okamotomasa_split', 'yosida_split', 'okuda_split', 'okamotomarina_split']

#     INPUT_FOLDER_train_list = [['Sub.5_split', 'hirayama_split', 'tabuchi_split', 
#                          'okamotomasa_split', 'yosida_split', 'okuda_split', 
#                          'okamotomarina_split', 'nagasima_split', 'isobe_split', 
#                         'igarashi_split', 'hotta_split', 'watanabe_split', 
#                          'takayama_split']]
#     INPUT_FOLDER_train_list = [['isobe_split', 'hirayama_split', 'nagasima_split', 'okamotomasa_split']] #2Dlist
#     INPUT_FOLDER_train_list = [['yamashita_split']] #2Dlist
#     INPUT_FOLDER_test_list = ['yamashita_split']

#     INPUT_FOLDER_train_list = [['Sub.5_split', 'hirayama_split', 'tabuchi_split', 
#                      'okamotomasa_split', 'yosida_split', 'okuda_split', 
#                      'yamashita_split', 'nagasima_split', 'yamashita_split', 
#                     'igarashi_split', 'hotta_split', 'watanabe_split', 
#                      'takayama_split']]
#     INPUT_FOLDER_train_list = [['okamotomarina_split']] #2Dlist
#     INPUT_FOLDER_test_list = ['okamotomarina_split']
#     INPUT_FOLDER_train_list = [['okuda_split']] #2Dlist
#     INPUT_FOLDER_test_list = ['okuda_split']   

#     nSplit = 10
    for INPUT_FOLDER_train, INPUT_FOLDER_test, model_path in tqdm(zip(INPUT_FOLDER_train_list, INPUT_FOLDER_test_list, model_path_list), total=len(INPUT_FOLDER_test_list):
        for gesutures_num in gesutures_num_list:
            for prep in prep_list:
                for feats_num in feats_num_list:

                    train_subs_name = []
                    for i, sub in enumerate(INPUT_FOLDER_train):
                        if i == 0:
                            DATA_PATH_train = '.' + "./data_split/" + sub + "/3. STEP3/3-1 条件(照明：普通、表情：無、振動：静止)、タスク：ジェスチャ"
                            train_list_df = make_input_data(DATA_PATH_train, moving_average_size, metric)
                        else:
                            DATA_PATH_train = '.' + "./data_split/" + sub + "/3. STEP3/3-1 条件(照明：普通、表情：無、振動：静止)、タスク：ジェスチャ"
                            train_list_df_temp = make_input_data(DATA_PATH_train, moving_average_size, metric)
                            for j in range(len(train_list_df)):
                                train_list_df[j] = pd.concat([train_list_df[j], train_list_df_temp[j]],0)
                        train_subs_name.append(sub.replace('_split', ''))
                    train_subs_name = 'train-'+','.join(sorted(train_subs_name))
                    

                    DATA_PATH_test = '.' + "./data_split/" + INPUT_FOLDER_test + "/3. STEP3/3-1 条件(照明：普通、表情：無、振動：静止)、タスク：ジェスチャ"
                    test_list_df = make_input_data(DATA_PATH_test, moving_average_size, metric)
                    
                    # 使うtrial数分抽出
                    for i in range(SENSORS_NUM):
                        train_list_df[i] = train_list_df[i][train_list_df[i]['Trial'].isin(train_trial_list)]
                        test_list_df[i] = test_list_df[i][test_list_df[i]['Trial'].isin(test_trial_list)]
                    
                    if model_name=='knn':
                        DIR_OUT = './' + 'ジェスチャ通常環境' + '/' + INPUT_FOLDER_test.replace('_split', '') +'/'+ train_subs_name + '/１普通状態_' + str(gesutures_num) + 'G_' + str(feats_num) + 'feats_' + prep + '_' + str(moving_average_size)+'sma' + '_'+model_name+metric
                    else:
                        DIR_OUT = './' + 'ジェスチャ通常環境' + '/' + INPUT_FOLDER_test.replace('_split', '') +'/'+ train_subs_name + '/１普通状態_' + str(gesutures_num) + 'G_' + str(feats_num) + 'feats_' + prep + '_' + str(moving_average_size)+'sma' + '_'+model_name

                    os.makedirs(DIR_OUT, exist_ok=True)
                    

                    df = train_list_df[0]
                    class_num = len(set(df["Label"]))

                    
                    if model_name == 'knn':
                        def DTW(a, b):
                            return fastdtw(a, b)[0]

                    #     clf = Pipeline([("scaler", StandardScaler()), ("svc", SVC())])
                    #     clf = Pipeline([("scaler", StandardScaler()), ("knn", KNeighborsClassifier(n_neighbors=5, n_jobs=-1))])

                        if metric == "eq":
                            clf = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1)
                    #         , weights='distance'
                        elif metric == "dtw":
                            clf = KNeighborsTimeSeriesClassifier(n_neighbors=n_neighbors, metric="dtw", n_jobs=-1)
                    #         , weights='distance'
                    #         clf = KNeighborsClassifier(metric=DTW, n_neighbors=11, n_jobs=-1)
                        else:
                            print('metricがおかしいです')
                            sys.exit()

                    #     param_grid = [{'knn__n_neighbors': [3]}]


                    # kf = KFold(n_splits=nSplit, shuffle=False)
                    # kf_grid = KFold(n_splits=nSplit_grid, shuffle=True)
                    # kf_grid = KFold(n_splits=nSplit-1)

#                     kf = GroupKFold(n_splits=nSplit)
#                     kf_grid = GroupKFold(n_splits=nSplit-1)


                    param_list = []
                    accuracy_list = []
                    test_df_list = []
                    # テスト
#                     for count, (train_index, test_index) in enumerate(tqdm(kf.split(df.drop(['Label', 'Trial', 'Label_Trial'], axis=1), df['Label'], df['Trial']), total=nSplit)):                    
                    count = 0

                    if model_name != 'knn':
#                         # train, test分割   
#                         train_list_df = []
#                         test_list_df = []
#                         for df in list_df:
#                             train_list_df.append(df.iloc[train_index])
#                             test_list_df.append(df.iloc[test_index])

#                       # l_p_df用、エラー出さないため
                        test_df = test_list_df[0].copy()

    #                     tmp_Y_pred, tmp_Y_test, tmp_best_eval_acc = pipeline(train_list_df, test_list_df, DIR_OUT, count)
                        pre_best_eval_acc = 0
                        best_eval_acc_list = []
                        for random_counter in range(1):
                            if model_name == 'cnn':
                                tmp_Y_pred, tmp_Y_test, tmp_best_eval_acc = pipeline_cnn_finetuning(train_list_df, test_list_df, 
                                                                                     DIR_OUT, count, pre_best_eval_acc, 
                                                                                    conv_size, model_path, num_epochs)
                            elif model_name == 'lstm':
                                tmp_Y_pred, tmp_Y_test, tmp_best_eval_acc = pipeline_lstm_finetuning(train_list_df, test_list_df, 
                                                                                     DIR_OUT, count, pre_best_eval_acc, 
                                                                                    num_epochs, hidden_size, batch_size, 
                                                                                    num_layers, is_bi, mabiki_interval, model_path)
                            if tmp_best_eval_acc > pre_best_eval_acc:
                                best_eval_acc = tmp_best_eval_acc
                                Y_pred = copy.copy(tmp_Y_pred)
                                Y_test = copy.copy(tmp_Y_test)
                            best_eval_acc_list.append(tmp_best_eval_acc)
                            pre_best_eval_acc = tmp_best_eval_acc
                        print(best_eval_acc_list) 
                        
                        test_df_list.append(test_df)            

                    elif model_name == 'knn':
                        Y_proba = np.zeros((len(test_list_df[0]), class_num))
                        for train_df, test_df in tqdm(zip(train_list_df, test_list_df), total=len(test_list_df)):
    #                         train_df = df.iloc[train_index]
    #                         test_df = df.iloc[test_index]
                            data_train = train_df.drop(['Label', 'Trial', 'Label_Trial'], axis=1)
                            label_train = train_df.loc[:, 'Label']
                            group_train = train_df.loc[:, 'Trial']
                            data_test = test_df.drop(['Label', 'Trial', 'Label_Trial'], axis=1)
                            label_test = test_df.loc[:, 'Label']

                            if model_name == 'knn':
                                clf.fit(data_train, label_train)
                    #             Y_pred = clf.predict(data_test)
                                Y_proba += clf.predict_proba(data_test.values)

                        Y_pred = Y_proba.argmax(axis = 1).astype(object)
                        for class_i in range(class_num):
                            np.putmask(Y_pred, Y_pred == class_i, clf.classes_[class_i])
                        Y_test = label_test
                        test_df_list.append(test_df)
                    
                    

                    warnings.filterwarnings('ignore')
                    recall = recall_score(Y_test, Y_pred, average="weighted")
                    precision = precision_score(Y_test, Y_pred, average="weighted")
                    fMeasure = f1_score(Y_test, Y_pred, average="weighted")
                    labels = natsorted(list(set(Y_test)))
                    cmx_data = confusion_matrix(Y_test, Y_pred, labels=labels)
                    df_cmx = pd.DataFrame(cmx_data, index=labels, columns=labels)
                    report = classification_report(Y_test, Y_pred, target_names=labels, labels=labels, output_dict=True)
                    df_report = pd.DataFrame(report)

                    if count == 0:
                        mean_df_report = df_report
                        sum_df_cmx = df_cmx
                        all_Y_pred = Y_pred
                    else:
                        mean_df_report += df_report
                        sum_df_cmx += df_cmx
                        all_Y_pred = np.append(all_Y_pred, Y_pred)

                    os.makedirs(DIR_OUT, exist_ok=True)
#                     df_report.to_csv(DIR_OUT + "/" + "trial" + str(count+1) + ".csv", encoding='utf-8')
#                     df_cmx.to_csv(DIR_OUT + "/" + "trial" + str(count+1) + "_cmx.csv", encoding='utf-8')
                    plt.figure(figsize=(10, 7))
                #     sns.set(font='MS Gothic') #ラベルが日本語の場合
                    sns.heatmap(df_cmx, annot=True, cmap='Blues', fmt='g')
                    #plt.show()
#                     plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_cmx.png", bbox_inches='tight')
                    plt.close()
                    #df_report.to_csv(DIR_OUT + "/" + "trial" + str(count+1) + ".csv", encoding='utf-8',mode='x'                       
                        
                    mean_df_report /= count+1
                    mean_df_report.to_csv(DIR_OUT + "/" + "trialAll.csv", encoding='utf-8')

#                     with open(DIR_OUT + '/F1score.txt', mode='w') as f:
#                         s = str(mean_df_report.loc['f1-score', 'macro avg'])
#                         f.write(s)
#                     with open("./ジェスチャ通常環境" + '/' + 'ジェスチャ通常環境' + 'F1score.txt', mode='a') as f:
#                         s = DIR_OUT + '\n'
#                         s += str(mean_df_report.loc['f1-score', 'macro avg']) + '\n\n'
#                         f.write(s)
                    with open(DIR_OUT + '/accuracy.txt', mode='w') as f:
                        s = str(mean_df_report.loc['f1-score', 'accuracy'])
                        f.write(s)
                    with open("./ジェスチャ通常環境" + '/' + 'ジェスチャ通常環境' + 'accuracy.txt', mode='a') as f:
                        s = DIR_OUT + '\n'
                        s += str(mean_df_report.loc['f1-score', 'accuracy']) + '\n\n'
                        f.write(s)
                        
                    # result_list.append(mean_df_report['macro avg'].loc['precision'])
                    # result_list.append(mean_df_report['macro avg'].loc['recall'])
                    # result_list.append(mean_df_report['macro avg'].loc['f1-score'])

                    plt.figure(figsize=(10, 7))
                    sns.heatmap(sum_df_cmx, annot=True, cmap='Blues', fmt='g')
                    plt.savefig(DIR_OUT + "/" + "trialAll_cmx.png", bbox_inches='tight')
                    plt.close()
                    sum_df_cmx.to_csv(DIR_OUT + "/" + "trialAll_cmx.csv", encoding='utf-8')

#                     if model_name != 'knn':
#                         with open(DIR_OUT + '/param.txt', mode='w') as f:
#                             for i in range(nSplit):
#                                 s = "trial" +str(i+1) + '\n' + 'param : ' + str(param_list[i]) + '\n' + 'accuracy : ' + str(accuracy_list[i]) + '\n\n'
#                                 f.write(s)

                    l_p_df = pd.concat(test_df_list, sort=False)
                    label = l_p_df['Label']
                    l_p_df = l_p_df.drop('Label', axis=1)
                    l_p_df.insert(2, 'Label', label)
                    l_p_df.insert(3, 'prediction_label', all_Y_pred)
                    l_p_df.to_csv(DIR_OUT + "/" + "prediction.csv", encoding='utf-8')





HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=64.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=48.0), HTML(value='')))


Epoch [1/500], Loss: 0.0429, val_loss: 0.0302, val_acc: 0.4583, val_f: 0.3665
Epoch [11/500], Loss: 0.0448, val_loss: 0.0237, val_acc: 0.5208, val_f: 0.4445
Epoch [21/500], Loss: 0.0380, val_loss: 0.0202, val_acc: 0.6042, val_f: 0.5472
Epoch [31/500], Loss: 0.0368, val_loss: 0.0183, val_acc: 0.5625, val_f: 0.5091
Epoch [41/500], Loss: 0.0296, val_loss: 0.0163, val_acc: 0.5625, val_f: 0.5080
Epoch [51/500], Loss: 0.0200, val_loss: 0.0156, val_acc: 0.6667, val_f: 0.6200
Epoch [61/500], Loss: 0.0225, val_loss: 0.0153, val_acc: 0.7083, val_f: 0.6539
Epoch [71/500], Loss: 0.0214, val_loss: 0.0142, val_acc: 0.7083, val_f: 0.6539
Epoch [81/500], Loss: 0.0218, val_loss: 0.0135, val_acc: 0.7292, val_f: 0.6875
Epoch [91/500], Loss: 0.0193, val_loss: 0.0138, val_acc: 0.7292, val_f: 0.6885
Epoch [101/500], Loss: 0.0186, val_loss: 0.0128, val_acc: 0.7292, val_f: 0.6838
Epoch [111/500], Loss: 0.0133, val_loss: 0.0127, val_acc: 0.7292, val_f: 0.6885
Epoch [121/500], Loss: 0.0202, val_loss: 0.0123, v

## ファインチューニングなしで少量データ学習・予測

In [65]:
all_subs = ['isobe_split', 'yamashita_split', 'tabuchi_split', 'Sub.5_split', 
            'hirayama_split', 'nagasima_split', 'okamotomasa_split', 'okamotomarina_split', 
            'igarashi_split', 'yosida_split', 'watanabe_split', 'okuda_split', 
            'hotta_split', 'takayama_split']
# all_subs = ['Sub.5_split', 'hirayama_split']
subs_n = len(all_subs)
INPUT_FOLDER_train_list = [[sub] for sub in all_subs]
INPUT_FOLDER_test_list = copy.copy(all_subs)

In [30]:
%%time

SEED = 0
fix_seed(SEED)

# train_trial_list = [1, 2,3,4,5,6,7]
# train_trial_list = [1,2,3,4]
train_trial_list = [1]
test_trial_list = [8, 9, 10]

model_name = 'cnn'

# LSTM
num_epochs = 500
hidden_size = 512
batch_size = 20
num_layers = 3
is_bi = True
mabiki_interval = 8

# CNN
conv_size = 20
num_epochs = 300

# kNN
metric = "dtw" # "dtw" or "eq"
n_neighbors = 1

# moving_average_size_list = [1, 5, 10, 15, 20]
moving_average_size_list = [10]

prep_list = ['生', '最初0', '標準化']
#     prep_list = ['生', '最初0', '差', '標準化']
gesutures_num_list = ['houkou16'] # 5or10or20
feats_num_list = [16]
    


window_size = 20
overlap_size = 19
for prep in prep_list:

    if model_name == 'lstm':
        option_name_top = '_少データ'+str(len(train_trial_list)) +'_'+model_name +'_'+str(num_epochs)+'epock' +'_'+prep
    elif model_name == 'cnn':
        option_name_top = '_少データ'+str(len(train_trial_list)) +'_'+model_name +'_'+str(conv_size)+'size' +'_'+str(num_epochs)+'epock' +'_'+prep
    elif model_name == 'knn':
        option_name_top = '_少データ'+str(len(train_trial_list)) +'_'+model_name +'_'+str(n_neighbors)+'neighbor' +'_'+prep
    
    # prep_list = ['生', '最初0', '差', '標準化']
    # feats_num_list = [16, 40]
    # INPUT_FOLDER_list = ['nagasima_split', 'Sub.5_split', 'hirayama_split', 'tabuchi_split', 'okamotomasa_split', 'yosida_split', 'okuda_split', 'okamotomarina_split']


    # INPUT_FOLDER_list = ['nagasima_split', 'hirayama_split', 'tabuchi_split', 'okamotomasa_split', 'yosida_split', 'okuda_split', 'okamotomarina_split']

#     INPUT_FOLDER_train_list = [['Sub.5_split', 'hirayama_split', 'tabuchi_split', 
#                          'okamotomasa_split', 'yosida_split', 'okuda_split', 
#                          'okamotomarina_split', 'nagasima_split', 'isobe_split', 
#                         'igarashi_split', 'hotta_split', 'watanabe_split', 
#                          'takayama_split']]
#     INPUT_FOLDER_train_list = [['isobe_split', 'hirayama_split', 'nagasima_split', 'okamotomasa_split']] #2Dlist
#     INPUT_FOLDER_train_list = [['okuda_split']] #2Dlist
#     INPUT_FOLDER_test_list = ['okuda_split']
#     INPUT_FOLDER_train_list = [['okamotomarina_split']] #2Dlist
#     INPUT_FOLDER_test_list = ['okamotomarina_split']
#     INPUT_FOLDER_train_list = [['yamashita_split']] #2Dlist
#     INPUT_FOLDER_test_list = ['yamashita_split']

#     INPUT_FOLDER_train_list = [['Sub.5_split', 'hirayama_split', 'tabuchi_split', 
#                      'okamotomasa_split', 'yosida_split', 'okuda_split', 
#                      'yamashita_split', 'nagasima_split', 'yamashita_split', 
#                     'igarashi_split', 'hotta_split', 'watanabe_split', 
#                      'takayama_split']]
#     INPUT_FOLDER_train_list = [['yamashita_split', 'hirayama_split', 'nagasima_split', 'okamotomasa_split']] #2Dlist
#     INPUT_FOLDER_test_list = ['isobe_split']        

#     nSplit = 10
    all_acc_list = []
    for INPUT_FOLDER_train, INPUT_FOLDER_test in tqdm(zip(INPUT_FOLDER_train_list, INPUT_FOLDER_test_list), total=len(INPUT_FOLDER_test_list)):
        for gesutures_num in gesutures_num_list:
#             for prep in prep_list:
            for feats_num in feats_num_list:

                train_subs_name = []
                for i, sub in enumerate(INPUT_FOLDER_train):
                    if i == 0:
                        DATA_PATH_train = '.' + "./data_split/" + sub + "/3. STEP3/3-1 条件(照明：普通、表情：無、振動：静止)、タスク：ジェスチャ"
                        train_list_df = make_input_data(DATA_PATH_train, moving_average_size, metric)
                    else:
                        DATA_PATH_train = '.' + "./data_split/" + sub + "/3. STEP3/3-1 条件(照明：普通、表情：無、振動：静止)、タスク：ジェスチャ"
                        train_list_df_temp = make_input_data(DATA_PATH_train, moving_average_size, metric)
                        for j in range(len(train_list_df)):
                            train_list_df[j] = pd.concat([train_list_df[j], train_list_df_temp[j]],0)
                    train_subs_name.append(sub.replace('_split', ''))
                train_subs_name = 'train-'+','.join(sorted(train_subs_name))


                DATA_PATH_test = '.' + "./data_split/" + INPUT_FOLDER_test + "/3. STEP3/3-1 条件(照明：普通、表情：無、振動：静止)、タスク：ジェスチャ"
                test_list_df = make_input_data(DATA_PATH_test, moving_average_size, metric)

                # 使うtrial数分抽出
                for i in range(SENSORS_NUM):
                    train_list_df[i] = train_list_df[i][train_list_df[i]['Trial'].isin(train_trial_list)]
                    test_list_df[i] = test_list_df[i][test_list_df[i]['Trial'].isin(test_trial_list)]

                if model_name=='knn':
                    DIR_OUT = './' + 'ジェスチャ通常環境'+option_name_top + '/' + INPUT_FOLDER_test.replace('_split', '') +'/'+ train_subs_name + '/１普通状態_' + str(gesutures_num) + 'G_' + str(feats_num) + 'feats_' + prep + '_' + str(moving_average_size)+'sma' + '_'+model_name+metric
                else:
                    DIR_OUT = './' + 'ジェスチャ通常環境'+option_name_top + '/' + INPUT_FOLDER_test.replace('_split', '') +'/'+ train_subs_name + '/１普通状態_' + str(gesutures_num) + 'G_' + str(feats_num) + 'feats_' + prep + '_' + str(moving_average_size)+'sma' + '_'+model_name

                os.makedirs(DIR_OUT, exist_ok=True)


                df = train_list_df[0]
                class_num = len(set(df["Label"]))


                if model_name == 'knn':
                    def DTW(a, b):
                        return fastdtw(a, b)[0]

                #     clf = Pipeline([("scaler", StandardScaler()), ("svc", SVC())])
                #     clf = Pipeline([("scaler", StandardScaler()), ("knn", KNeighborsClassifier(n_neighbors=5, n_jobs=-1))])

                    if metric == "eq":
                        clf = KNeighborsClassifier(n_neighbors=n_neighbors, n_jobs=-1)
                #         , weights='distance'
                    elif metric == "dtw":
                        clf = KNeighborsTimeSeriesClassifier(n_neighbors=n_neighbors, metric="dtw", n_jobs=-1)
                #         , weights='distance'
                #         clf = KNeighborsClassifier(metric=DTW, n_neighbors=11, n_jobs=-1)
                    else:
                        print('metricがおかしいです')
                        sys.exit()

                #     param_grid = [{'knn__n_neighbors': [3]}]


                # kf = KFold(n_splits=nSplit, shuffle=False)
                # kf_grid = KFold(n_splits=nSplit_grid, shuffle=True)
                # kf_grid = KFold(n_splits=nSplit-1)

#                     kf = GroupKFold(n_splits=nSplit)
#                     kf_grid = GroupKFold(n_splits=nSplit-1)


                param_list = []
                accuracy_list = []
                test_df_list = []
                # テスト
#                     for count, (train_index, test_index) in enumerate(tqdm(kf.split(df.drop(['Label', 'Trial', 'Label_Trial'], axis=1), df['Label'], df['Trial']), total=nSplit)):                    
                count = 0

                if model_name != 'knn':
#                         # train, test分割   
#                         train_list_df = []
#                         test_list_df = []
#                         for df in list_df:
#                             train_list_df.append(df.iloc[train_index])
#                             test_list_df.append(df.iloc[test_index])

#                       # l_p_df用、エラー出さないため
                    test_df = test_list_df[0].copy()

#                     tmp_Y_pred, tmp_Y_test, tmp_best_eval_acc = pipeline(train_list_df, test_list_df, DIR_OUT, count)
                    pre_best_eval_acc = 0
                    best_eval_acc_list = []
                    for random_counter in range(1):
                        if model_name == 'cnn':
                            tmp_Y_pred, tmp_Y_test, tmp_best_eval_acc, _ = pipeline_cnn(train_list_df, test_list_df, 
                                                                                 DIR_OUT, count, pre_best_eval_acc, 
                                                                                conv_size, num_epochs)
                        elif model_name == 'lstm':
                            tmp_Y_pred, tmp_Y_test, tmp_best_eval_acc, _ = pipeline_lstm(train_list_df, test_list_df, 
                                                                                 DIR_OUT, count, pre_best_eval_acc, 
                                                                                num_epochs, hidden_size, batch_size, 
                                                                                num_layers, is_bi, mabiki_interval)
                        if tmp_best_eval_acc > pre_best_eval_acc:
                            best_eval_acc = tmp_best_eval_acc
                            Y_pred = copy.copy(tmp_Y_pred)
                            Y_test = copy.copy(tmp_Y_test)
                        best_eval_acc_list.append(tmp_best_eval_acc)
                        pre_best_eval_acc = tmp_best_eval_acc
                    print(best_eval_acc_list) 

                    test_df_list.append(test_df)            

                elif model_name == 'knn':
                    Y_proba = np.zeros((len(test_list_df[0]), class_num))
                    for train_df, test_df in tqdm(zip(train_list_df, test_list_df), total=len(test_list_df)):
#                         train_df = df.iloc[train_index]
#                         test_df = df.iloc[test_index]
                        data_train = train_df.drop(['Label', 'Trial', 'Label_Trial'], axis=1)
                        label_train = train_df.loc[:, 'Label']
                        group_train = train_df.loc[:, 'Trial']
                        data_test = test_df.drop(['Label', 'Trial', 'Label_Trial'], axis=1)
                        label_test = test_df.loc[:, 'Label']

                        if model_name == 'knn':
                            clf.fit(data_train, label_train)
                #             Y_pred = clf.predict(data_test)
                            Y_proba += clf.predict_proba(data_test.values)

                    Y_pred = Y_proba.argmax(axis = 1).astype(object)
                    for class_i in range(class_num):
                        np.putmask(Y_pred, Y_pred == class_i, clf.classes_[class_i])
                    Y_test = label_test
                    test_df_list.append(test_df)



                warnings.filterwarnings('ignore')
                recall = recall_score(Y_test, Y_pred, average="weighted")
                precision = precision_score(Y_test, Y_pred, average="weighted")
                fMeasure = f1_score(Y_test, Y_pred, average="weighted")
                labels = natsorted(list(set(Y_test)))
                cmx_data = confusion_matrix(Y_test, Y_pred, labels=labels)
                df_cmx = pd.DataFrame(cmx_data, index=labels, columns=labels)
                report = classification_report(Y_test, Y_pred, target_names=labels, labels=labels, output_dict=True)
                df_report = pd.DataFrame(report)

                if count == 0:
                    mean_df_report = df_report
                    sum_df_cmx = df_cmx
                    all_Y_pred = Y_pred
                else:
                    mean_df_report += df_report
                    sum_df_cmx += df_cmx
                    all_Y_pred = np.append(all_Y_pred, Y_pred)

                os.makedirs(DIR_OUT, exist_ok=True)
#                     df_report.to_csv(DIR_OUT + "/" + "trial" + str(count+1) + ".csv", encoding='utf-8')
#                     df_cmx.to_csv(DIR_OUT + "/" + "trial" + str(count+1) + "_cmx.csv", encoding='utf-8')
                plt.figure(figsize=(10, 7))
            #     sns.set(font='MS Gothic') #ラベルが日本語の場合
                sns.heatmap(df_cmx, annot=True, cmap='Blues', fmt='g')
                #plt.show()
#                     plt.savefig(DIR_OUT + "/" + "trial" + str(count+1) + "_cmx.png", bbox_inches='tight')
                plt.close()
                #df_report.to_csv(DIR_OUT + "/" + "trial" + str(count+1) + ".csv", encoding='utf-8',mode='x')




                mean_df_report /= count+1
                mean_df_report.to_csv(DIR_OUT + "/" + "trialAll.csv", encoding='utf-8')

#                     with open(DIR_OUT + '/F1score.txt', mode='w') as f:
#                         s = str(mean_df_report.loc['f1-score', 'macro avg'])
#                         f.write(s)
#                     with open("./ジェスチャ通常環境" + '/' + 'ジェスチャ通常環境' + 'F1score.txt', mode='a') as f:
#                         s = DIR_OUT + '\n'
#                         s += str(mean_df_report.loc['f1-score', 'macro avg']) + '\n\n'
#                         f.write(s)
                with open(DIR_OUT + '/accuracy.txt', mode='w') as f:
                    s = str(mean_df_report.loc['f1-score', 'accuracy'])
                    f.write(s)
                all_acc_list.append(mean_df_report.loc['f1-score', 'accuracy'])
                with open("./ジェスチャ通常環境"+option_name_top + '/' + 'ジェスチャ通常環境' + 'accuracy.txt', mode='a') as f:
                    s = DIR_OUT + '\n'
                    s += str(mean_df_report.loc['f1-score', 'accuracy']) + '\n\n'
                    f.write(s)

                # result_list.append(mean_df_report['macro avg'].loc['precision'])
                # result_list.append(mean_df_report['macro avg'].loc['recall'])
                # result_list.append(mean_df_report['macro avg'].loc['f1-score'])

                plt.figure(figsize=(10, 7))
                sns.heatmap(sum_df_cmx, annot=True, cmap='Blues', fmt='g')
                plt.savefig(DIR_OUT + "/" + "trialAll_cmx.png", bbox_inches='tight')
                plt.close()
                sum_df_cmx.to_csv(DIR_OUT + "/" + "trialAll_cmx.csv", encoding='utf-8')

#                     if model_name != 'knn':
#                         with open(DIR_OUT + '/param.txt', mode='w') as f:
#                             for i in range(nSplit):
#                                 s = "trial" +str(i+1) + '\n' + 'param : ' + str(param_list[i]) + '\n' + 'accuracy : ' + str(accuracy_list[i]) + '\n\n'
#                                 f.write(s)

                l_p_df = pd.concat(test_df_list, sort=False)
                label = l_p_df['Label']
                l_p_df = l_p_df.drop('Label', axis=1)
                l_p_df.insert(2, 'Label', label)
                l_p_df.insert(3, 'prediction_label', all_Y_pred)
                l_p_df.to_csv(DIR_OUT + "/" + "prediction.csv", encoding='utf-8')



    with open("./ジェスチャ通常環境"+option_name_top + '/' + 'ジェスチャ通常環境' + 'accuracy.txt', mode='a') as f:
        s = 'all_mean' + '\n'
        s += str(np.mean(all_acc_list)) + '\n\n'
        f.write(s)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=48.0), HTML(value='')))


Epoch [1/300], Loss: 21.1839, val_loss: 35.2780, val_acc: 0.0625, val_f: 0.0074
Epoch [11/300], Loss: 1916.8169, val_loss: 484.8145, val_acc: 0.1042, val_f: 0.0577
Epoch [21/300], Loss: 6.8810, val_loss: 1.8005, val_acc: 0.1667, val_f: 0.0885
Epoch [31/300], Loss: 0.5523, val_loss: 0.1574, val_acc: 0.2083, val_f: 0.1453
Epoch [41/300], Loss: 0.1054, val_loss: 0.0487, val_acc: 0.3125, val_f: 0.2113
Epoch [51/300], Loss: 0.0957, val_loss: 0.0480, val_acc: 0.2917, val_f: 0.2165
Epoch [61/300], Loss: 0.0807, val_loss: 0.0451, val_acc: 0.3542, val_f: 0.2765
Epoch [71/300], Loss: 0.0648, val_loss: 0.0423, val_acc: 0.4167, val_f: 0.3244
Epoch [81/300], Loss: 0.0508, val_loss: 0.0406, val_acc: 0.6042, val_f: 0.5403
Epoch [91/300], Loss: 0.0409, val_loss: 0.0401, val_acc: 0.6042, val_f: 0.5287
Epoch [101/300], Loss: 0.0336, val_loss: 0.0404, val_acc: 0.5833, val_f: 0.5287
Epoch [111/300], Loss: 0.0288, val_loss: 0.0408, val_acc: 0.5625, val_f: 0.5003
Epoch [121/300], Loss: 0.0253, val_loss: 0.

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=48.0), HTML(value='')))


Epoch [1/300], Loss: 29.2994, val_loss: 49.5829, val_acc: 0.0625, val_f: 0.0074
Epoch [11/300], Loss: 941.6346, val_loss: 49.8969, val_acc: 0.0625, val_f: 0.0074
Epoch [21/300], Loss: 0.2076, val_loss: 0.0639, val_acc: 0.1667, val_f: 0.0762
Epoch [31/300], Loss: 0.0913, val_loss: 0.0456, val_acc: 0.1667, val_f: 0.1023
Epoch [41/300], Loss: 0.0666, val_loss: 0.0400, val_acc: 0.2917, val_f: 0.1963
Epoch [51/300], Loss: 0.0504, val_loss: 0.0379, val_acc: 0.3125, val_f: 0.1923
Epoch [61/300], Loss: 0.0369, val_loss: 0.0363, val_acc: 0.3750, val_f: 0.2821
Epoch [71/300], Loss: 0.0297, val_loss: 0.0368, val_acc: 0.3333, val_f: 0.2321
Epoch [81/300], Loss: 0.0250, val_loss: 0.0369, val_acc: 0.3542, val_f: 0.2508
Epoch [91/300], Loss: 0.0214, val_loss: 0.0375, val_acc: 0.3542, val_f: 0.2508
Epoch [101/300], Loss: 0.0186, val_loss: 0.0382, val_acc: 0.3958, val_f: 0.3071
Epoch [111/300], Loss: 0.0165, val_loss: 0.0390, val_acc: 0.3958, val_f: 0.3029
Epoch [121/300], Loss: 0.0147, val_loss: 0.03

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=48.0), HTML(value='')))


Epoch [1/300], Loss: 1.1381, val_loss: 0.3162, val_acc: 0.0625, val_f: 0.0197
Epoch [11/300], Loss: 0.1046, val_loss: 0.7508, val_acc: 0.2708, val_f: 0.1946
Epoch [21/300], Loss: 0.0014, val_loss: 1.1441, val_acc: 0.3333, val_f: 0.2601
Epoch [31/300], Loss: 0.0002, val_loss: 1.0947, val_acc: 0.3958, val_f: 0.2988
Epoch [41/300], Loss: 0.0001, val_loss: 1.0582, val_acc: 0.3750, val_f: 0.2850
Epoch [51/300], Loss: 0.0000, val_loss: 1.1348, val_acc: 0.3542, val_f: 0.2586
Epoch [61/300], Loss: 0.0000, val_loss: 1.1661, val_acc: 0.3542, val_f: 0.2586
Epoch [71/300], Loss: 0.0000, val_loss: 1.1774, val_acc: 0.3542, val_f: 0.2586
Epoch [81/300], Loss: 0.0000, val_loss: 1.1816, val_acc: 0.3542, val_f: 0.2586
Epoch [91/300], Loss: 0.0000, val_loss: 1.1833, val_acc: 0.3542, val_f: 0.2586
Epoch [101/300], Loss: 0.0000, val_loss: 1.1840, val_acc: 0.3542, val_f: 0.2586
Epoch [111/300], Loss: 0.0000, val_loss: 1.1843, val_acc: 0.3542, val_f: 0.2586
Epoch [121/300], Loss: 0.0000, val_loss: 1.1844, v

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=48.0), HTML(value='')))


Epoch [1/300], Loss: 0.8349, val_loss: 0.2143, val_acc: 0.1042, val_f: 0.0731
Epoch [11/300], Loss: 0.0014, val_loss: 0.3892, val_acc: 0.3333, val_f: 0.2406
Epoch [21/300], Loss: 0.0001, val_loss: 0.5230, val_acc: 0.3958, val_f: 0.3018
Epoch [31/300], Loss: 0.0001, val_loss: 0.5854, val_acc: 0.4167, val_f: 0.3191
Epoch [41/300], Loss: 0.0000, val_loss: 0.6074, val_acc: 0.4167, val_f: 0.3191
Epoch [51/300], Loss: 0.0000, val_loss: 0.6146, val_acc: 0.4375, val_f: 0.3330
Epoch [61/300], Loss: 0.0000, val_loss: 0.6170, val_acc: 0.4375, val_f: 0.3330
Epoch [71/300], Loss: 0.0000, val_loss: 0.6177, val_acc: 0.4375, val_f: 0.3330
Epoch [81/300], Loss: 0.0000, val_loss: 0.6179, val_acc: 0.4375, val_f: 0.3330
Epoch [91/300], Loss: 0.0000, val_loss: 0.6179, val_acc: 0.4375, val_f: 0.3330
Epoch [101/300], Loss: 0.0000, val_loss: 0.6178, val_acc: 0.4375, val_f: 0.3330
Epoch [111/300], Loss: 0.0000, val_loss: 0.6177, val_acc: 0.4375, val_f: 0.3330
Epoch [121/300], Loss: 0.0000, val_loss: 0.6176, v

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=48.0), HTML(value='')))


Epoch [1/300], Loss: 0.1769, val_loss: 0.0581, val_acc: 0.0417, val_f: 0.0076
Epoch [11/300], Loss: 0.1679, val_loss: 0.0556, val_acc: 0.1250, val_f: 0.0437
Epoch [21/300], Loss: 0.1531, val_loss: 0.0519, val_acc: 0.1875, val_f: 0.0688
Epoch [31/300], Loss: 0.1389, val_loss: 0.0485, val_acc: 0.2708, val_f: 0.1529
Epoch [41/300], Loss: 0.1263, val_loss: 0.0455, val_acc: 0.2917, val_f: 0.1733
Epoch [51/300], Loss: 0.1155, val_loss: 0.0430, val_acc: 0.3958, val_f: 0.2750
Epoch [61/300], Loss: 0.1063, val_loss: 0.0409, val_acc: 0.5000, val_f: 0.3650
Epoch [71/300], Loss: 0.0985, val_loss: 0.0391, val_acc: 0.5000, val_f: 0.3688
Epoch [81/300], Loss: 0.0916, val_loss: 0.0376, val_acc: 0.5417, val_f: 0.4250
Epoch [91/300], Loss: 0.0857, val_loss: 0.0363, val_acc: 0.5625, val_f: 0.4417
Epoch [101/300], Loss: 0.0804, val_loss: 0.0351, val_acc: 0.5625, val_f: 0.4417
Epoch [111/300], Loss: 0.0758, val_loss: 0.0341, val_acc: 0.5625, val_f: 0.4417
Epoch [121/300], Loss: 0.0716, val_loss: 0.0332, v

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=16.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=48.0), HTML(value='')))


Epoch [1/300], Loss: 0.1771, val_loss: 0.0594, val_acc: 0.1250, val_f: 0.0504
Epoch [11/300], Loss: 0.1686, val_loss: 0.0569, val_acc: 0.1458, val_f: 0.0744
Epoch [21/300], Loss: 0.1540, val_loss: 0.0531, val_acc: 0.1875, val_f: 0.0835
Epoch [31/300], Loss: 0.1399, val_loss: 0.0496, val_acc: 0.1875, val_f: 0.0902
Epoch [41/300], Loss: 0.1281, val_loss: 0.0467, val_acc: 0.3333, val_f: 0.2158
Epoch [51/300], Loss: 0.1185, val_loss: 0.0444, val_acc: 0.3542, val_f: 0.2290
Epoch [61/300], Loss: 0.1106, val_loss: 0.0425, val_acc: 0.3542, val_f: 0.2233
Epoch [71/300], Loss: 0.1040, val_loss: 0.0411, val_acc: 0.3750, val_f: 0.2742
Epoch [81/300], Loss: 0.0983, val_loss: 0.0398, val_acc: 0.4792, val_f: 0.4085
Epoch [91/300], Loss: 0.0933, val_loss: 0.0388, val_acc: 0.5000, val_f: 0.4205
Epoch [101/300], Loss: 0.0888, val_loss: 0.0378, val_acc: 0.5000, val_f: 0.4039
Epoch [111/300], Loss: 0.0848, val_loss: 0.0370, val_acc: 0.5000, val_f: 0.4018
Epoch [121/300], Loss: 0.0812, val_loss: 0.0363, v