In [7]:
import sys 
sys.path.append('../')
import os
import glob
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset
import torch.optim as optimizers
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torchvision import datasets
import torchvision.transforms as transforms 
from utils.callbacks import EarlyStopping
from sklearn.metrics import classification_report, accuracy_score


class BiRNN(nn.Module):
    def __init__(self, hidden_dim):
        super().__init__() 
        self.l1 = nn.LSTM(1463, hidden_dim, batch_first=True, bidirectional=True)
        self.l2 = nn.LSTM(hidden_dim*2, hidden_dim, batch_first=True, bidirectional=True)
        self.l3 = nn.Linear(hidden_dim*2, 2) 

        nn.init.xavier_normal_(self.l1.weight_ih_l0) 
        nn.init.orthogonal_(self.l1.weight_hh_l0)
        nn.init.xavier_normal_(self.l2.weight_ih_l0) 
        nn.init.orthogonal_(self.l2.weight_hh_l0)
         
        nn.init.xavier_normal_(self.l3.weight) 

    def forward(self, x):
        h, _ = self.l1(x) 
        h, _ = self.l2(h) 
        y = self.l3(h)
        y = F.softmax(y, dim=1)
        return y

def clean(df):
    #共通で不要
    col = ['start(exchange)[ms]', 'end(system)[ms]', 'end(exchange)[ms]',\
    'kinectstart(exchange)[ms]', 'kinectend(system)[ms]',\
    'kinectend(exchange)[ms]', 'SS_ternary', 'TC_ternary', 'TS_ternary', 'SS',\
    'TC1', 'TC2', 'TC3', 'TC4', 'TC5', 'TS1', 'TS2', 'TS3', 'TS4', 'TS5',
    ]

    res = []
    
    for data in df:
        res.append(torch.from_numpy(data.drop(col, axis=1).values))

    return res

def louocv(files, testfile):

    train_df = []
    test_df = []


    for file in files:
        if file == testfile:
            test_df.append(pd.read_csv(file))
        else:
            train_df.append(pd.read_csv(file))

    return train_df, test_df

def make_target(df):
    res = [] 

    for data in df:
        tmp = data.loc[:, 'TS1':'TS5'].sum(axis=1) > 20
        res.append(tmp.astype('int'))
    return res

def make_scaler(datas):
    scaler = StandardScaler() 
    # scaler.fit(torch.cat(datas, axis=1))
    data = torch.cat(datas)
    scaler.fit(data)
    return scaler 

def standardize(datas, scaler):
    res = []
    for data in datas:
        res.append(scaler.transform(data).tolist())
    return res


In [12]:
FILE_PATH = "../data/dumpfiles/*.csv"
files = glob.glob(FILE_PATH)

test_preds = []
test_ans = []

i = 0

for testfile in files:
    i += 1
    print(f'{i}人目')
    train_df, test_df = louocv(files, testfile)

    train_data = clean(train_df)
    train_target = make_target(train_df)


    test_data = clean(test_df)
    test_target = make_target(test_df)
    test_ans = test_ans + test_target[0].values.tolist()

    x_train, x_valid, y_train, y_valid = train_test_split(train_data, train_target, shuffle=True, train_size=24)

    scaler = make_scaler(x_train)
    x_train = standardize(x_train, scaler) 
    x_valid = standardize(x_valid, scaler) 
    test_data = standardize(test_data, scaler)

    model = BiRNN(200) 


    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), amsgrad=True)

    def compute_loss(t, y):
        print(t.shape)
        print(t)
        print(y.shape)
        print(y)
        return criterion(y, t)

    def train_step(x, t):
        model.train()
        preds = model(x)
        print(t)
        loss = compute_loss(t, preds)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        return loss, preds

    def val_step(x, t):
        model.eval()
        preds = model(x)
        loss = criterion(preds, t)

        return loss, preds

    # エポック数
    epochs = 200
    hist = {'loss': [], 'accuracy': [],
            'val_loss': [], 'val_accuracy': []}
    es = EarlyStopping(patience=5, verbose=1)

    for epoch in range(epochs):
        train_loss = 0. 
        train_acc = 0. 
        val_loss = 0. 
        val_acc = 0. 

        for (x, t) in zip(x_train, y_train):
            x = torch.tensor(x)
            t = torch.tensor(t)
            loss, preds = train_step(x, t) 
            train_loss += loss.item() 
            train_acc += accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist()) 

        train_loss /= len(x_train) 
        train_acc /= len(x_train) 

        for (x, t) in zip(x_valid, y_valid):
            x = torch.tensor(x) 
            t = torch.tensor(t)
            loss, preds = val_step(x, t) 
            val_loss += loss.item() 
            val_acc += accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist()) 

        val_loss /= len(x_valid) 
        val_acc /= len(x_valid)

        hist['loss'].append(train_loss) 
        hist['accuracy'].append(train_acc) 
        hist['val_loss'].append(val_loss) 
        hist['val_accuracy'].append(val_acc) 

        # print('epoch: {}, loss: {:.3}, acc: {:.3f}, val_loss: {:.3}, val_acc: {:.3f}'.format(epoch+1, train_loss, train_acc, val_loss, val_acc))

        if es(val_loss):
            break
    
    # モデルの評価
    loss = hist['loss'] 
    val_loss = hist['val_loss'] 

    fig = plt.figure() 
    plt.rc('font', family='serif') 
    plt.plot(range(len(loss)), loss, color='gray', linewidth=1, label='loss') 
    plt.plot(range(len(val_loss)), val_loss, color='black', linewidth=1, label='val_loss') 
    plt.xlabel('epoch') 
    plt.ylabel('loss') 
    plt.legend() 
    plt.show() 

    # テストデータの評価
    def test_step(x, t):
        return val_step(x, t) 

    test_loss = 0.
    test_acc = 0. 

    for (x, t) in zip(test_data, test_target):
        x = torch.tensor(x) 
        t = torch.tensor(t)
        loss, preds = test_step(x, t) 
        test_loss += loss.item() 
        test_acc += accuracy_score(t.tolist(), preds.argmax(dim=-1).tolist()) 
        test_preds += preds.argmax(dim=-1).tolist()
    
    print('test_loss: {:.3f}, test_acc: {:.3f}'.format(test_loss, test_acc))

print(classification_report(test_ans, test_preds))
print('accuracy: ', accuracy_score(test_ans, test_preds))

1人目
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0])
torch.Size([68])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0])
torch.Size([68, 2])
tensor([[0.4945, 0.5055],
        [0.4875, 0.5125],
        [0.4868, 0.5132],
        [0.5116, 0.4884],
        [0.4755, 0.5245],
        [0.4412, 0.5588],
        [0.4899, 0.5101],
        [0.5130, 0.4870],
        [0.5035, 0.4965],
        [0.5152, 0.4848],
        [0.5265, 0.4735],
        [0.5149, 0.4851],
        [0.5246, 0.4754],
        [0.5107, 0.4893],
        [0.4878, 0.5122],
        [0.5126, 0.4874],
        [0.4912, 0.5088],
        [0.4801, 0.5199],
        [0.4735, 0.5265],
       

KeyboardInterrupt: 

In [None]:
import glob
import sys 
import numpy as np
sys.path.append('../')
from utils.dataset import Hazumi1902

FILE_PATH = '../data/dumpfiles/*.csv' 
files = glob.glob(FILE_PATH) 
hazumi = Hazumi1902(files) 

for testfile in files:
    x_train, x_test, t_train, t_test = hazumi.load_data(testfile=testfile)
    tmp = np.array(x_train)
    print(tmp[1])
    print(tmp.shape)
    break

[[176000.0, 178854.0, 182556.0, 176432.0, 179286.0, 182988.0, 1.0, 1.0, 2.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 6.0, 4.0, 0.01083376, 1.05e-05, 0.01082324, 0.0, 296.0, 0.000315418, -3.55e-06, 0.000962592, 1.14e-06, 0.001129406, 6.641697, 52.34112, -3.478232, -27.16053, 23.6823, 5.0, 155.0, -11.98343, -0.01262694, -9.679012, 15.45622, 4.151629, -0.9982084, 5.664769, 9.706817, -24.62845, 34.33527, 154.0, 0.0, -6.650698, 0.01129324, -8.711713, 24.39712, 5.081418, -0.2293322, 6.127852, 5.15893, -24.17351, 29.33244, 83.0, 183.0, -6.494698, 0.004478175, -7.311965, 25.46677, 5.068593, -1.004895, 4.394867, 4.21364, -22.87647, 27.09011, 179.0, 62.0, -10.25804, 0.002809222, -10.77072, 15.75249, 3.980023, -0.08493628, 4.568296, 18.9781, -26.07538, 45.05348, 172.0, 13.0, -11.89473, 0.006532381, -13.08689, 46.33308, 6.841741, 2.027878, 8.782903, 5.375834, -28.51829, 33.89412, 56.0, 102.0, -6.849606, 0.006363887, -8.011016, 18.03553, 4.299724, -1.526117, 8.338561, 8.3373, -18.1423, 26.4796

  tmp = np.array(x_train)
