In [1]:
import os
import numpy as np 
from sklearn.model_selection import KFold  # 라벨 x_data, y_data 분포를 유지 (함께 fold)

from IPython.display import clear_output

np.random.seed(7)
np.set_printoptions(precision=4, suppress=True)  # 과학적 표기 대신 소숫점 자리 4자리까지 표현

rootDir = "D:/바탕화면/Dataset/TUG/trainSet"
expertFolder = "/" + "2021_01_24_saveResults_최윤정"  # --- 변경 할 부분
viewFolder = "/" + "0_sideView"

print(" [Home Dir.] Current path", os.getcwd())  

datasetDir = rootDir + expertFolder + viewFolder
os.chdir(datasetDir) ; print(" Current workaing path [dataset] ", os.getcwd())  # -- Dataset 있는 곳으로 경로 변경 

 [Home Dir.] Current path D:\바탕화면\conda\tug\own\tug-main\0_temporalCNN_wLabel
 Current workaing path [dataset]  D:\바탕화면\Dataset\TUG\trainSet\2021_01_24_saveResults_최윤정\0_sideView


### Train/Test Set -- subject 기준으로 분할   
##### (Kfold_num = 0 이면 직접 분할, 5 등 숫자면 5-fold dataset)

In [2]:
def trainData_split(datasetDir,train_ratio = 0.8, shuffle = False,  Kfold_num = 5):      
    expDates = next(os.walk(datasetDir))[1]   #['2020_11_03', '2020_11_20_v1', '2020_11_20_v2', '2020_11_23', '2020_12_09', '2020_12_30']
    
    subject_number = 0
    subjects_list = []
    
    for dateFolder in expDates:
        dateDir = os.path.join(datasetDir, dateFolder)   # D:/바탕화면/Dataset/TUG/trainSet/2021_01_24_saveResults_최윤정/0_sideView\2020_11_03
        tmpSubjects = next(os.walk(dateDir))[1]
        subjects_list.append(tmpSubjects)  
        subject_number += len(tmpSubjects)

    # ------ 2D -> 1D [[sub1, sub2], [sub3, sub4]] -> [sub1, sub2, sub3, sub4]
    subjects = []
    for eachSub in subjects_list:
        subjects += eachSub
    print(" [before shuffle] 1D sub list: " , subjects)

    if shuffle:
        np.random.shuffle(subjects)
        print(" [After shuffle] 1D sub list: " , subjects)
        
    # ---- Split train/test subjects (for arbitrary split)
    trainSub_number = np.round( (len(subjects)*train_ratio), 0).astype(int)
    testSub_number = subject_number - trainSub_number
    
#     # --- K-fold cross validation
    

    if Kfold_num is not 0:
        Fold_results =[]
        print(" [K-Fold] K = ", Kfold_num)
        kfold = KFold(n_splits=  Kfold_num)
        for trainIdx, testIdx in kfold.split(subjects):
            Fold_results.append([trainIdx, testIdx])        
        train_Fold= []
        test_Fold = []
        
        for i in range(Kfold_num):
            tmp_train = []
            tmp_test = []
            for subIdx in range(len(subjects)): 
                if subIdx in Fold_results[i][0]:  # 
                    tmp_train.append(subjects[subIdx])
                    
                if subIdx in Fold_results[i][1]:  # -- K-fold test
                    tmp_test.append(subjects[subIdx])
                    
            train_Fold.append(tmp_train)
            test_Fold.append(tmp_test)
            print("\t [process]", i, "-fold:", train_Fold[i], "\n\t\t\t   ,", test_Fold[i])
        print("\n [results]  trainSet: ", len(train_Fold[0]) , " 명 , testSet: ", len(test_Fold[0]), " 명" )
        return train_Fold, test_Fold

        
    else:
        print("--------- no fold ----")
        train_subjects = subjects[ :trainSub_number]
        test_subjects = subjects[trainSub_number: ]
        print("[results]  " ,train_subjects, "\n", test_subjects)
        print("\t--> # of train sub: " + str(trainSub_number) +"명  ,  # of test subjects: " + str(testSub_number) + "명")
        return train_subjects, test_subjects 


def getPelvisData(csvFile):
    rawData = np.loadtxt(csvFile, delimiter=",")
    timestamp = rawData[:,0]
    pelvis_x = rawData[:,1]
    pelvis_y = rawData[:,2]
    pelvis_z = rawData[:,3]
    actionList = rawData[:,4:9]
    return timestamp, pelvis_x, pelvis_y, pelvis_z, actionList

def create_dataset(train_subjects):
    train_x = np.zeros((0, numFeature))
    train_y = np.zeros((0, numActions))
    test_x = np.zeros((0, numFeature))
    test_y = np.zeros((0, numActions))    
    
    trainSet = None
    testSet = None
    
    for dirpath, foldername, files in os.walk(datasetDir):
        for filename in files:
            if "lpf_" in filename:
                subname = dirpath.split("\\")[2] 
                csvFile = os.path.join(dirpath, filename)
                timestamp, pelvis_x, pelvis_y, pelvis_z, actionList = getPelvisData(csvFile) 
                
                pelvisData = np.array([pelvis_x, pelvis_y, pelvis_z]).T
                actionData = np.array(actionList)
                #dataset = tf.data.Dataset.from_tensor_slices( (pelvisData.astype('float32'), actionData.astype('float32')) ) 
                dataset = np.zeros( (len(pelvisData), dataset_columns) ) 
                
                dataset[:, :-(numActions)] = pelvisData      # time , pelvis  넣음  (317,4) -- raw data 
                dataset[:, -(numActions): ] = actionData     # (335, 5) one hot encoding 수행한 actionsList
                

                if subname in train_subjects:
                    train_x = np.append(train_x, pelvisData, axis = 0 )
                    train_y = np.append(train_y, actionData, axis = 0 )

                else:
                    test_x = np.append(test_x, pelvisData, axis = 0 )
                    test_y = np.append(test_y, actionData, axis = 0 )    

    return train_x, train_y, test_x, test_y

In [3]:
# ---------------  1. subject 별로 dataset 분할 (K-fold 정함) ------------------------------------------------------#
Kfold_num = 0 # if kfold_num = 0 (arbitrary train/test set will be used)
train_Fold, test_Fold = trainData_split(datasetDir,train_ratio = 0.7, shuffle = False, Kfold_num = Kfold_num)  


numFeature = 3
numActions = 5
dataset_columns = numFeature + numActions

# ---------------  2. dataset 생성  train_x, train_y, test_x, test_y  (k-fold 면 앞에 KFold_ 붙음) -----------------#
if Kfold_num == 0:  
    train_x, train_y, test_x, test_y = create_dataset(train_Fold)
    print("\n Train Shape X , Y: {0} , {1} \t Test Shape X , Y: {2} , {3}".format(train_x.shape, train_y.shape, test_x.shape, test_y.shape))
    

    
else:      # -- K-fold dataset
    print("\n\n -------------------- K-Fold Dataset ------------------------ ")
    folds = []

    for i in range(Kfold_num):
        train_x, train_y, test_x, test_y = create_dataset(train_Fold[i])
        print(" \t Train (X,Y): {0} , {1} \t Test (X,Y): {2} , {3}".format(train_x.shape, train_y.shape, test_x.shape, test_y.shape))
        folds.append((train_x, train_y, test_x, test_y))
        
    print(np.array(folds[0][0]).shape)

 [before shuffle] 1D sub list:  ['bys_tug', 'cbd_tug', 'cyj_tug', 'kw', 'kyh', 'lhs', 'NHJ_60', 'nhs', 'pjh', 'pss', 'rjh', 'yjh', 'bys', 'cbd', 'cyj', 'cyj2', 'jdh', 'jek', 'kch', 'ryu', 'JDW_tug', 'BYS_TUG', 'CYJ_TUG', 'PHE_TUG', 'YSJ_TUG']
--------- no fold ----
[results]   ['bys_tug', 'cbd_tug', 'cyj_tug', 'kw', 'kyh', 'lhs', 'NHJ_60', 'nhs', 'pjh', 'pss', 'rjh', 'yjh', 'bys', 'cbd', 'cyj', 'cyj2', 'jdh', 'jek'] 
 ['kch', 'ryu', 'JDW_tug', 'BYS_TUG', 'CYJ_TUG', 'PHE_TUG', 'YSJ_TUG']
	--> # of train sub: 18명  ,  # of test subjects: 7명

 Train Shape X , Y: (69605, 3) , (69605, 5) 	 Test Shape X , Y: (36119, 3) , (36119, 5)


In [4]:
from sklearn.preprocessing import MinMaxScaler
import tqdm

lookback_window = 15

def sliding_window(data_x, data_y, lookback_window=15):
    x = []
    y = []
    enc = MinMaxScaler()
    enc_y = enc.fit_transform(data_y)
    for i in range(lookback_window, len(data_x)):
#     for i in tqdm(range(lookback_window, len(data_x))):
        x.append(data_x[i - lookback_window:i])
#         y.append(data_y[i])

        y.append(data_y[i - lookback_window:i])
    return np.array(x), np.array(y), enc

x_train, y_train, enc_train = sliding_window(train_x,train_y)
x_test, y_test, enc_test = sliding_window(test_x, test_y)
print("x_train: ", x_train.shape," , y_train: ", y_train.shape)
print("x_test: ",x_test.shape, " , y_test: ",y_test.shape)

x_train:  (69590, 15, 3)  , y_train:  (69590, 15, 5)
x_test:  (36104, 15, 3)  , y_test:  (36104, 15, 5)


https://github.com/ashishpatel26/tcn-keras-Examples

In [5]:
from tcn import compiled_tcn, TCN
from keras.layers import Input, Dense
from keras.models import Model

In [6]:
nb_epochs= 7
batch_size= 250

nb_classes = 5


i = Input(shape=(lookback_window, 3))
m = TCN(return_sequences=True)(i)
m = Dense(nb_classes, activation='softmax')(m)

model = Model(inputs=[i], outputs=[m])
model.summary()

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 15, 3)]           0         
_________________________________________________________________
tcn (TCN)                    (None, 15, 64)            91520     
_________________________________________________________________
dense (Dense)                (None, 15, 5)             325       
Total params: 91,845
Trainable params: 91,845
Non-trainable params: 0
_________________________________________________________________


In [7]:
print('Train...')
history = model.fit(x_train,  y_train, epochs=nb_epochs, batch_size=batch_size)

Train...
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


In [10]:
# ------------- Evaluation 
verbosity = 1
results_1 = model.evaluate(x_test, y_test, verbose = verbosity)
print("--> Evaluation on Test Dataset:")
print("**** Accuracy for Activity Recognition task is: ", results_1)



--> Evaluation on Test Dataset:
**** Accuracy for Activity Recognition task is:  [0.5939515233039856, 0.8422520160675049]


In [15]:
# ----------------- 
y_raw_pred = model.predict(x_test)
print(y_raw_pred.shape)
y_pred = enc_test.inverse_transform(y_raw_pred).flatten()
# print(y_pred.shape)
# incorrect = np.nonzero(y_true_idx != y_pred_idx) # print(incorrect, len(incorrect[0]))

# valid_num = len(correct[0]) + len(incorrect[0])
# tot_num = len(y_true_idx)
# # print("[must same] # of real data: ", tot_num, "==  # of pred data: ", valid_num)
# percent_correct = len(correct[0]) /tot_num
# percent_incorrect = len(incorrect[0])/tot_num
# print(" correct: ", percent_correct," [%]", ",  incorrect: ", percent_incorrect," [%]")

(36104, 15, 5)


AttributeError: 'MinMaxScaler' object has no attribute 'fit_transforms'

In [None]:
# print(history.)
# y_vloss = history.history['val_acc']
# y_loss = history.history['acc']

# x_len = numpy.arange(len(y_loss))
# plt.plot(x_len, y_vloss, marker='.', c='red', label="Validation-set Loss")
# plt.plot(x_len, y_loss, marker='.', c='blue', label="Train-set Loss")

# plt.legend(loc='upper right')
# plt.grid()
# plt.xlabel('epoch')
# plt.ylabel('loss')
# plt.show()

In [None]:
# predict the value from x_test
y_raw_pred = model.predict(np.array([x_test]))


# Invert transform for get a original value
y_pred = enc_test.inverse_transform(y_raw_pred).flatten()
y_true = enc_test.inverse_transform([y_test]).flatten()
