- 라이브러리 로드

In [1]:
import pickle
import torch 
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 

from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so that files
# kept in Drive can be opened through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

- 데이터셋 로드

In [3]:
# dir = '/content/drive/My Drive/Colab Notebooks/'
# with open(dir+'FordA_TRAIN_x.pickle', 'rb') as f:
#     X = pickle.load(f) 
# with open(dir+'FordA_TRAIN_y.pickle', 'rb') as f:
#     y = pickle.load(f) 
# with open(dir+'FordA_TEST_x.pickle', 'rb') as f:
#     X_test = pickle.load(f) 

In [4]:
# Folder on the mounted Drive that holds the pickled HAR arrays.
dir = '/content/drive/My Drive/Colab Notebooks/'


def _read_pickle(path):
    """Return the object stored in the pickle file at *path*."""
    # NOTE: unpickling runs arbitrary code; only open files you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Labelled training data followed by the unlabelled test inputs.
X = _read_pickle(dir + 'HAR_TRAIN_x.pickle')
y = _read_pickle(dir + 'HAR_TRAIN_y.pickle')
X_test = _read_pickle(dir + 'HAR_TEST_x.pickle')

In [None]:
# Plot the first `data_cnt` samples of every class, one figure per sample,
# drawing each input channel as a separate line.
color_item = ['blue', 'green', 'red', 'black']
data_cnt = 1

for label in sorted(set(y)):
    # Select this class's samples once; the selection does not depend on i.
    target_X = X[y == label]
    # Never index past the number of samples available for this class.
    for i in range(min(data_cnt, len(target_X))):
        print('Label:{}, Data:{}'.format(label, i))
        for j in range(target_X.shape[2]):
            # Cycle through the palette so inputs with more channels than
            # colours still plot instead of raising IndexError.
            plt.plot(range(target_X.shape[1]), target_X[i, :, j],
                     color=color_item[j % len(color_item)])
        plt.show()

- 데이터로더 구성

In [6]:
class TSDataset(Dataset):
    """Standardized time-series dataset yielding ``(sequence, label)`` pairs.

    The input array is flattened to ``(samples * time steps, channels)`` so
    that every channel is standardized with statistics pooled over all
    samples and time steps, then reshaped back to its original layout.

    Args:
        X: Array of shape ``(n_samples, n_length, n_dim)``.
        y: Optional label array with one entry per sample.  When omitted
            (e.g. for a test set) a zero tensor is used as a placeholder so
            the dataset still yields pairs.
        scaler: Optional already-fitted scaler exposing ``transform``.  When
            given, it is applied as-is so validation/test data can reuse the
            statistics of the training split.  When ``None`` (the default) a
            new ``StandardScaler`` is fitted on ``X`` itself.

    Attributes:
        X: Standardized inputs as a tensor.
        y: Labels (or the zero placeholder) as a tensor.
        scaler: The scaler that was applied, available for reuse.

    Raises:
        ValueError: If ``X`` is not 3-dimensional or ``y`` does not have one
            entry per sample.
    """

    def __init__(self, X, y=None, scaler=None):
        # Unpacking fails with ValueError unless X has exactly three axes.
        n_samples, n_length, n_dim = X.shape
        if y is not None and len(y) != n_samples:
            raise ValueError(
                'X and y must have the same number of samples, '
                'got {} and {}'.format(n_samples, len(y)))

        # Collapse samples and time steps so scaling is done per channel.
        flat = X.reshape(-1, n_dim)
        if scaler is None:
            scaler = StandardScaler()
            flat = scaler.fit_transform(flat)
        else:
            flat = scaler.transform(flat)
        self.scaler = scaler

        self.X = torch.from_numpy(flat.reshape(-1, n_length, n_dim))
        if y is None:
            self.y = torch.zeros(len(self.X))
        else:
            self.y = torch.from_numpy(y)

    def __getitem__(self, index):
        """Return the ``(sequence, label)`` pair stored at *index*."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return self.X.shape[0]

In [7]:
# Mini-batch size used when the data loaders are built.
batch_size = 100

# Randomly hold out 20% of the labelled samples for validation.
(X_train, X_val,
 y_train, y_val) = train_test_split(X, y, test_size=0.2, shuffle=True)

In [8]:
# Wrap every split in a TSDataset and serve it through a DataLoader.
# Only the training loader shuffles; validation and test keep their order.
# NOTE(review): each TSDataset is built without a shared scaler, so the
# validation and test splits are standardized with their own statistics
# rather than the training ones - confirm this is intended.
train_dataset = TSDataset(X_train, y_train)
val_dataset = TSDataset(X_val, y_val)
test_dataset = TSDataset(X_test)  # no labels are passed for the test split

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Model dimensions are read off the training split.
sequence_length, input_dim = X_train.shape[1:]
num_classes = len(set(y_train))

- 학습 모델 구성

In [9]:
class FC(nn.Module):
    """Baseline fully connected classifier for fixed-length time series.

    Every time step and every input channel is flattened into one feature
    vector and passed through three ``nn.Linear`` layers with ReLU
    activations in between.  This is the most naive reference model; replace
    it with a CNN or RNN to build something better suited to time-series
    data.

    Args:
        input_dim: Number of input channels per time step.
        hidden_dim: Width of both hidden layers.
        input_length: Number of time steps in each sequence.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim, hidden_dim, input_length, num_classes):
        super().__init__()

        self.fc1 = nn.Linear(input_dim * input_length, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Tensor of shape ``[batch size, sequence length, input dim]``
                (input dim is 1 for univariate and n for multivariate
                series).  A CNN or RNN variant would reshape ``x`` to the
                layout it needs instead of flattening it.

        Returns:
            Tensor of shape ``[batch size, num_classes]`` with unnormalized
            scores.
        """
        # flatten (unlike view) also accepts non-contiguous inputs, such as a
        # tensor that was transposed or permuted before reaching the model.
        flat = torch.flatten(x, start_dim=1)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

- 하이퍼파라미터 설정

In [10]:
# Hyper-parameters
hidden_dim = 64  # width of the hidden Linear layers of the FC model
n_epochs = 50  # number of iterations of the training/validation loop
learning_rate = 0.001  # lr handed to the Adam optimizer

- 모델, Loss, Optimizer 설정

In [11]:
# Prefer the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the classifier and move its parameters to the chosen device before
# the optimizer is created over them.
model = FC(input_dim, hidden_dim, sequence_length, num_classes)
model = model.to(device)

# Cross-entropy over the class logits, minimized with Adam.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

- Training & Validation

In [None]:
# Each epoch: one optimization pass over train_loader, then an evaluation
# pass over val_loader.  Reports the mean training loss plus training and
# validation accuracy.
for epoch in range(n_epochs):
    # Train
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0
    # The batch names differ from the module-level X / y on purpose, so the
    # full dataset arrays are not overwritten by the last mini-batch.
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device).float()
        y_batch = y_batch.to(device).long()

        # Forward
        y_pred = model(X_batch)
        loss = criterion(y_pred, y_batch)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch
        # size to obtain a correct per-sample average over the epoch.
        n_batch = y_batch.size(0)
        loss_sum += loss.item() * n_batch
        # .item() keeps the counters as plain Python numbers.
        correct += (y_pred.max(dim=1)[1] == y_batch).sum().item()
        total += n_batch

    # max(total, 1) avoids a ZeroDivisionError if the loader is empty.
    print('Epoch [{}/{}], Loss: {:.4f}'.format(
        epoch + 1, n_epochs, loss_sum / max(total, 1)))
    print('Epoch [{}/{}], Train accuracy: {:.4f}'.format(
        epoch + 1, n_epochs, correct / max(total, 1)))

    # Validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.to(device).float()
            y_batch = y_batch.to(device).long()

            # Forward
            y_pred = model(X_batch)

            correct += (y_pred.max(dim=1)[1] == y_batch).sum().item()
            total += y_batch.size(0)
    print('Epoch [{}/{}], Validation accuracy: {:.4f}'.format(
        epoch + 1, n_epochs, correct / max(total, 1)))

- Test

In [13]:
# Collect the model outputs for every test batch, in loader order.
res = []
model.eval()
with torch.no_grad():
    # Only the inputs are used; the second element of each batch is ignored.
    # The batch name differs from the module-level X so the dataset array is
    # not overwritten.
    for X_batch, _ in test_loader:
        X_batch = X_batch.to(device).float()

        # Forward pass
        res.append(model(X_batch))

In [14]:
# Join the per-batch outputs along the sample axis.
res = torch.cat(res, dim=0)

# Predicted class = index of the largest score in each row, as a NumPy array.
y = res.max(dim=1).indices
y = y.detach().cpu().numpy()

# Write one row per test sample: the row index as 'Id', the class as 'Category'.
submission = pd.DataFrame({'Category': y})
submission.to_csv(dir + "result.csv", index_label='Id')