### 
### 필요한 패키지 로드

In [1]:
from class_DeepNetwork import DNN, DNN_decreasing, GCN, GCN_2
from import_data import toSparse, NORMALIZATION_v2
from configuration import *
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from tqdm import tqdm
from utils import EarlyStopping
import os
from scipy import sparse

### 
### GPU 사용 가능 여부, 랜덤 시드넘버 설정, 배차사이즈/GCN Layer 수/필터 수 설정

In [2]:
if torch.cuda.is_available():
    print("Let's go CUDA!!!!!")
cuda = torch.device('cuda')

torch.cuda.empty_cache()
np.random.seed(777)
torch.manual_seed(777)

batch_size = 32

train_gem = 'basedata/GEM_train_fs_9074.csv'
val_gem = 'basedata/GEM_val_fs_9074.csv'
test_gem = 'basedata/GEM_test_fs_9074.csv'

train_label = 'basedata/label_train_9074.csv'
val_label = 'basedata/label_val_9074.csv'
test_label = 'basedata/label_test_9074.csv'
mode = 'standard'


Let's go CUDA!!!!!


### 
### Dataset, DataLoader 구축

In [3]:
class GcnDataset(Dataset):
    
    def __init__(self, data, train_path, gem_path, label_path, mode):
        self.data = data
        
        # Node Feature Data(GEM); Train data 불러오기
        train_gem = np.transpose(np.loadtxt(gem_path, dtype=str, delimiter=','))
        self.train_gem, self.means, self.stds = NORMALIZATION_v2(train_gem[1:, 1:], mode)
        
        if self.data == 'train':
            self.gem = self.train_gem

            # Label Data 불러오기
            train_label = np.loadtxt(label_path, dtype=str, delimiter=',')
            train_label = np.transpose(train_label)[1]
            self.label = np.array([int(float(i)) for i in train_label])[1:]

            if not len(os.listdir('train')) == len(self.gem) & len(self.gem) == len(self.label):
                print('Warning: 데이터 개수 불일치')
                print('Train 파일 개수: %d' % (len(os.listdir('val'))))
                print('GEM 개수: %d' % (len(self.gem)))
                print('Label 개수: %d' % (len(self.label)))
            else:
                print('Train 데이터 개수 일치')
                print('Size: {}'.format(self.gem.shape))
                
        elif self.data == 'val':
            val_gem = np.transpose(np.loadtxt(gem_path, dtype=str, delimiter=','))
            val_gem = np.array([i.astype(float) for i in val_gem[1:, 1:]])
            self.gem = (val_gem - self.means) / self.stds

            # Label Data 불러오기
            val_label = np.loadtxt(label_path, dtype=str, delimiter=',')
            val_label = np.transpose(val_label)[1]
            self.label = np.array([int(float(i)) for i in val_label])[1:]
            
            if not len(os.listdir('val')) == len(self.gem) & len(self.gem) == len(self.label):
                print('Warning: 데이터 개수 불일치')
                print('Val 파일 개수: %d' % (len(os.listdir('val'))))
                print('GEM 개수: %d' % (len(self.gem)))
                print('Label 개수: %d' % (len(self.label)))
            else:
                print('Val 데이터 개수 일치')
                print('Size: {}'.format(self.gem.shape))
        
        elif self.data == 'test':
            test_gem = np.transpose(np.loadtxt(gem_path, dtype=str, delimiter=','))
            test_gem = np.array([i.astype(float) for i in test_gem[1:, 1:]])
            self.gem = (test_gem - self.means) / self.stds

            # Label Data 불러오기
            test_label = np.loadtxt(label_path, dtype=str, delimiter=',')
            test_label = np.transpose(test_label)[1]
            self.label = np.array([int(float(i)) for i in test_label])[1:]
            
            if not len(os.listdir('test')) == len(self.gem) & len(self.gem) == len(self.label):
                print('Warning: 데이터 개수 불일치')
                print('Test 파일 개수: %d' % (len(os.listdir('test'))))
                print('GEM 개수: %d' % (len(self.gem)))
                print('Label 개수: %d' % (len(self.label)))
            else:
                print('Test 데이터 개수 일치')
                print('Size: {}'.format(self.gem.shape))
                
        self.gem = torch.Tensor(self.gem)
        self.label = torch.LongTensor(self.label)
        
        
    def __len__(self):
        return len(self.label)
    
    
    def __getitem__(self, idx):
        path = self.data + '/' + self.data + '_' + str(idx) + '.npz'
        support = sparse.load_npz(path)
        support = toSparse(support)

        return (self.gem[idx], support, self.label[idx])

    

In [4]:
train_dataset = GcnDataset('train', train_gem, train_gem, train_label, mode)
val_dataset = GcnDataset('val', train_gem, val_gem, val_label, mode)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


Train 데이터 개수 일치
Size: (625, 9074)
Val 데이터 개수 일치
Size: (157, 9074)


###  
### Modeling
1. Feature 및 Label 개수 측정

In [5]:
num_vars = len(np.loadtxt(val_gem, dtype=str, delimiter=',')) - 1
num_classes = len(list(set(np.loadtxt(val_label, dtype=str, delimiter=',')[1:, 1])))  

2. Parameter Setting: 최대 에폭, Loss 함수, Optimizer, GCN Layer 개수, filter 개수

In [6]:
MAX_EPOCH = 1000

def Parameters(net):
    """
        신경망으로부터 파라미터 받아오는 함수

    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    return criterion, optimizer

num_layers = 1
num_filter = 4
patience = 5

3. 학습

In [9]:
net = GCN(num_vars, 1, num_classes, num_filter)
criterion, optimizer = Parameters(net)
net.to(cuda)

for epoch in range(MAX_EPOCH):
    torch.cuda.empty_cache()
    if epoch == 0:
        early_stopping = EarlyStopping(patience=patience, path='gcn1.pt', verbose=False)
    else:
        early_stopping = EarlyStopping(patience=patience, best_score=best_score, counter=counter, path='gcn1.pt', verbose=False)
    
    net.train()
    train_loss = 0.
    tcnt = 0
    for k, data in enumerate(train_dataloader):
        inputs, support, labels = data
        inputs = inputs.unsqueeze(-1)

        inputs = inputs.to(cuda)
        support = support.to(cuda)
        labels = labels.to(cuda)

        optimizer.zero_grad()

        outputs = net((inputs, support))
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        tcnt += len(data)
        train_loss += loss.item()
        
        print('Train_loss: %.5f' % (train_loss / tcnt))
        
    net.eval()
    total = 0
    correct = 0
    val_loss = 0
    vcnt = 0 
    for k, data in enumerate(val_dataloader):
        inputs, support, labels = data
        inputs = inputs.unsqueeze(-1)

        inputs = inputs.to(cuda)
        labels = labels.to(cuda)

        outputs = net((inputs, support))
        loss = criterion(outputs, labels)
        val_loss += loss.item()
        vcnt += len(labels)

        _, predicted = torch.max(outputs.data, 1)
        correct += (predicted == labels).sum().item()
    
    print('Epoch: %5d / Train Loss: %5.5f / Val Loss: %5.5f / Val Acc: %5.3f'
          % (epoch, train_loss / tcnt, val_loss / vcnt, correct / vcnt))
    
    best_score, counter, finish = early_stopping(val_loss / vcnt, net)
    if finish:
        break

Train_loss: 0.54089
Train_loss: 0.48375
Train_loss: 0.43404
Train_loss: 0.39409
Train_loss: 0.44311
Train_loss: 0.42865
Train_loss: 0.41418
Train_loss: 0.40642
Train_loss: 0.39394
Train_loss: 0.38038
Train_loss: 0.36531
Train_loss: 0.36059
Train_loss: 0.34755
Train_loss: 0.34835
Train_loss: 0.35321
Train_loss: 0.34539
Train_loss: 0.33640
Train_loss: 0.34035
Train_loss: 0.33762
Train_loss: 0.33106
Epoch:     0 / Train Loss: 0.33106 / Val Loss: 0.02728 / Val Acc: 0.745
Train_loss: 0.11530
Train_loss: 0.14357
Train_loss: 0.17391
Train_loss: 0.18086
Train_loss: 0.18251
Train_loss: 0.16892
Train_loss: 0.16569
Train_loss: 0.15790
Train_loss: 0.16136
Train_loss: 0.15863
Train_loss: 0.15745
Train_loss: 0.15418
Train_loss: 0.15782
Train_loss: 0.15901
Train_loss: 0.15906
Train_loss: 0.16097
Train_loss: 0.15876
Train_loss: 0.15562
Train_loss: 0.15234
Train_loss: 0.15371
Epoch:     1 / Train Loss: 0.15371 / Val Loss: 0.02210 / Val Acc: 0.764
Train_loss: 0.10002
Train_loss: 0.09105
Train_loss: 0.09