In [8]:
import pandas as pd
import numpy as np
import os

In [9]:
CODE_PATH = os.getcwd()
BASE_PATH =  os.path.dirname(CODE_PATH) + '/'
print(BASE_PATH)
DATA_PATH = BASE_PATH + 'data/'

/home/masa1357/Dockerdata/gitfile/signate_LiverDiseaseDetermination/


In [10]:
train = pd.read_csv(DATA_PATH + 'train.csv')
test = pd.read_csv(DATA_PATH + 'test.csv')
df = pd.concat([train.drop(['disease'],axis=1), test], axis=0, sort=False)
df.head(3)

Unnamed: 0,Age,Gender,T_Bil,D_Bil,ALP,ALT_GPT,AST_GOT,TP,Alb,AG_ratio
0,59,Male,0.786886,0.150498,220.178691,13.467617,21.729246,6.815731,3.112276,1.006802
1,69,Male,1.003987,0.195625,221.218413,51.033462,64.752323,6.889608,3.051521,0.751346
2,65,Male,0.65714,0.081287,320.770533,12.625011,30.61318,5.947767,2.489167,0.774952


In [11]:
df.isnull().sum()

Age         0
Gender      0
T_Bil       0
D_Bil       0
ALP         0
ALT_GPT     0
AST_GOT     0
TP          0
Alb         0
AG_ratio    0
dtype: int64

In [12]:
df['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1})
df.head(1)

Unnamed: 0,Age,Gender,T_Bil,D_Bil,ALP,ALT_GPT,AST_GOT,TP,Alb,AG_ratio
0,59,0,0.786886,0.150498,220.178691,13.467617,21.729246,6.815731,3.112276,1.006802


In [13]:
nrow, ncol = train.shape
print(nrow, ncol)

850 11


In [14]:
train_df = df[:nrow].join(train['disease'])
train_df.head(3)

Unnamed: 0,Age,Gender,T_Bil,D_Bil,ALP,ALT_GPT,AST_GOT,TP,Alb,AG_ratio,disease
0,59,0,0.786886,0.150498,220.178691,13.467617,21.729246,6.815731,3.112276,1.006802,0
1,69,0,1.003987,0.195625,221.218413,51.033462,64.752323,6.889608,3.051521,0.751346,1
2,65,0,0.65714,0.081287,320.770533,12.625011,30.61318,5.947767,2.489167,0.774952,0


In [15]:
test_df = df[nrow:]
test_df.head(3)

Unnamed: 0,Age,Gender,T_Bil,D_Bil,ALP,ALT_GPT,AST_GOT,TP,Alb,AG_ratio
0,25,0,0.801797,0.1868,214.448679,15.260516,19.496649,6.817903,3.600752,1.288006
1,26,1,0.834857,0.116426,234.136816,10.509159,16.733312,6.795433,3.111043,1.030562
2,64,0,0.79183,0.19794,216.039909,14.578304,20.695866,7.52403,3.627621,1.278579


### NN実装

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [17]:
x = torch.tensor(train_df.drop(['disease'], axis=1).values, dtype=torch.float32)
t = torch.tensor(train_df['disease'].values, dtype=torch.int64)
print(type(x), type(t))
print(x.dtype, t.dtype)
print(x.shape, t.shape)

<class 'torch.Tensor'> <class 'torch.Tensor'>
torch.float32 torch.int64
torch.Size([850, 10]) torch.Size([850])


In [18]:
dataset = torch.utils.data.TensorDataset(x, t)
dataset[0]

(tensor([5.9000e+01, 0.0000e+00, 7.8689e-01, 1.5050e-01, 2.2018e+02, 1.3468e+01,
         2.1729e+01, 6.8157e+00, 3.1123e+00, 1.0068e+00]),
 tensor(0))

In [19]:
n_train = int(len(dataset) * 0.6)
n_val = int(len(dataset) * 0.2)
n_test = len(dataset) - n_train - n_val
n_train, n_val, n_test

(510, 170, 170)

In [20]:
torch.manual_seed(0)

# データセットの分割
train, val, test = torch.utils.data.random_split(dataset, [n_train, n_val, n_test])

In [21]:
batch_size = 50

In [22]:
train_loader = torch.utils.data.DataLoader(train, batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val, batch_size)
test_loader = torch.utils.data.DataLoader(test, batch_size)

In [23]:
class Net(nn.Module):

    # 使用するオブジェクトを定義
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(10, 8)
        self.fc2 = nn.Linear(8, 4)
        self.fc3 = nn.Linear(4, 2)
        

    # 順伝播
    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x1 = self.fc3(x)
        return x

In [24]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [25]:
max_epoch = 1000
      
# モデルの初期化
torch.manual_seed(0)

# モデルのインスタンス化とデバイスへの転送
net = Net().to(device)
criterion = F.cross_entropy
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

In [26]:
for epoch in range(max_epoch):
    
    for batch in train_loader:
        
        # バッチサイズ分のサンプルを抽出
        x, t = batch
        
        # 学習時に使用するデバイスへデータの転送
        x = x.to(device)
        t = t.to(device)
        
        # パラメータの勾配を初期化
        optimizer.zero_grad()
        
        # 予測値の算出
        y = net(x)
        
        # 目標値と予測値から目的関数の値を算出
        loss = criterion(y, t)
        
        # 目的関数の値を表示して確認
        # item(): tensot.Tensor => float
        print('loss: ', loss.item())
        
        y_label = torch.argmax(y, dim=1)
        acc  = torch.sum(y_label == t) * 1.0 / len(t)
        print('accuracy:', acc)
        
        # 各パラメータの勾配を算出
        loss.backward()
        
        # 勾配の情報を用いたパラメータの更新
        optimizer.step()

loss:  14.073111534118652
accuracy: tensor(0., device='cuda:0')
loss:  15.957496643066406
accuracy: tensor(0.3600, device='cuda:0')
loss:  1.5697845220565796
accuracy: tensor(0.5200, device='cuda:0')
loss:  1.3585689067840576
accuracy: tensor(0.6200, device='cuda:0')
loss:  1.3031271696090698
accuracy: tensor(0.6200, device='cuda:0')
loss:  1.3438963890075684
accuracy: tensor(0.5200, device='cuda:0')
loss:  1.3311545848846436
accuracy: tensor(0.6400, device='cuda:0')
loss:  1.3862946033477783
accuracy: tensor(0.5800, device='cuda:0')
loss:  1.3862946033477783
accuracy: tensor(0.4800, device='cuda:0')
loss:  1.3308509588241577
accuracy: tensor(0.6600, device='cuda:0')
loss:  1.2476649284362793
accuracy: tensor(0.7000, device='cuda:0')
loss:  1.358577013015747
accuracy: tensor(0.6400, device='cuda:0')
loss:  1.3308444023132324
accuracy: tensor(0.6400, device='cuda:0')
loss:  1.3862946033477783
accuracy: tensor(0.5200, device='cuda:0')
loss:  1.330859661102295
accuracy: tensor(0.5200, dev

In [27]:
# 正解率の計算
def calc_acc(data_loader):
    
    with torch.no_grad():
        
        accs = [] # 各バッチごとの結果格納用
        
        for batch in data_loader:
            x, t = batch
            x = x.to(device)
            t = t.to(device)
            y = net(x)
            
            y_label = torch.argmax(y, dim=1)
            acc = torch.sum(y_label == t) * 1.0 / len(t)
            accs.append(acc)
            
    # 全体の平均を算出
    avg_acc = torch.tensor(accs).mean()
    print('Accuracy: {:.1f}%'.format(avg_acc * 100))
    
    return avg_acc

In [28]:
calc_acc(val_loader)

Accuracy: 58.0%


tensor(0.5800)

In [29]:
calc_acc(test_loader)

Accuracy: 63.5%


tensor(0.6350)