In [34]:
import torch
import pandas as pd
import numpy as np
import os
import tqdm
import pytorch_lightning as pl
import torchmetrics
from torchsummary import summary
from matplotlib import pyplot as plt
from pytorch_lightning.core.lightning import LightningModule
from torch import nn

In [3]:
import torchvision
torchvision.__version__

'0.11.3'

In [4]:
print(torch.__version__)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
print(torch.cuda.get_device_name(0))

1.10.2
cuda
NVIDIA GeForce MX250


In [32]:
class model(LightningModule):
    def __init__(self):
        super().__init__()
        self.loss_fn = nn.CrossEntropyLoss().to(device)
        # self.metrics = torchmetrics.functional.accuracy()
        self.net = nn.Sequential(
            nn.Linear(32, 16),
            nn.BatchNorm1d(16),
            nn.ReLU(inplace=True),

            nn.Linear(16, 8),
            nn.BatchNorm1d(8),
            nn.ReLU(inplace=True),
            
            nn.Linear(8, 8),
            nn.BatchNorm1d(8),
            nn.ReLU(inplace=True),
            nn.Dropout(0,7),

            nn.Linear(8, 4)
        ).to(device)

    def forawrd(self, x):
        return self.net(x)
    
    def training_step(self, batch, batch_idx):
        x = batch[:, :32].to(device)
        target = batch[:, 32].to(torch.int64).to(device)
        pred = self.forawrd(x)
        loss = self.loss_fn(pred, target)
        acc = torchmetrics.functional.accuracy(pred, target)

        self.log("train_acc", acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {"loss" : loss, "acc" : acc}
    
    def validation_step(self, batch, batch_idx):
        x = batch[:, :32].to(device)
        target = batch[:, 32].to(torch.int64).to(device)
        pred = self.net(x)
        loss = self.loss_fn(pred, target)
        acc = torchmetrics.functional.accuracy(pred, target)
        
        self.log("valid_acc", acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log("valid_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {"loss" : loss, "acc" : acc}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.001)

In [6]:
def create_datasets(batch_size):
    valid_size = 0.2

    # torch.FloatTensor로 변환
    train_csv = pd.read_csv('../data/train.csv')
    test_csv = pd.read_csv('../data/test.csv')

    train_data = torch.tensor(train_csv.to_numpy()[:, 1:]).float()
    test_data = torch.tensor(test_csv.to_numpy()[:, 1:]).float()

    # validation으로 사용할 trainning indices를 얻는다.
    num_train = len(train_data)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = np.int32(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    vaild_data = train_data[valid_idx]
    train_data = train_data[train_idx]
    # train_sampler = SubsetRandomSampler(train_idx)
    # valid_sampler = SubsetRandomSampler(valid_idx)

    # load training data in batches
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                            #    sampler=train_sampler,
                                               num_workers=0)

    # load validation data in batches
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                            #    sampler=valid_sampler,
                                               num_workers=0)

    # load test data in batches
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=batch_size,
                                              num_workers=0)

    return train_loader, test_loader, valid_loader


In [33]:
batch_size = 64
train_loader, test_loader, valid_loader = create_datasets(batch_size)

model_nn = model()
n_epochs = 500

# training
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

earlyStopping = EarlyStopping(monitor="valid_loss", min_delta=0.00, patience=5, verbose=True, mode="min")
trainer = pl.Trainer(max_epochs=n_epochs, gpus=1, callbacks=[earlyStopping])
trainer.fit(model_nn, train_loader, valid_loader)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params
---------------------------------------------
0 | loss_fn | CrossEntropyLoss | 0     
1 | net     | Sequential       | 836   
---------------------------------------------
836       Trainable params
0         Non-trainable params
836       Total params
0.003     Total estimated model params size (MB)


                                                              

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 60/60 [00:02<00:00, 28.17it/s, loss=1.43, v_num=5, train_acc_step=0.500, train_loss_step=1.280, valid_acc_step=0.417, valid_loss_step=1.280, valid_acc_epoch=0.251, valid_loss_epoch=1.400]

Metric valid_loss improved. New best score: 1.401


Epoch 1: 100%|██████████| 60/60 [00:01<00:00, 32.87it/s, loss=1.38, v_num=5, train_acc_step=0.583, train_loss_step=1.220, valid_acc_step=0.583, valid_loss_step=1.220, valid_acc_epoch=0.288, valid_loss_epoch=1.370, train_acc_epoch=0.210, train_loss_epoch=1.440]

Metric valid_loss improved by 0.032 >= min_delta = 0.0. New best score: 1.368


Epoch 2: 100%|██████████| 60/60 [00:02<00:00, 27.01it/s, loss=1.35, v_num=5, train_acc_step=0.583, train_loss_step=1.180, valid_acc_step=0.667, valid_loss_step=1.210, valid_acc_epoch=0.336, valid_loss_epoch=1.340, train_acc_epoch=0.270, train_loss_epoch=1.390]

Metric valid_loss improved by 0.028 >= min_delta = 0.0. New best score: 1.340


Epoch 3: 100%|██████████| 60/60 [00:01<00:00, 37.34it/s, loss=1.32, v_num=5, train_acc_step=0.750, train_loss_step=1.130, valid_acc_step=0.667, valid_loss_step=1.180, valid_acc_epoch=0.392, valid_loss_epoch=1.310, train_acc_epoch=0.322, train_loss_epoch=1.360]

Metric valid_loss improved by 0.028 >= min_delta = 0.0. New best score: 1.312


Epoch 4: 100%|██████████| 60/60 [00:02<00:00, 28.43it/s, loss=1.29, v_num=5, train_acc_step=0.750, train_loss_step=1.060, valid_acc_step=0.667, valid_loss_step=1.140, valid_acc_epoch=0.438, valid_loss_epoch=1.280, train_acc_epoch=0.371, train_loss_epoch=1.330]

Metric valid_loss improved by 0.031 >= min_delta = 0.0. New best score: 1.281


Epoch 5: 100%|██████████| 60/60 [00:01<00:00, 38.41it/s, loss=1.26, v_num=5, train_acc_step=0.833, train_loss_step=1.020, valid_acc_step=0.750, valid_loss_step=1.100, valid_acc_epoch=0.469, valid_loss_epoch=1.250, train_acc_epoch=0.419, train_loss_epoch=1.300]

Metric valid_loss improved by 0.033 >= min_delta = 0.0. New best score: 1.248


Epoch 6: 100%|██████████| 60/60 [00:01<00:00, 34.98it/s, loss=1.22, v_num=5, train_acc_step=0.833, train_loss_step=0.993, valid_acc_step=0.667, valid_loss_step=1.060, valid_acc_epoch=0.491, valid_loss_epoch=1.210, train_acc_epoch=0.445, train_loss_epoch=1.270]

Metric valid_loss improved by 0.035 >= min_delta = 0.0. New best score: 1.213


Epoch 7: 100%|██████████| 60/60 [00:01<00:00, 40.80it/s, loss=1.19, v_num=5, train_acc_step=0.833, train_loss_step=0.956, valid_acc_step=0.667, valid_loss_step=0.996, valid_acc_epoch=0.512, valid_loss_epoch=1.180, train_acc_epoch=0.475, train_loss_epoch=1.240]

Metric valid_loss improved by 0.036 >= min_delta = 0.0. New best score: 1.176


Epoch 8: 100%|██████████| 60/60 [00:02<00:00, 28.16it/s, loss=1.15, v_num=5, train_acc_step=0.833, train_loss_step=0.910, valid_acc_step=0.750, valid_loss_step=0.936, valid_acc_epoch=0.537, valid_loss_epoch=1.140, train_acc_epoch=0.503, train_loss_epoch=1.200]

Metric valid_loss improved by 0.038 >= min_delta = 0.0. New best score: 1.138


Epoch 9: 100%|██████████| 60/60 [00:02<00:00, 24.88it/s, loss=1.11, v_num=5, train_acc_step=0.917, train_loss_step=0.825, valid_acc_step=0.750, valid_loss_step=0.875, valid_acc_epoch=0.561, valid_loss_epoch=1.100, train_acc_epoch=0.519, train_loss_epoch=1.170]

Metric valid_loss improved by 0.041 >= min_delta = 0.0. New best score: 1.097


Epoch 10: 100%|██████████| 60/60 [00:02<00:00, 28.32it/s, loss=1.07, v_num=5, train_acc_step=0.833, train_loss_step=0.801, valid_acc_step=0.750, valid_loss_step=0.836, valid_acc_epoch=0.575, valid_loss_epoch=1.060, train_acc_epoch=0.534, train_loss_epoch=1.130]

Metric valid_loss improved by 0.038 >= min_delta = 0.0. New best score: 1.059


Epoch 11:  50%|█████     | 30/60 [00:01<00:01, 25.34it/s, loss=1.03, v_num=5, train_acc_step=0.833, train_loss_step=0.742, valid_acc_step=0.750, valid_loss_step=0.836, valid_acc_epoch=0.575, valid_loss_epoch=1.060, train_acc_epoch=0.557, train_loss_epoch=1.090]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


Epoch 11:  55%|█████▌    | 33/60 [00:19<00:15,  1.72it/s, loss=1.03, v_num=5, train_acc_step=0.833, train_loss_step=0.742, valid_acc_step=0.750, valid_loss_step=0.836, valid_acc_epoch=0.575, valid_loss_epoch=1.060, train_acc_epoch=0.557, train_loss_epoch=1.090]

In [1]:
import tensorboard

SyntaxError: invalid syntax (2012125320.py, line 1)

In [31]:
import pandas as pd

sample_submission = pd.read_csv('../data/sample_submission.csv')

batch_index = 0

for i, data in enumerate(test_loader):
    data = data.float()
    outputs = model_nn.forawrd(data)
    batch_index = i * batch_size
    max_vals, max_indices = torch.max(outputs, 1)
    sample_submission.iloc[batch_index:batch_index + batch_size, 1:] = max_indices.long().cpu().numpy()[:,np.newaxis]

sample_submission.to_csv('dacon_object_1.csv', index=False)

# 0.58
일단 lightning에 적응해야 한다.  
속성 데이터로서는 이정도로 마무리하고, cnn으로 85 정도 정확도를 얻고 다시 생각해보자.
