In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

!pip install -q transformers

!nvidia-smi

Mounted at /content/drive
[K     |████████████████████████████████| 4.4 MB 15.0 MB/s 
[K     |████████████████████████████████| 6.6 MB 64.0 MB/s 
[K     |████████████████████████████████| 101 kB 10.5 MB/s 
[K     |████████████████████████████████| 596 kB 79.2 MB/s 
[?25hFri Jul 22 13:40:20 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    23W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                   

In [2]:
#@title Default title text
# Change into the project source directory on the mounted drive so the local
# modules imported below (dataset, preprocessor, train, util) and the relative
# paths in the config resolve.
%cd /content/drive/MyDrive/NLP/ENG/ai4code/src2

import easydict
import gc
import torch

import pandas as pd
from dataset import data_setup

from preprocessor import _20CodeCellPreprocessor
from train import train, train_setup
from util import debug_setup


def main():
    """Run the full pipeline: preprocess, build loaders, set up and train.

    Steps, in order:
      1. Build the run configuration as an ``EasyDict``.
      2. Run ``_20CodeCellPreprocessor`` to obtain the train/val frames,
         markdown frames and feature collections.
      3. When ``args.debug`` is set, shrink every split with ``debug_setup``.
      4. Build the data loaders and read ``train_orders.csv``.
      5. Create model/optimizer/scheduler/scaler via ``train_setup`` and,
         when ``args.load_train`` is set, restore them from a checkpoint.
      6. Call ``train``.

    Raises:
        ValueError: if ``load_train`` is enabled but no ``checkpoint_path``
            has been configured.
    """
    args = {
        'model_name_or_path':'microsoft/codebert-base',
        'input_path':'../input/',
        'train_path':'./data/train.csv',
        'train_mark_path':'./data/train_mark.csv',
        'train_features_path':'./data/train_fts.json',
        'val_path':"./data/val.csv",
        'val_mark_path':'./data/val_mark.csv',
        'val_features_path':'./data/val_fts.json',
        'output_path':'./output',
        'md_max_len':64,
        'total_max_len':512,
        'batch_size':8,
        'accumulation_steps':4,
        'epoch':0,
        'epochs':5,
        'n_workers':8,
        'debug':True,
        'load_train':False,
        'max_lr':3e-5,
        'min_lr':.3e-6  # i.e. 3e-7
        }
    args = easydict.EasyDict(args)

    preprocessor = _20CodeCellPreprocessor(**vars(args))
    train_df, val_df, train_df_mark, val_df_mark, train_fts, val_fts = preprocessor.run()

    print('before debug', train_df.shape, val_df.shape,
          train_df_mark.shape, val_df_mark.shape, len(train_fts), len(val_fts))

    if args.debug:
        train_df, train_df_mark, train_fts, val_df, val_df_mark, val_fts = debug_setup(
            train_df, train_df_mark, train_fts, val_df, val_df_mark, val_fts)

    print('after debug', train_df.shape, val_df.shape,
          train_df_mark.shape, val_df_mark.shape, len(train_fts), len(val_fts))

    train_loader, val_loader = data_setup(train_df_mark, val_df_mark, train_fts, val_fts, args)

    # `read_csv(..., squeeze=True)` is deprecated (pandas 1.4) and removed in
    # pandas 2.0; squeezing the single remaining column afterwards yields the
    # same Series indexed by `id`, whose string values are then split on
    # whitespace into lists.
    df_orders = pd.read_csv(
        args.input_path + 'train_orders.csv', index_col='id'
    ).squeeze('columns').str.split()

    # Only the loaders, val_df and df_orders are used from here on; release
    # the remaining intermediates before the model is created.
    del train_df, train_df_mark, val_df_mark, train_fts, val_fts, preprocessor
    gc.collect()

    # This is variable
    # NOTE(review): the result is a float and the trailing `/ 4` is an
    # unexplained constant -- confirm what train_setup expects here.
    args.num_train_steps = len(train_loader) / args.accumulation_steps / 4

    model, optimizer, scheduler, scaler = train_setup(args)

    checkpoint = None
    if args.load_train:
        # The config above does not define `checkpoint_path`; fail with a
        # clear message instead of an opaque AttributeError.
        checkpoint_path = getattr(args, 'checkpoint_path', None)
        if not checkpoint_path:
            raise ValueError(
                "load_train is enabled but args.checkpoint_path is not set")

        checkpoint = torch.load(checkpoint_path)
        # Keep the restored epoch on the config rather than in an unused
        # local. Presumably train() reads args.epoch as the starting epoch --
        # TODO confirm against train.py.
        args.epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model_state_dict'])

    # Move the model to the GPU before restoring the optimizer state, in the
    # same order as the original resume path.
    model.cuda()

    if checkpoint is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        scaler.load_state_dict(checkpoint['scaler_state_dict'])

    train(model, train_loader, val_loader, optimizer,
          scheduler, scaler, val_df, df_orders, args)

# Run the pipeline when this cell/script is executed directly.
if __name__ == '__main__':
    main()


/content/drive/MyDrive/NLP/ENG/ai4code/src2
train_df, val_df are already exits
train_fts, val_fts are already exists
before debug (5740832, 8) (629814, 8) (1950118, 8) (215946, 8) 125292 13964
after debug (575215, 8) (63342, 8) (195597, 8) (21813, 8) 12529 1396


Downloading:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/498 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/476M [00:00<?, ?B/s]



Epoch 1 Loss: 0.2604 lr: [5.040819665426121e-07, 5.040819665426121e-07]: 100%|██████████| 24449/24449 [43:33<00:00,  9.35it/s]
100%|██████████| 2727/2727 [01:39<00:00, 27.50it/s]
Preds score 0.6291638370551047
Epoch 2 Loss: 0.258 lr: [7.518957830586426e-07, 7.518957830586426e-07]: 100%|██████████| 24449/24449 [43:32<00:00,  9.36it/s]
100%|██████████| 2727/2727 [01:39<00:00, 27.51it/s]
Preds score 0.6294795002964728
Epoch 3 Loss: 0.2574 lr: [9.997095995746734e-07, 9.997095995746734e-07]: 100%|██████████| 24449/24449 [43:26<00:00,  9.38it/s]
100%|██████████| 2727/2727 [01:38<00:00, 27.61it/s]
Preds score 0.6306121282047064
Epoch 4 Loss: 0.2569 lr: [1.2475234160907038e-06, 1.2475234160907038e-06]: 100%|██████████| 24449/24449 [43:26<00:00,  9.38it/s]
100%|██████████| 2727/2727 [01:39<00:00, 27.49it/s]
Preds score 0.6296972531031733
Epoch 5 Loss: 0.2568 lr: [1.4953372326067346e-06, 1.4953372326067346e-06]: 100%|██████████| 24449/24449 [43:31<00:00,  9.36it/s]
100%|██████████| 2727/2727 [01