In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

!pip install -q transformers

!nvidia-smi

Mounted at /content/drive
[K     |████████████████████████████████| 4.4 MB 14.3 MB/s 
[K     |████████████████████████████████| 101 kB 12.4 MB/s 
[K     |████████████████████████████████| 596 kB 86.4 MB/s 
[K     |████████████████████████████████| 6.6 MB 61.8 MB/s 
[?25hFri Jul 22 13:35:19 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    24W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                   

In [None]:
#@title Training entry point
%cd /content/drive/MyDrive/NLP/ENG/ai4code/src2

import easydict
import gc
import torch

import pandas as pd
from dataset import data_setup

from preprocessor import _20CodeCellPreprocessor
from train import train, train_setup
from util import debug_setup


def main():
    """Run one end-to-end training session with the configuration defined below.

    Steps, in order:
      1. Build the run configuration as an ``EasyDict``.
      2. Run ``_20CodeCellPreprocessor`` to obtain the train/validation frames,
         their ``*_mark`` frames and the two feature collections.
      3. When ``debug`` is set, pass all six objects through ``debug_setup``
         (presumably a down-sampling step -- confirm in ``util``).
      4. Build the data loaders, read ``train_orders.csv`` and release the
         intermediates that are not used afterwards.
      5. Create model/optimizer/scheduler/scaler via ``train_setup``,
         optionally restore them from a checkpoint, move the model to the GPU
         and call ``train``.

    Raises:
        ValueError: if ``load_train`` is enabled but the configuration has no
            ``checkpoint_path``.
    """
    # To resume a previous run, set 'load_train' to True and add a
    # 'checkpoint_path' entry pointing at the saved checkpoint file.
    args = {
        'model_name_or_path':'microsoft/codebert-base',
        'input_path':'../input/',
        'train_path':'./data/train.csv',
        'train_mark_path':'./data/train_mark.csv',
        'train_features_path':'./data/train_fts.json',
        'val_path':"./data/val.csv",
        'val_mark_path':'./data/val_mark.csv',
        'val_features_path':'./data/val_fts.json',
        'output_path':'./output',
        'md_max_len':64,
        'total_max_len':512,
        'batch_size':8,
        'accumulation_steps':4,
        'epoch':0,
        'epochs':5,
        'n_workers':8,
        'debug':True,
        'load_train':False,
        'max_lr':3e-5,
        'min_lr':.3e-6
        }
    args = easydict.EasyDict(args)

    # Fail fast: without this check a missing 'checkpoint_path' would only
    # surface as an AttributeError after preprocessing and model setup.
    checkpoint_path = args.get('checkpoint_path')
    if args.load_train and not checkpoint_path:
        raise ValueError(
            "'load_train' is enabled but no 'checkpoint_path' is configured")

    # The whole configuration is forwarded to the preprocessor as keyword arguments.
    preprocessor = _20CodeCellPreprocessor(**vars(args))
    train_df, val_df, train_df_mark, val_df_mark, train_fts, val_fts = preprocessor.run()

    print('before debug', train_df.shape, val_df.shape,
          train_df_mark.shape, val_df_mark.shape, len(train_fts), len(val_fts))

    if args.debug:
        train_df, train_df_mark, train_fts, val_df, val_df_mark, val_fts = debug_setup(
            train_df, train_df_mark, train_fts, val_df, val_df_mark, val_fts)

    print('after debug', train_df.shape, val_df.shape,
          train_df_mark.shape, val_df_mark.shape, len(train_fts), len(val_fts))

    train_loader, val_loader = data_setup(train_df_mark, val_df_mark, train_fts, val_fts, args)

    # read_csv(squeeze=True) was deprecated in pandas 1.4 and removed in 2.0;
    # DataFrame.squeeze('columns') is the documented replacement and turns the
    # single remaining column into a Series before splitting each entry on whitespace.
    df_orders = pd.read_csv(
        args.input_path + 'train_orders.csv', index_col='id'
    ).squeeze('columns').str.split()

    # Only the loaders, val_df and df_orders are used from here on.
    del train_df, train_df_mark, val_df_mark, train_fts, val_fts, preprocessor
    gc.collect()

    # Depends on the loader length, so it can only be computed after data_setup.
    # NOTE(review): this is a float and covers a single pass over the loader
    # (not multiplied by 'epochs') -- confirm that train_setup expects that.
    args.num_train_steps = len(train_loader) / args.accumulation_steps

    model, optimizer, scheduler, scaler = train_setup(args)

    if args.load_train:
        checkpoint = torch.load(checkpoint_path)
        # Keep the stored epoch in the config instead of discarding it;
        # presumably train() reads its starting epoch from args.epoch -- confirm.
        args.epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['model_state_dict'])

        # Move the model to the GPU before restoring the optimizer state.
        model.cuda()

        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        scaler.load_state_dict(checkpoint['scaler_state_dict'])
    else:
        model.cuda()

    train(model, train_loader, val_loader, optimizer,
          scheduler, scaler, val_df, df_orders, args)

# Entry point: start the training run when this code is executed as the main program.
if __name__ == '__main__':
    main()


/content/drive/MyDrive/NLP/ENG/ai4code/src2
