In [1]:
from trainer import Trainer
from utils import init_logger, load_tokenizer, set_seed, MODEL_CLASSES, MODEL_PATH_MAP
from data_loader import load_and_cache_examples
import argparse

In [2]:
def _str2bool(value):
    """Convert a command-line string into a real boolean.

    argparse applies ``type`` to the raw string, and ``bool("False")`` is
    ``True`` because every non-empty string is truthy. Using ``type=bool``
    therefore makes it impossible to switch a flag off by value.

    Args:
        value: The raw option value (a string from the command line, or an
            actual ``bool`` which is returned unchanged).

    Returns:
        ``True`` or ``False`` according to the spelled-out value.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean
            spelling.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays falsy, as it was under the old ``type=bool``.
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError("Expected a boolean value (true/false), got %r" % (value,))


# Training configuration: every tunable setting is declared here with its default.
parser = argparse.ArgumentParser()

# Task, storage locations and data files.
parser.add_argument("--task", default="nsmc", type=str, help="The name of the task to train")
parser.add_argument("--model_dir", default="./model", type=str, help="Path to save, load model")
parser.add_argument("--data_dir", default="./data", type=str, help="The input data dir")
parser.add_argument("--train_file", default="ratings_train.txt", type=str, help="Train file")
parser.add_argument("--test_file", default="ratings_test.txt", type=str, help="Test file")

# Which entry of MODEL_CLASSES / MODEL_PATH_MAP to use.
parser.add_argument("--model_type", default="kobert", type=str, help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))

# Optimisation hyper-parameters.
# NOTE(review): `set_seed` and `init_logger` are imported at the top of the notebook
# but never called in the visible cells, so --seed may have no effect unless Trainer
# applies it itself -- confirm.
parser.add_argument('--seed', type=int, default=42, help="random seed for initialization")
parser.add_argument("--train_batch_size", default=32, type=int, help="Batch size for training.")
parser.add_argument("--eval_batch_size", default=64, type=int, help="Batch size for evaluation.")
parser.add_argument("--max_seq_len", default=50, type=int, help="The maximum total input sequence length after tokenization.")
parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
parser.add_argument("--num_train_epochs", default=5.0, type=float, help="Total number of training epochs to perform.")
parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.")
parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                    help="Number of updates steps to accumulate before performing a backward/update pass.")
parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.")
parser.add_argument("--max_steps", default=-1, type=int, help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")

# Logging / checkpoint cadence, counted in update steps.
parser.add_argument('--logging_steps', type=int, default=2000, help="Log every X updates steps.")
parser.add_argument('--save_steps', type=int, default=2000, help="Save checkpoint every X updates steps.")

# Boolean switches: parsed with _str2bool so that e.g. "--do_train False" really yields False.
parser.add_argument("--do_train", type=_str2bool, default=True, help="Whether to run training.")
parser.add_argument("--do_eval", type=_str2bool, default=True, help="Whether to run eval on the test set.")
parser.add_argument("--no_cuda", type=_str2bool, default=False, help="Avoid using CUDA when available")

# An explicit empty argv means only the defaults above are used; edit the list
# (e.g. ["--no_cuda", "true"]) to override a setting from inside the notebook.
args = parser.parse_args([])

# Resolve the selected model type to the entry MODEL_PATH_MAP holds for it.
args.model_name_or_path = MODEL_PATH_MAP[args.model_type]

In [3]:
# Display the checkpoint identifier that MODEL_PATH_MAP associates with the selected model type.
MODEL_PATH_MAP[args.model_type]

'monologg/kobert'

## Load tokenizer, build dataset

In [4]:
# The tokenizer is built once and shared by every split.
tokenizer = load_tokenizer(args)

# Load the two available splits in order: train first, then test.
train_dataset, test_dataset = (
    load_and_cache_examples(args, tokenizer, mode=split_name)
    for split_name in ("train", "test")
)

# No dev split is loaded in this notebook; the slot is left empty.
dev_dataset = None


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BertTokenizer'. 
The class this function is called from is 'KoBertTokenizer'.


## Train

In [5]:
# Build the trainer from the parsed settings and the datasets loaded earlier.
trainer = Trainer(args, train_dataset, dev_dataset, test_dataset)

# Honour the --do_train switch: it is declared with the other options but was
# never consulted, so training ran unconditionally. The default (True) keeps
# the previous behaviour.
if args.do_train:
    trainer.train()

# NOTE(review): --do_eval is also parsed but nothing visible in this notebook acts
# on it; wire it to the Trainer's evaluation entry point once its API is confirmed.

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at monologg/kobert and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch:   0%|          | 0/5 [00:00<?, ?it/s]
Iteration:   0%|          | 0/4688 [00:00<?, ?it/s][A
Iteration:   0%|          | 1/4688 [00:00<15:57,  4.90it/s][A
Iteration:   0%|          | 2/4688 [00:00<14:22,  5.43it/s][A
Iteration:   0%|          | 3/4688 [00:00<13:50,  5.64it/s][A
Iteration:   0%|          | 4/4688 [00:00<13:33,  5.76it/s][A
Iteration:   0%|          | 5/4688 [00:00<13:24,  5.82it/s][A
Iteration:   0%|          | 6/4688 [00:01<13:18,  5.87it/s][A
Iteration:   0%|          | 7/4688 [00:01<13:14,  5.89it/s][A
Iteration:   0%|          | 8/4688 [00:01<13:13,  5.89it/s][A
Iteration:   0%|          | 9/4688 [00:01<13:12,  5.90it/s][A
Iteration:   0%|          | 10/4688 [00:01

KeyboardInterrupt: 

## Inference

In [None]:
from predict import predict

# Inference-time options, declared as (flag, keyword-arguments) pairs and then
# registered in order on a fresh parser.
parser = argparse.ArgumentParser()
for _flag, _options in (
    ("--input_file", dict(default="sample_pred_in.txt", type=str, help="Input file for prediction")),
    ("--output_file", dict(default="sample_pred_out.txt", type=str, help="Output file for prediction")),
    ("--model_dir", dict(default="./model", type=str, help="Path to save, load model")),
    ("--batch_size", dict(default=32, type=int, help="Batch size for prediction")),
    ("--no_cuda", dict(action="store_true", help="Avoid using CUDA when available")),
):
    parser.add_argument(_flag, **_options)

# Empty argv: only the defaults declared above are used.
pred_config = parser.parse_args([])

# Hand the parsed options to the project's prediction routine.
# NOTE(review): presumably this needs a trained model under --model_dir -- confirm in predict.py.
predict(pred_config)