In [1]:
import os
import random
import logging
import numpy as np
import pandas as pd
from tqdm import tqdm, trange

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.modeling import BertForSequenceClassification
from pytorch_pretrained_bert.optimization import BertAdam

seed = 20190104
max_seq_length = 192
bert_model = "bert-base-uncased"
do_lower_case = True

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# The GPU id to use, usually either "0" or "1"
os.environ["CUDA_VISIBLE_DEVICES"]="1"
os.environ["TFHUB_CACHE_DIR"]="tfhub_modules"

logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                    datefmt = '%m/%d/%Y %H:%M:%S',
                    level = logging.DEBUG)
logger = logging.getLogger(__name__)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
n_gpu = torch.cuda.device_count()

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if n_gpu > 0:
    torch.cuda.manual_seed_all(seed)

In [6]:
tokenizer = BertTokenizer.from_pretrained(bert_model, do_lower_case=do_lower_case)
print(tokenizer.tokenize("I love Kung Fu Panda and chicken tikka masala!"))

01/22/2019 16:47:54 - DEBUG - urllib3.connectionpool -   Starting new HTTPS connection (1): s3.amazonaws.com:443
01/22/2019 16:47:55 - DEBUG - urllib3.connectionpool -   https://s3.amazonaws.com:443 "HEAD /models.huggingface.co/bert/bert-base-uncased-vocab.txt HTTP/1.1" 200 0
01/22/2019 16:47:55 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/david/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


['i', 'love', 'kung', 'fu', 'panda', 'and', 'chicken', 'ti', '##kka', 'mas', '##ala', '!']
