In [1]:
# !git clone https://github.com/openai/gpt-2.git
# !mv gpt-2 gpt_2
# !python3 gpt_2/download_model.py 345M
# !pip3 install regex --user
# !wget https://raw.githubusercontent.com/minimaxir/gpt-2-simple/master/gpt_2_simple/src/accumulate.py

In [2]:
from gpt_2.src import model, encoder
from accumulate import AccumulatingOptimizer
import tensorflow as tf

In [3]:
# Start from the library's default hyperparameters; a later cell overrides
# them with the values shipped alongside the 345M checkpoint.
hparams = model.default_hparams()

# Encoder for the '345M' model, looked up under the local 'models' directory.
enc = encoder.get_encoder('345M', 'models')

In [4]:
import json

# Read the checkpoint's hyperparameter file, then overlay its values on the
# defaults already held in `hparams`.
with open('models/345M/hparams.json') as hparams_file:
    checkpoint_hparams = json.load(hparams_file)
hparams.override_from_dict(checkpoint_hparams)

In [5]:
# Sanity check: encode a sample sentence; the cell output is the resulting
# list of integer token ids.
enc.encode('hi nice to meet u')

[5303, 3621, 284, 1826, 334]

In [6]:
class Model:
    """GPT-2 backbone with a dense classification head (TF1 graph mode).

    Constructing an instance adds placeholders, loss, optimizer and accuracy
    ops to the current default graph. The GPT-2 ``model`` module and the
    ``hparams`` object are read from the notebook's global scope.

    Attributes:
        X: int32 placeholder for token ids, declared as [None, None].
        Y: int32 placeholder for class labels, declared as [None].
        logits: output of the dense head, width ``dimension_output``.
        cost: mean sparse softmax cross-entropy between ``logits`` and ``Y``.
        all_vars: trainable variables whose name contains 'model'.
        optimizer: Adam op minimising ``cost``.
        accuracy: mean of (argmax(logits) == Y) over the batch.
    """

    def __init__(
        self,
        dimension_output,
        learning_rate = 0.0001,
    ):
        """Build the classification graph.

        Args:
            dimension_output: number of target classes (width of the dense head).
            learning_rate: learning rate passed to ``tf.train.AdamOptimizer``.
        """
        self.X = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.int32, [None])

        # Per-position logits from GPT-2, averaged over axis 1 to get a single
        # vector per example before the dense head.
        # NOTE(review): the mean runs over every position fed in X, so any
        # padding the caller appends is averaged in as well.
        output = model.model(hparams=hparams, X=self.X)['logits']
        output = tf.reduce_mean(output, axis = 1)
        self.logits = tf.layers.dense(output, dimension_output)

        self.cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits = self.logits, labels = self.Y
            )
        )

        # Variables selected by name; the notebook hands this list to a Saver.
        # NOTE(review): presumably this keeps the GPT-2 weights and leaves out
        # the dense head (default scope 'dense') -- confirm against the
        # variable names in the checkpoint.
        self.all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]

        # No var_list is given, so Adam updates every trainable variable in the
        # graph (backbone and dense head alike).
        self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)

        # Shows the pooled tensor (and its static shape) when the graph is built.
        print(output)

        correct_pred = tf.equal(
            tf.argmax(self.logits, 1, output_type = tf.int32), self.Y
        )
        self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [7]:
# Width of the classifier head and the Adam step size handed to Model.
dimension_output = 2
learning_rate = 0.0001

# Clear the default graph before building, then open the session that the
# following cells use via the global `sess`.
tf.reset_default_graph()
sess = tf.InteractiveSession()
modelnn = Model(dimension_output = dimension_output, learning_rate = learning_rate)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Tensor("Mean:0", shape=(?, 50257), dtype=float32)


In [8]:
# Order matters: initialise every global variable first, then overwrite the
# variables listed in `modelnn.all_vars` with the values stored in the
# checkpoint. Running the initializer afterwards would undo the restore.
init_op = tf.global_variables_initializer()
sess.run(init_op)

saver = tf.train.Saver(var_list = modelnn.all_vars)
saver.restore(sess, 'models/345M/model.ckpt')

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from models/345M/model.ckpt


In [9]:
from utils import *

# `sklearn.datasets` is used below; import it explicitly instead of relying on
# the star import above to leak the `sklearn` name into this namespace.
import sklearn.datasets

# `sklearn.cross_validation` was removed in scikit-learn 0.20; the function
# lives in `sklearn.model_selection` since 0.18. Fall back only for very old
# installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split

# Load the text files under 'data'; the recorded output shows the class names
# it produced for this dataset.
trainset = sklearn.datasets.load_files(container_path = 'data', encoding = 'UTF-8')
# `separate_dataset` comes from the local `utils` module.
# NOTE(review): its semantics and the meaning of the 1.0 argument are not
# visible from this notebook -- confirm in utils.py.
trainset.data, trainset.target = separate_dataset(trainset,1.0)
print(trainset.target_names)
print(len(trainset.data))
print(len(trainset.target))



['negative', 'positive']
10662
10662


In [10]:
# Number of examples per minibatch in the training / evaluation loops.
batch_size = 16
# Maximum number of token ids kept per document when encoding.
maxlen = 100

In [11]:
from tqdm import tqdm

# Encode every document and keep at most `maxlen` leading token ids of each.
X = [enc.encode(document)[:maxlen] for document in tqdm(trainset.data)]

100%|██████████| 10662/10662 [00:01<00:00, 5777.76it/s]


In [12]:
from keras.preprocessing.sequence import pad_sequences
# Right-pad ('post') the encoded documents into one rectangular array.
# NOTE(review): the fill value is pad_sequences' default; confirm it does not
# collide with a meaningful token id for this vocabulary.
X = pad_sequences(sequences = X, padding = 'post')
# Display (n_documents, padded_length).
X.shape

Using TensorFlow backend.


(10662, 67)

In [13]:
# Hold out 20% of the examples for evaluation. A fixed `random_state` makes
# the split (and therefore the reported validation metrics) repeatable across
# kernel restarts; without it every run evaluates on a different subset.
train_X, test_X, train_Y, test_Y = train_test_split(
    X, trainset.target, test_size = 0.2, random_state = 42
)

In [None]:
import time

# Imported explicitly: the NaN check below must not depend on `np` leaking in
# through a star import elsewhere in the notebook.
import numpy as np

# EARLY_STOPPING: number of consecutive epochs without a new best validation
# accuracy after which training stops.
# CURRENT_CHECKPOINT: epochs elapsed since the last improvement.
# CURRENT_ACC: best validation accuracy seen so far.
EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 3, 0, 0, 0


def _run_epoch(inputs, labels, desc, train = False):
    """Run one pass over (inputs, labels) in minibatches of `batch_size`.

    Uses the notebook globals `sess`, `modelnn`, `batch_size` and `tqdm`.

    Args:
        inputs: indexable batch of token-id rows fed to `modelnn.X`.
        labels: indexable batch of class labels fed to `modelnn.Y`.
        desc: progress-bar caption.
        train: when True, also run `modelnn.optimizer` and assert the loss is
            not NaN after every step.

    Returns:
        (mean_loss, mean_accuracy), each averaged per example so that a short
        final batch is weighted by its actual size.
    """
    fetches = [modelnn.accuracy, modelnn.cost]
    if train:
        fetches.append(modelnn.optimizer)

    total_loss, total_acc = 0.0, 0.0
    pbar = tqdm(range(0, len(inputs), batch_size), desc = desc)
    for start in pbar:
        # Slicing past the end is safe, so the last batch is simply shorter.
        batch_x = inputs[start: start + batch_size]
        batch_y = labels[start: start + batch_size]
        acc, cost = sess.run(
            fetches,
            feed_dict = {
                modelnn.Y: batch_y,
                modelnn.X: batch_x,
            },
        )[:2]
        if train:
            assert not np.isnan(cost)
        # `cost` and `acc` are per-batch means; scale them back to sums so the
        # epoch average is exact regardless of batch sizes.
        total_loss += cost * len(batch_x)
        total_acc += acc * len(batch_x)
        pbar.set_postfix(cost = cost, accuracy = acc)

    n_examples = max(len(inputs), 1)  # avoid dividing by zero on empty input
    return total_loss / n_examples, total_acc / n_examples


while CURRENT_CHECKPOINT < EARLY_STOPPING:
    lasttime = time.time()

    train_loss, train_acc = _run_epoch(
        train_X, train_Y, 'train minibatch loop', train = True
    )
    test_loss, test_acc = _run_epoch(test_X, test_Y, 'test minibatch loop')

    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1

    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

# Reached once EARLY_STOPPING consecutive epochs brought no improvement.
print('break epoch:%d\n' % (EPOCH))

train minibatch loop: 100%|██████████| 534/534 [02:47<00:00,  3.92it/s, accuracy=0, cost=2.44]     
test minibatch loop: 100%|██████████| 134/134 [00:12<00:00, 10.32it/s, accuracy=0.6, cost=1.06]   
train minibatch loop:   0%|          | 0/534 [00:00<?, ?it/s]

epoch: 0, pass acc: 0.000000, current acc: 0.596156
time taken: 180.55004000663757
epoch: 0, training loss: 2.145222, training acc: 0.696916, valid loss: 1.032916, valid acc: 0.596156



train minibatch loop: 100%|██████████| 534/534 [02:45<00:00,  3.91it/s, accuracy=1, cost=0.0136]    
test minibatch loop: 100%|██████████| 134/134 [00:12<00:00, 12.07it/s, accuracy=0.6, cost=0.578]  
train minibatch loop:   0%|          | 0/534 [00:00<?, ?it/s]

epoch: 1, pass acc: 0.596156, current acc: 0.786029
time taken: 177.94658660888672
epoch: 1, training loss: 0.565232, training acc: 0.796694, valid loss: 0.530423, valid acc: 0.786029



train minibatch loop: 100%|██████████| 534/534 [02:46<00:00,  3.90it/s, accuracy=1, cost=0.0168]    
test minibatch loop: 100%|██████████| 134/134 [00:12<00:00, 12.06it/s, accuracy=0.8, cost=0.699]  
train minibatch loop:   0%|          | 0/534 [00:00<?, ?it/s]

time taken: 178.13427686691284
epoch: 2, training loss: 0.623421, training acc: 0.842068, valid loss: 0.744256, valid acc: 0.776278



train minibatch loop: 100%|██████████| 534/534 [02:46<00:00,  3.91it/s, accuracy=1, cost=1.79e-5]   
test minibatch loop: 100%|██████████| 134/134 [00:12<00:00, 12.12it/s, accuracy=0.6, cost=1.97]   
train minibatch loop:   0%|          | 0/534 [00:00<?, ?it/s]

epoch: 3, pass acc: 0.786029, current acc: 0.795406
time taken: 178.0716052055359
epoch: 3, training loss: 0.567666, training acc: 0.887208, valid loss: 1.181159, valid acc: 0.795406



train minibatch loop: 100%|██████████| 534/534 [02:45<00:00,  3.90it/s, accuracy=1, cost=0.0897]    
test minibatch loop: 100%|██████████| 134/134 [00:12<00:00, 12.05it/s, accuracy=0.8, cost=2.69]   
train minibatch loop:   0%|          | 0/534 [00:00<?, ?it/s]

time taken: 177.9404056072235
epoch: 4, training loss: 0.375595, training acc: 0.925079, valid loss: 1.823777, valid acc: 0.783779



train minibatch loop: 100%|██████████| 534/534 [02:45<00:00,  3.91it/s, accuracy=1, cost=9.3e-6]    
test minibatch loop: 100%|██████████| 134/134 [00:12<00:00, 11.97it/s, accuracy=0.8, cost=1.32]   
train minibatch loop:   0%|          | 0/534 [00:00<?, ?it/s]

time taken: 177.9206998348236
epoch: 5, training loss: 0.939741, training acc: 0.888967, valid loss: 1.156936, valid acc: 0.740647



train minibatch loop:  40%|███▉      | 212/534 [01:05<01:39,  3.22it/s, accuracy=0.938, cost=0.209] 