In [1]:
import tensorflow as tf
import gpt_2_simple as gpt2

  from ._conv import register_converters as _register_converters


### Notebook that allows you to train your own GPT2 model. The network uses the pretrained 124M-parameter model and can easily fit on a 6GB GPU.

### The model takes ~5.8GB to store the parameters so a small batch size is required (A size of 4 seems best but you could change this). 

## Another option is to use the CPU but that would be quite slow

In [2]:
import os 

# Pretrained GPT-2 release to fine-tune from. Held in one variable so the
# on-disk check and the download request always refer to the same model.
model_name = "124M"

# Fetch the weights only when models/<model_name> is not already present.
# The name is passed by keyword: in current gpt-2-simple the first positional
# parameter of download_gpt2 is the target directory (model_dir), so a
# positional "124M" would place the files under 124M/ instead of models/.
if not os.path.exists(os.path.join("models", model_name)):
    gpt2.download_gpt2(model_name=model_name)

In [3]:
# Run nvidia-smi in a shell to show the installed GPU, the driver/CUDA versions
# and how much GPU memory is currently in use before training starts.
!nvidia-smi

Wed Feb 03 17:00:27 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 456.71       Driver Version: 456.71       CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 106... WDDM  | 00000000:25:00.0  On |                  N/A |
|  0%   57C    P0    27W / 200W |    849MiB /  6144MiB |      5%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|       

In [4]:
# Ask TensorFlow whether it can use a GPU; the cell displays the boolean result.
tf.test.is_gpu_available()

True

In [5]:
# Create the TensorFlow session that the fine-tuning and generation calls
# further down receive as their first argument.
sess = gpt2.start_tf_sess()

In [6]:
# Path to the training text file; passed to gpt2.finetune as `dataset`.
file_name = "../hi_all_text.txt"

In [7]:
# sess = gpt2.reset_session(sess)  # reset_session returns a new session, so rebind `sess`

In [8]:
"""
    Restore model from 'fresh' if the model is new, or 'latest' if you want to use a model that has been saved (checkpoint). To change, simply
    uncomment the one you wish to you and comment out the other using #
    
    Models are saved in the checkpoint folder, and contains all the information about the model.
    
    You may need to restart the session, perhaps if you continue training. In this case simply use the code above 
    
    >> gpt2.reset_session(sess)
    
"""


gpt2.finetune(sess,
              dataset=file_name,
              model_name='124M',
              steps=50,
              restore_from='fresh',
              #restore_from='latest',
              run_name='hi-machina',
              print_every=25,
              sample_every=10,
              overwrite=True,
              save_every=50,
              batch_size=4,
              use_memory_saving_gradients=True,
              sample_length=200,
              accumulate_gradients=1
              )

W0203 17:01:02.105000 12056 deprecation.py:323] From D:\GitHub\HelloInternet_old\gpt-2-simple\gpt_2_simple\src\sample.py:17: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0203 17:01:18.024088 12056 deprecation.py:323] From D:\Miniconda3\envs\tensorflow_GPU\lib\site-packages\tensorflow\python\training\saver.py:1276: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


Loading checkpoint models/124M\model.ckpt


  0%|                                                                                            | 0/1 [00:00<?, ?it/s]

Loading dataset...


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:20<00:00, 20.05s/it]


dataset has 3423010 tokens
Training...
 lux.
[Pg 80]
[Pg 81]Well, yes, I know this in case it's my second time reading this. I've come upon it as I've come across an article about the French philosopher in 1887.
[Pg 82]Of course that was he's famous. It was published by a bookseller in France. And there's this very interesting article that he wrote about the bookseller in 1887 called What is a man? It's about this great Frenchman. He's a philosopher in England and he's a philosopher by his bookseller. So in 1887, when it became fashionable in Britain, he was writing as a bookseller, but apparently he didn't go and do his thing, and was quite a bit of a problem in British society. Like he wrote about himself. So he found another, another way of looking at it. And it was in 1889, when it was fashionable, that he made a book of it and put out

 was in the wrong category. If you were thinking about my family of friends who are going to get married in the future, I think maybe it was my dau

# You can play about with the temperature, which affects the creativity of the predictions (The example below is from a model that trained for 10000 steps)

In [23]:
# Text the generated sample starts from.
prefix = "[Grey] Brady no more plane crash corner"
# Sampling temperature: the knob to change when experimenting with how
# creative the predictions are.
temperature = 0.7

# Generate a sample from the fine-tuned 'hi-machina' run and write it to a file.
# `temperature` is forwarded explicitly; previously it was assigned but never
# passed, so editing the value above had no effect on the output.
# No destination path is given, so the library's default output file is used.
gpt2.generate_to_file(sess,
                      run_name='hi-machina',
                      prefix=prefix,
                      temperature=temperature)

In [24]:
# Collect the generated sample as a list of whitespace-trimmed lines,
# leaving out any line that is empty after trimming.
text = []
with open("gpt_2_gen_texts.txt", "r") as f:
    # str.strip trims each raw line; filter(None, ...) drops the empty results.
    text.extend(filter(None, map(str.strip, f)))

In [25]:
# Echo the cleaned sample, one entry per output line.
for generated_line in text:
    print(generated_line)

[Grey] Brady no more plane crash corner. Yeah,
[Brady] yeah. But But yeah, we've got plane crash corner on the internet.
[Grey] It's plane crash corner, right?
[Brady] Well, I mean, if you say something plane crash corner, you go. Okay, well,
[Grey] I went on the internet to see the website and I got that Brady's paper cut not that I've got not that I've got paper cuts internet
[Brady] I have gone on the internet. And I got the link to this article about plane crash corner. I'm looking at it straight away. And I'm like, okay, all right, but it's not plane crash corner. I went looking for it. And I haven't come across it. I went looking for it on the internet. But no one has seen it. And I was like, No, no one has. But somebody has.
[Grey] This is why I was like a little bit terrified when the article came out. Because I don't know who this is. This is just this is what this is. This is the internet. This is what the internet is. It's the world's premier source of news. It's the source 