<a href="https://colab.research.google.com/github/scholzie/ai-story-builder/blob/main/ai_story_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil psutil humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()

Collecting gputil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: gputil
  Building wheel for gputil (setup.py) ... [?25l[?25hdone
  Created wheel for gputil: filename=GPUtil-1.4.0-py3-none-any.whl size=7394 sha256=fc6f092a50964207028bd9209f7208505bb7cbbfd15c7c21118fc7d0f77f7522
  Stored in directory: /root/.cache/pip/wheels/a9/8a/bd/81082387151853ab8b6b3ef33426e98f5cbfebc3c397a9d4d0
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.4.0
Gen RAM Free: 10.4 GB  | Proc size: 1.9 GB
GPU RAM Free: 13619MB | Used: 1483MB | Util  10% | Total 15360MB


In [None]:
# !kill -9 -1

In [12]:
!pip install torch transformers datasets requests==2.31.0
!pip install accelerate -U

Collecting datasets
  Downloading datasets-2.19.2-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.1/542.1 kB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.

In [1]:
import torch

if torch.cuda.is_available():
    print(f'CUDA is available! Training on GPU: {torch.cuda.get_device_name(0)}')
else:
    print('CUDA is not available. Training on CPU...')


CUDA is available! Training on GPU: Tesla T4


In [21]:
# del memory_hog
torch.cuda.empty_cache()  # Clear cache to ensure memory is freed


In [15]:
import torch
from transformers import GPT2LMHeadModel

# Load model
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
model.to('cuda')  # Move model to GPU

# Desired memory to reserve in GB (example: half of the free memory)
desired_reserve_gb = 10  # Adjust this value as needed

# Calculate the number of elements in a tensor that would take up this much memory
num_elements = int(desired_reserve_gb * (1024 ** 3) / 4)  # Since each float32 element takes 4 bytes

# Create the memory_hog tensor
memory_hog = torch.zeros(num_elements, dtype=torch.float32, device='cuda')


In [3]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

model_name = "gpt2-large"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

In [4]:
def generate_stories(prompt, max_length=300, num_return_sequences=1):
    inputs = tokenizer.encode(prompt, return_tensors='pt')
    inputs = inputs.to('cuda')
    attention_mask = torch.ones(inputs.shape, dtype=torch.long)
    outputs = model.generate(
        inputs,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=2,
        do_sample=True,  # Enable sampling
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # Set pad_token_id to eos_token_id
        attention_mask=attention_mask
    )

    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]


In [5]:
def test_run(prompt=None):
    prompt = "Once upon a time, in a land far, far away"

    stories = generate_stories(prompt)
    for i, story in enumerate(stories):
        print(f"Story {i+1}:")
        print(story)
        print("\n" + "="*80 + "\n")


In [6]:
from datasets import load_dataset, DatasetDict

def tokenize_function(examples):
    # Tokenize the text
    tokenized_inputs = tokenizer(examples['text'], truncation=True, padding="max_length", max_length=512)
    # Ensure that labels are the input_ids shifted by one token to the right for next token prediction
    tokenized_inputs["labels"] = tokenized_inputs["input_ids"][:]
    return tokenized_inputs


In [7]:
tokenizer.pad_token = tokenizer.eos_token
dataset = load_dataset('text', data_files={'train': 'cleaned_merged_fairy_tales_without_eos.txt'})

train_test_split = dataset['train'].train_test_split(test_size=0.1)

tokenized_data = train_test_split.map(tokenize_function, batched=True)

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/82584 [00:00<?, ? examples/s]

Map:   0%|          | 0/9176 [00:00<?, ? examples/s]

In [8]:
print(dataset)
print(train_test_split)
print(tokenized_data)

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 91760
    })
})
DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 82584
    })
    test: Dataset({
        features: ['text'],
        num_rows: 9176
    })
})
DatasetDict({
    train: Dataset({
        features: ['text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 82584
    })
    test: Dataset({
        features: ['text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 9176
    })
})


In [9]:
# Check a sample from the training set
print(tokenized_data['train'][0])

# Check a sample from the validation set
print(tokenized_data['test'][0])

print(tokenized_data)

{'text': '“They have begun to fight!” I cried. “Follow me.”', 'input_ids': [447, 250, 2990, 423, 9258, 284, 1907, 0, 447, 251, 314, 16896, 13, 564, 250, 7155, 502, 13, 447, 251, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 502

In [16]:
from transformers import TrainingArguments, Trainer
import torch
batch_size=max(int(torch.cuda.get_device_properties(0).total_memory/1e9+.5)*16, desired_reserve_gb)

training_args = TrainingArguments(
    output_dir='./results',
    eval_strategy='steps',
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=100,
    num_train_epochs=3,
    save_total_limit=3
)

trainer = Trainer(
    model = model,
    args=training_args,
    train_dataset=tokenized_data['train'],
    eval_dataset=tokenized_data['test']
)

In [20]:
import time

start_time = time.time()
# Clear the large tensor to free up the memory for actual training
# del memory_hog
torch.cuda.empty_cache()  # Clear cache to ensure memory is freed

trainer.train()
end_time = time.time()

print(f"Total training time: {end_time - start_time:.2f} seconds")

OutOfMemoryError: CUDA out of memory. Tried to allocate 4.00 GiB. GPU 