In [None]:
pip install transformers

In [None]:
import pandas as pd
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the plot dataset into a Pandas dataframe
df = pd.read_csv('/content/plot_dataset.csv')

# Use only the first 1200 rows
df = df[:1200]

# Drop any rows with null values (in any column)
df = df.dropna()

# The 'plot' column holds the raw training texts
input_texts = df['plot'].tolist()

# Training hyperparameters. They are defined before tokenization so that
# max_length can be applied there instead of only when slicing batches.
batch_size = 16   # examples per optimizer step
max_length = 128  # maximum number of tokens kept per example
num_epochs = 3    # full passes over the data

# GPT-2 ships without a padding token, so register one before batching.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Truncate to max_length while tokenizing. Without an explicit max_length the
# tokenizer only truncates at the model limit and pads every row up to the
# longest plot, building a far larger tensor than the training loop ever reads
# (it keeps only the first max_length columns).
input_encodings = tokenizer(
    input_texts,
    truncation=True,
    max_length=max_length,
    padding=True,
    return_tensors='pt',
)

# Load the pretrained model and grow its embedding matrix to cover the
# newly added [PAD] token.
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.resize_token_embeddings(len(tokenizer))

# Train on the GPU when one is available; the training loop moves each batch
# to model.device, so placing the model is all that is needed here.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Build the optimizer after the model has been placed on its device.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

model.train()

# Fine-tune the model for num_epochs passes over the tokenized texts, one
# mini-batch of batch_size examples per optimizer step. A failing batch is
# reported and skipped so that one bad step does not abort the whole run.
num_examples = len(input_encodings['input_ids'])

for epoch in range(num_epochs):
    total_loss = 0
    num_batches = 0
    for i in range(0, num_examples, batch_size):
        loss = None  # Initialize loss as None at the beginning of each loop
        # Pre-bind the per-batch names so the cleanup in `finally` is safe even
        # when the step fails before one of them has been assigned (a bare
        # `del` on an unassigned name would raise NameError from `finally`).
        batch_input_ids = batch_attention_mask = batch_labels = outputs = None
        try:
            optimizer.zero_grad()

            # Take this batch's rows and limit the sequence length
            batch_input_ids = input_encodings['input_ids'][i:i+batch_size, :max_length]
            batch_attention_mask = input_encodings['attention_mask'][i:i+batch_size, :max_length]

            # Move tensors to the device where the model is
            batch_input_ids = batch_input_ids.to(model.device)
            batch_attention_mask = batch_attention_mask.to(model.device)

            # Labels are the inputs themselves (the model shifts them
            # internally), except that padded positions are set to -100 so
            # the loss ignores them instead of teaching the model to emit [PAD].
            batch_labels = batch_input_ids.masked_fill(batch_attention_mask == 0, -100)

            outputs = model(
                batch_input_ids,
                attention_mask=batch_attention_mask,
                labels=batch_labels,
            )
            loss = outputs.loss
            loss.backward()

            # Clip gradients to prevent explosion
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        except Exception as e:
            # Best-effort training: report the failure and move on to the
            # next batch; failed batches are excluded from the epoch average.
            print(f"An error occurred: {e}")

        finally:
            # Drop references to this step's tensors, then release cached
            # GPU memory before the next step.
            batch_input_ids = batch_attention_mask = batch_labels = outputs = None
            torch.cuda.empty_cache()

    if num_batches > 0:
        print(f'Epoch {epoch+1} average loss: {total_loss/num_batches}')
    else:
        print(f'Epoch {epoch+1} no data to process')

# Persist the trained model first, then the tokenizer (which carries the
# added [PAD] token), each to its own output location.
model.save_pretrained(save_directory='title_generation_model.pt')
tokenizer.save_pretrained(save_directory='/content/tokenizer_title')