In [None]:
# FinGPT Model Training.ipynb

import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import tqdm


In [None]:
# Load the pretrained GPT-2 checkpoint and the tokenizer that matches it.
pretrained_model_name = 'gpt2-medium'
tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model_name)
model = GPT2LMHeadModel.from_pretrained(pretrained_model_name)

# GPT-2 is distributed without a padding token, so any request for a padded
# encoding fails. Reuse the end-of-text token for padding (the attention mask
# still marks which positions are real) and mirror the id into the model
# config so the model and tokenizer agree.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id


In [None]:
class FinancialDataset(Dataset):
    """Map-style dataset that tokenizes one text sample per access.

    Every item is padded or truncated to exactly ``max_length`` tokens so that
    the default ``DataLoader`` collation can stack samples into a batch.

    Args:
        data: Indexable collection; ``data[idx]`` is handed to the tokenizer.
        tokenizer: Tokenizer callable following the Hugging Face ``__call__``
            convention (accepts ``padding``/``truncation``/``max_length``/
            ``return_tensors`` and returns a mapping with ``input_ids`` and
            ``attention_mask``).
        max_length: Fixed token length of every returned sample.
    """

    def __init__(self, data, tokenizer, max_length=768):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.data = data

        # Padding needs a pad token and GPT-2 tokenizers do not define one.
        # Fall back to the end-of-text token when the caller has not set it.
        if getattr(self.tokenizer, 'pad_token', None) is None:
            eos_token = getattr(self.tokenizer, 'eos_token', None)
            if eos_token is not None:
                self.tokenizer.pad_token = eos_token

    def __len__(self):
        """Return the number of text samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize sample ``idx``.

        Returns:
            tuple: ``(input_ids, attention_mask)``, each with the leading
            batch dimension added by ``return_tensors='pt'`` squeezed away.
        """
        encoded = self.tokenizer(
            self.data[idx],
            add_special_tokens=True,
            max_length=self.max_length,
            # ``pad_to_max_length`` is deprecated; this is its replacement.
            padding='max_length',
            # Without truncation, long samples exceed max_length and produce
            # tensors of differing sizes that cannot be batched.
            truncation=True,
            return_tensors='pt',
        )
        return encoded['input_ids'].squeeze(0), encoded['attention_mask'].squeeze(0)
# Implementation of the dataset class

In [None]:
def train(model, loader, optimizer, scheduler, device):
    """Run one training pass over ``loader`` and return the mean batch loss.

    Args:
        model: Causal language model whose forward accepts ``input_ids``,
            ``attention_mask`` and ``labels`` keyword arguments and returns an
            object exposing a ``loss`` attribute.
        loader: Iterable of ``(input_ids, attention_mask)`` batches, the pair
            layout that ``FinancialDataset`` items collate into.
        optimizer: Optimizer over the model's parameters.
        scheduler: Learning-rate scheduler stepped once per batch, or ``None``
            to skip scheduling.
        device: Device the batches are moved to. The model is not moved here;
            it is assumed to already live on ``device``.

    Returns:
        float: Unweighted mean of the per-batch losses, or ``0.0`` when the
        loader yields no batches.
    """
    model.train()

    total_loss = 0.0
    num_batches = 0
    for input_ids, attention_mask in loader:
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)

        # Language-model targets are the inputs themselves. Padding positions
        # are set to -100, the index the cross-entropy loss ignores, so they
        # do not contribute to the loss.
        labels = input_ids.masked_fill(attention_mask == 0, -100)

        optimizer.zero_grad()
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels,
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        total_loss += loss.item()
        num_batches += 1

    return total_loss / num_batches if num_batches else 0.0


In [None]:
def evaluate(model, loader, device):
    """Compute the mean language-modelling loss over ``loader`` without training.

    Args:
        model: Causal language model whose forward accepts ``input_ids``,
            ``attention_mask`` and ``labels`` keyword arguments and returns an
            object exposing a ``loss`` attribute.
        loader: Iterable of ``(input_ids, attention_mask)`` batches, the pair
            layout that ``FinancialDataset`` items collate into.
        device: Device the batches are moved to. The model is not moved here;
            it is assumed to already live on ``device``.

    Returns:
        float: Unweighted mean of the per-batch losses, or ``0.0`` when the
        loader yields no batches. The model is left in eval mode.
    """
    model.eval()

    total_loss = 0.0
    num_batches = 0
    # No gradients are needed for scoring; skipping them saves memory.
    with torch.no_grad():
        for input_ids, attention_mask in loader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)

            # Padding positions are set to -100 so the loss ignores them.
            labels = input_ids.masked_fill(attention_mask == 0, -100)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels,
            )
            total_loss += outputs.loss.item()
            num_batches += 1

    return total_loss / num_batches if num_batches else 0.0


In [None]:
# Write the model and the tokenizer to disk through their save_pretrained methods.
# NOTE(review): both target paths are placeholders — replace them with real
# output directories before running this cell.
# NOTE(review): none of the cells visible here call train(), so as written this
# would save the checkpoint exactly as it was loaded — confirm whether a
# training call is missing above.
model.save_pretrained('path/to/save/model')
tokenizer.save_pretrained('path/to/save/tokenizer')
