In [12]:
import os

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
from torch.utils.data.dataset import random_split
from transformers import AutoTokenizer, BartForConditionalGeneration
from transformers import Trainer, TrainingArguments, DataCollatorForSeq2Seq
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer


## Dataset 
The dataset I chose is the HuffPost (Huffington Post) news dataset. It provides a `description` and a `short_description` for each article, so we can train a model to map the description to the short description, effectively building a news article summarizer. It will be interesting to see how well this model generalizes to non-news data. Since news articles are very diverse, I believe the model should generalize well to other summarization tasks, such as summarizing stories or even research papers.

In [2]:
# Load the dataset
class dset(Dataset):
    """Summarization dataset built from a CSV of article descriptions.

    Each item pairs the tokenized ``description`` column (model input) with
    the tokenized ``short_description`` column (target summary).

    Args:
        path: Path to a CSV file containing ``description`` and
            ``short_description`` columns.
        tokenizer: Object exposing ``encode(text, add_special_tokens=...,
            max_length=..., truncation=...)`` that returns a list of token ids.
        max_input_length: Truncation length applied to the input text.
        max_target_length: Truncation length applied to the target text.
        seed: Optional ``random_state`` for the row shuffle; ``None`` keeps
            the shuffle non-deterministic.
    """

    def __init__(self, path, tokenizer, max_input_length=1024,
                 max_target_length=1024, seed=None):
        df = pd.read_csv(path)
        # Shuffle the rows once at load time.
        df = df.sample(frac=1, random_state=seed).reset_index(drop=True)
        self.texts = df['description'].tolist()
        self.labels = df['short_description'].tolist()
        self.filter()
        self.tokenizer = tokenizer
        self.max_input_length = max_input_length
        self.max_target_length = max_target_length

    def __len__(self):
        """Return the number of (text, label) pairs kept after filtering."""
        return len(self.texts)

    def filter(self):
        """Drop every pair whose text or label is not a string.

        Empty CSV cells are read by pandas as float NaN, which the tokenizer
        cannot encode, so such rows are removed from both lists in lockstep.
        """
        kept = [
            (text, label)
            for text, label in zip(self.texts, self.labels)
            if isinstance(text, str) and isinstance(label, str)
        ]
        self.texts = [text for text, _ in kept]
        self.labels = [label for _, label in kept]

    def __getitem__(self, idx):
        """Return ``{"input_ids": ..., "labels": ...}`` for the pair at ``idx``."""
        x = self.tokenizer.encode(
            self.texts[idx],
            add_special_tokens=True,
            max_length=self.max_input_length,
            truncation=True,
        )
        # The target must come from the short description; encoding the input
        # text here would train the model to copy its input.
        y = self.tokenizer.encode(
            self.labels[idx],
            add_special_tokens=True,
            max_length=self.max_target_length,
            truncation=True,
        )
        return {
            "input_ids": x,
            "labels": y,
        }

In [3]:

# Checkpoint used for both the tokenizer and the seq2seq model.
model_name = "google-t5/t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Build the full dataset; tokenization happens per item inside `dset`.
d = dset("huffpost_news_data.csv", tokenizer)

# Keep 95% of the examples for training; whatever remains after rounding
# down becomes the held-out evaluation set.
TRAIN_FRACTION = 0.95
dataset_size = len(d)
train_size = int(dataset_size * TRAIN_FRACTION)
test_size = dataset_size - train_size

train_dataset, test_dataset = random_split(d, lengths=[train_size, test_size])


In [4]:

def decode_preds_and_labels(predictions, labels):
    """Turn model predictions and label ids back into stripped strings.

    Args:
        predictions: Token ids, raw scores with a trailing vocabulary axis,
            or a tuple whose first element is one of those.
        labels: Label token ids, where -100 marks positions to ignore.

    Returns:
        ``(decoded_preds, decoded_labels)``: a list of prediction strings and
        a list of single-element reference lists (one reference per example).
    """
    pad_id = tokenizer.pad_token_id

    # When the model returns several outputs they arrive as a tuple; the
    # token scores/ids are assumed to be the first element — TODO confirm
    # for models other than the one used here.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)
    # A 3-D array is treated as per-token scores over the vocabulary and
    # reduced to the highest-scoring token id at each position.
    if predictions.ndim == 3:
        predictions = predictions.argmax(axis=-1)

    # -100 is not a valid token id, so swap it for the pad token before
    # decoding; this applies to padded predictions as well as labels.
    predictions = np.where(predictions != -100, predictions, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [[label.strip()] for label in decoded_labels]
    return decoded_preds, decoded_labels
    
def compute_metrics(p):
    """Compute ROUGE (scaled to 0-100) and BLEU for an evaluation pass.

    Args:
        p: A ``(predictions, labels)`` pair as passed by the trainer.

    Returns:
        Dict mapping each ROUGE key to its score times 100, plus ``"bleu"``.
    """
    # NOTE(review): `bleu` and `rouge` are not defined in the visible cells;
    # they must be metric objects with a `.compute(predictions=, references=)`
    # method, created before this function is called.
    predictions, labels = p
    decoded_preds, decoded_labels = decode_preds_and_labels(predictions, labels)
    bleu_result = bleu.compute(predictions=decoded_preds, references=decoded_labels)
    rouge_result = rouge.compute(predictions=decoded_preds, references=decoded_labels)

    result = {}
    for key, value in rouge_result.items():
        # Accept both aggregate score objects (`.mid.fmeasure`) and plain
        # numeric scores, since metric implementations differ.
        score = value.mid.fmeasure if hasattr(value, "mid") else value
        result[key] = score * 100
    result["bleu"] = bleu_result["bleu"]
    # Without this return the trainer receives None instead of the metrics.
    return result

In [14]:
from transformers import Trainer, TrainingArguments

# The allocator configuration has to be in the environment before PyTorch
# first uses the GPU, so set it before TrainingArguments/Trainer are built.
# NOTE(review): if CUDA was already initialised earlier in this kernel,
# restart the kernel for this setting to take effect.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'


def _argmax_logits(logits, labels):
    """Reduce model outputs to greedy token ids before they are stored for metrics.

    Keeping only the argmax avoids accumulating full-vocabulary scores for
    every evaluation example.
    """
    # With several model outputs the logits are assumed to come first.
    if isinstance(logits, tuple):
        logits = logits[0]
    return logits.argmax(dim=-1)


data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

training_args = TrainingArguments(
    output_dir="/media/lenny/e8491f8e-2ac1-4d31-a37f-c115e009ec90/results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=3,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator, # this will dynamically pad examples in each batch to be equal length
    compute_metrics=compute_metrics,
    preprocess_logits_for_metrics=_argmax_logits,
)

# train model
trainer.train()



dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.57 GiB. GPU 0 has a total capacty of 7.79 GiB of which 1.41 GiB is free. Including non-PyTorch memory, this process has 6.36 GiB memory in use. Of the allocated memory 3.76 GiB is allocated by PyTorch, and 2.43 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

# Reinforcement Learning

## Part I
I am currently working on a personal project related to deep Q-learning. The project is a bot, trained with RL, that plays ultimate tic-tac-toe.
Ultimate tic-tac-toe is a tic-tac-toe board made of tic-tac-toe boards, and each move in a given sub-square specifies which super square the
next move must be made in. Like most other board games, this can be formulated as an MDP. The state is made up of the current board state and
the current super board (which of the 9 tic-tac-toe boards you are allowed to play on). The action space is really simple: it is described
by the squares on the board, of which there are 81. The problem with posing this as a pure tabular MDP to be solved by
Q-learning is the raw scale of the state space. While this is not a fully accurate estimate, we can (poorly) approximate the number of
valid states by $3^{81}$, as each square can contain an x, an o, or be blank. One approach to dealing with such a large MDP is to approximate the tabular method with parameterized functions that are trained to be close to $Q(s,a)$. The reward for this game should be -1
for a loss, 0 for a tie, and 1 for a win. However, you could make the argument that adding sub-rewards for capturing sub-boards would allow the model to learn the basics of the game more quickly.
## Part II
Of the provided sub-fields, I believe the ones most aligned with RL are trading and recommender systems. Since I have to choose one, I will choose trading because I find it more interesting. Trading seems like it would be harder to solve with supervised learning than
recommender systems or medical diagnosis. Trading resembles a game, and learning the optimal strategy/policy is something
that cannot be done with supervised learning. One can use supervised learning models to devise a strategy, but solving the problem
end to end can only be done with RL. It is intuitively the most straightforward to translate into an RL problem. A cool open-source RL project in finance is FinRL (https://github.com/AI4Finance-Foundation/FinRL). I also recently found out that Professor
Yanglet Liu is a big contributor to this project. The project aims to make deep reinforcement learning (DRL) for finance more beginner-friendly. Training a model from scratch using DRL in this field is difficult for many reasons. Firstly, high-frequency data is hard to come by. Secondly, creating the models themselves is hard. Even harder is setting up the environment with which the model will interact. FinRL provides both the models and the environment, allowing beginners to jump right into the field and attempt to generate novel trading strategies from which they can profit.