In [6]:
# Homework 6
# Part 1 Transformers
from datasets import load_dataset

# Dataset Description:
# I used the CNN/DailyMail dataset (v3.0.0), a widely used benchmark for abstractive text summarization. 
# Each example consists of a news article and a corresponding human-written summary (highlights).
# It contains over 280,000 training examples, making it ideal for fine-tuning large language models like BART.
# 
# The dataset includes:
# - 'article': the full news article
# - 'highlights': the summary of the article
dataset = load_dataset("cnn_dailymail", "3.0.0")

# Part 1 
# 90-10 split of the training set

train_dataset = dataset["train"]
test_dataset = dataset["validation"]

split_dataset = train_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset_90 = split_dataset["train"]
test_dataset_10 = split_dataset["test"]

README.md: 0.00B [00:00, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/259M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

In [7]:
# The train and test size
print("Train size:", len(train_dataset_90))
print("Test size:", len(test_dataset_10))

Train size: 258401
Test size: 28712


In [8]:
import torch
from torch.utils.data import DataLoader
from transformers import BartTokenizer, BartForConditionalGeneration
from torch.optim import AdamW
import numpy as np

# code to load the pre-trained tokenizer and model
model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

# I wrote this code to make google colab to make use of the A
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Preprocessing function which tokenizes the article-input and the summary -label
def preprocess_function(examples, max_input_length=512, max_target_length=128):
    inputs = tokenizer(
        examples["article"],
        max_length=max_input_length,
        truncation=True,
        padding="max_length"
    )
    labels = tokenizer(
        examples["highlights"],
        max_length=max_target_length,
        truncation=True,
        padding="max_length"
    )

    inputs["labels"] = labels["input_ids"]
    return inputs

#preprocessing
train_dataset_proc = train_dataset_90.map(preprocess_function, batched=True)
test_dataset_proc = test_dataset_10.map(preprocess_function, batched=True)

# relevant dataset format for PyTorch
columns_to_return = ["input_ids", "attention_mask", "labels"]
train_dataset_proc.set_format(type="torch", columns=columns_to_return)
test_dataset_proc.set_format(type="torch", columns=columns_to_return)

# dataLoaders
train_dataloader = DataLoader(train_dataset_proc, batch_size=2, shuffle=True)
test_dataloader = DataLoader(test_dataset_proc, batch_size=2)

# AdamW optimizer
optimizer = AdamW(model.parameters(), lr=1e-5)

# Training hyperparameters, only 1 epoch
epochs = 1  

# 5. Training Loop 
model.train()
for epoch in range(epochs):
    total_loss = 0
    for batch_idx, batch in enumerate(train_dataloader):
        optimizer.zero_grad()

        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        #forward pass
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

        loss = outputs.loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        if (batch_idx+1) % 1000 == 0:
            print(f"Epoch {epoch+1}, Step {batch_idx+1}, Avg Loss: {total_loss/(batch_idx+1):.4f}")
    print(f"Epoch {epoch+1} completed. Average Loss: {total_loss/len(train_dataloader):.4f}")

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Map:   0%|          | 0/258401 [00:00<?, ? examples/s]

Map:   0%|          | 0/28712 [00:00<?, ? examples/s]

Epoch 1, Step 1000, Avg Loss: 0.9336
Epoch 1, Step 2000, Avg Loss: 0.8472
Epoch 1, Step 3000, Avg Loss: 0.8162
Epoch 1, Step 4000, Avg Loss: 0.8010
Epoch 1, Step 5000, Avg Loss: 0.7878
Epoch 1, Step 6000, Avg Loss: 0.7834
Epoch 1, Step 7000, Avg Loss: 0.7808
Epoch 1, Step 8000, Avg Loss: 0.7783
Epoch 1, Step 9000, Avg Loss: 0.7763
Epoch 1, Step 10000, Avg Loss: 0.7729
Epoch 1, Step 11000, Avg Loss: 0.7719
Epoch 1, Step 12000, Avg Loss: 0.7723
Epoch 1, Step 13000, Avg Loss: 0.7718
Epoch 1, Step 14000, Avg Loss: 0.7705
Epoch 1, Step 15000, Avg Loss: 0.7682
Epoch 1, Step 16000, Avg Loss: 0.7680
Epoch 1, Step 17000, Avg Loss: 0.7675
Epoch 1, Step 18000, Avg Loss: 0.7669
Epoch 1, Step 19000, Avg Loss: 0.7661
Epoch 1, Step 20000, Avg Loss: 0.7650
Epoch 1, Step 21000, Avg Loss: 0.7649
Epoch 1, Step 22000, Avg Loss: 0.7638
Epoch 1, Step 23000, Avg Loss: 0.7630
Epoch 1, Step 24000, Avg Loss: 0.7623
Epoch 1, Step 25000, Avg Loss: 0.7623
Epoch 1, Step 26000, Avg Loss: 0.7621
Epoch 1, Step 27000, 

In [9]:
import evaluate

#this will load the metrics
rouge_metric = evaluate.load("rouge")
bleu_metric = evaluate.load("bleu")

model.eval()
generated_summaries = []
reference_summaries = []

for batch in test_dataloader:
    input_ids = batch["input_ids"].to(device)
    attention_mask = batch["attention_mask"].to(device)

    # this generates the summaries (greedy or beam search)
    with torch.no_grad():
        summary_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=128,
            num_beams=4
        )

    # This converts the generated summaries back to text
    for i, gen_ids in enumerate(summary_ids):
        gen_text = tokenizer.decode(gen_ids, skip_special_tokens=True)
        ref_text = tokenizer.decode(batch["labels"][i], skip_special_tokens=True)

        generated_summaries.append(gen_text)
        reference_summaries.append(ref_text)

# this code computes the ROUGE score
results_rouge = rouge_metric.compute(
    predictions=generated_summaries,
    references=reference_summaries
)

# this code computes the BLEU score
results_bleu = bleu_metric.compute(
    predictions=generated_summaries,         # list of strings
    references=reference_summaries           # list of strings
)

print("ROUGE scores:")
for k, v in results_rouge.items():
    if hasattr(v, "mid"):
        print(f"{k}: {v.mid}")
    else:
        print(f"{k}: {v}") 

print("BLEU score:", results_bleu["bleu"])

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

ROUGE scores:
rouge1: 0.4055762160965247
rouge2: 0.19251539735709045
rougeL: 0.28426960825129666
rougeLsum: 0.3797806697085129

BLEU score: 0.14696631258657059


In [None]:
# Results:
# Save the analysis as a text file

analysis_text = """
Results  and Hyperparameter Impact

Final Scores:
- ROUGE-1: 0.406
- ROUGE-2: 0.193
- ROUGE-L: 0.284
- ROUGE-Lsum: 0.380
- BLEU: 0.147
- Average Training Loss (1 epoch): 0.767

Interpretation:
The ROUGE and BLEU scores indicate that the model was  successful at capturing the core content of the input articles. The ROUGE-1 score (~40%) shows a strong overlap in important unigrams or keywords, while ROUGE-2 (~19%) and ROUGE-Lsum (~38%) show the model's ability to reconstruct meaningful sequences and maintain logical sentence structure.

A BLEU score of ~15% is much lower, as it's a more strict metric focusing on exact n-gram matches.  BLEU is originally designed for machine translation, its value in summarization tasks is often limited, but it still gives insight into overall fluency and structure.

Impact of the  Hyperparameters:

1. Learning Rate: 0.00001:
A low learning rate of 0.00001 helped avoid catastrophic forgetting of pre-trained weights while allowing for gradual fine-tuning.

2. Epochs (1)
Only one epoch was used due to resource constraints. While loss was decreasing and summaries were improving, additional epochs would likely lead to significantly better performance as the model continued to adapt to the data.

3. Batch Size (2)
A small batch size was used to fit the model in GPU memory (especially with bart-large).

4. Max Input and Output Lengths
- max_input_length = 512: allowed enough of the article to be passed in, capturing most context.
- max_target_length = 128: sufficient for summarization but might truncate longer gold summaries.

5. Beam Search (num_beams = 4)
Beam search helps generate more coherent summaries compared to greedy decoding, at the cost of speed. Increasing the beam width (e.g., to 6 or 8) could slightly improve ROUGE scores but would increase inference time.

Impact of the LLM (BART)

Using BART, especially the facebook/bart-large-cnn variant, played a significant role in achieving strong summarization performance with relatively little training. This model is pre-trained on a denoising autoencoder objective and fine-tuned on summarization tasks, making it ideal for abstractive summarization like this.

a different LLMs would have:
- A model like T5-base may have performed similarly but would require more careful prompt formatting.
- GPT-style models (decoder-only) would struggle more with summarization since they aren't fine-tuned for this and lack an encoder.
- A smaller model like distilBART would train faster but likely score lower.

Conclusion:
The model showed strong summarization capability after just one epoch of fine-tuning, thanks largely to the power of pre-trained transformer architectures like BART. While the hyperparameters chosen were conservative, they provided a solid baseline, and further improvements are possible with more training epochs, larger batch sizes, and tuning beam search settings.
"""


In [None]:
# Part 2 Reinforcement Learning:
# Describing a real world application of an Markov Decision Process:
# Define the content to be saved in a text file
# 
# States (S):
# The agent’s (car’s) position on the road, velocity, surrounding cars’ positions, traffic lights, etc.
# A simplified representation might encode “distance from lane center,” “relative speeds,” “distance to the car in front,” and so on.
# 
# Actions (A):
# The car can accelerate, decelerate, steer left, steer right, maintain speed etc.
# Discrete or continuous actions are possible. For simplicity, we might consider discrete actions like “turn left slightly,” “turn right slightly,” “accelerate,” “brake,” or “keep speed.”
# 
# Transition Model (T):
# This would describe how the environment (road, other cars, etc.) transitions from one state to another given the action.
# For instance, if you accelerate while turning left, your position changes in the lane, your speed changes, etc.
# 
# Reward (R):
# Positive reward for staying in your lane, driving safely, and negative rewards for collisions, dangerous maneuvers, or going off-lane.
# Additional reward could be given for reaching the destination quickly (efficiency).

In [1]:
# Task 3:
# Reinforcement Learning  is well-suited for trading because it naturally models the sequential and dynamic nature of financial markets. In trading, actions like buying, selling, or holding affect not just immediate returns but also future opportunities — making the problem ideal for Reinforcement learning’s trial-and-error learning and reward feedback. Unlike traditional models that rely on static rules or short-term predictions, reinforcment learning agents can learn optimal strategies by interacting with the market environment, adapting to changes, handling delayed rewards, and balancing risk and reward through experience. This ability to learn from the consequences of actions over time gives RL a significant advantage in building robust and adaptive trading systems.


#FinRL is a popular open-source framework developed by the AI4Finance Foundation that applies deep reinforcement learning to algorithmic trading. It provides end-to-end support for building trading agents, from financial data collection and preprocessing to environment creation, training, and backtesting. FinRL integrates with powerful RL libraries like Stable-Baselines3 and supports state-of-the-art algorithms such as PPO, DDPG, and A2C. It allows agents to learn how to trade a portfolio of stocks using real historical market data and technical indicators as input features. FinRL stands out due to its modular design, educational tutorials, and research-backed implementations, making it an ideal toolkit for both beginners and researchers exploring RL in quantitative finance.

# Professor Yanglet(Xiao-Yang Liu) from RPI works on this project https://github.com/AI4Finance-Foundation