In [None]:
!pip install -U transformers
!pip install -U datasets
!pip install rouge_score
!pip install nltk
!pip install evaluate
!pip install bert_score



In [None]:
from transformers import pipeline
from datasets import load_dataset, Dataset
import evaluate
import random
import pandas as pd

In [None]:
# Metric handles are created once here and reused by the evaluation cell below.
rouge = evaluate.load(path="rouge")
bleu = evaluate.load(path="bleu")
bertscore = evaluate.load(path="bertscore")

In [None]:
# Only the "test" split of the summarization dataset is loaded.
dataset = load_dataset(
    path="Kallia/stock-news-summaries-finetuning",
    split="test",
)

# Baseline: the stock t5-small checkpoint with its own tokenizer.
pipe_base = pipeline(
    task="summarization",
    model="t5-small",
    tokenizer="t5-small",
)

# Fine-tuned checkpoint; model and tokenizer come from the same repository id.
pipe_finetuned = pipeline(
    task="summarization",
    model="Kallia/t5-small-finetuned-stock-news",
    tokenizer="Kallia/t5-small-finetuned-stock-news",
)

README.md:   0%|          | 0.00/541 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.62M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/447k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/457k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2144 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/268 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/268 [00:00<?, ? examples/s]

Device set to use cpu
Device set to use cpu


In [None]:
# Bare expression: the cell output is the loaded split's repr (features and row count).
dataset

Dataset({
    features: ['article', 'summary'],
    num_rows: 268
})

In [None]:
def get_predictions(dataset, pipe, n=268):
    """Summarize the first ``n`` rows of ``dataset`` with ``pipe``.

    Args:
        dataset: Object supporting ``len()`` and ``select(indices)`` whose rows
            are mappings with "article" and "summary" keys.
        pipe: Callable invoked as ``pipe(text, max_length=128, truncation=True)``
            returning a sequence whose first item has a "summary_text" key.
        n: Maximum number of leading rows to process. ``None`` means every
            row. Values larger than ``len(dataset)`` are clamped so that
            ``select`` is never asked for out-of-range indices.

    Returns:
        A ``(preds, refs)`` tuple of index-aligned lists: the generated
        summaries and the reference summaries.
    """
    total = len(dataset)
    # Clamp instead of trusting the default: 268 only matches one specific split.
    limit = total if n is None else min(n, total)

    preds, refs = [], []
    for sample in dataset.select(range(limit)):
        generated = pipe(sample["article"], max_length=128, truncation=True)
        preds.append(generated[0]["summary_text"])
        refs.append(sample["summary"])
    return preds, refs

# Both runs go through the same get_predictions helper on the same dataset;
# the references are kept from the first run and discarded from the second.
print("Generating with base model...")
base_preds, refs = get_predictions(dataset=dataset, pipe=pipe_base)

print("Generating with fine-tuned model...")
finetuned_preds, _ = get_predictions(dataset=dataset, pipe=pipe_finetuned)

Generating with base model...


Your max_length is set to 128, but your input_length is only 67. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=33)
Your max_length is set to 128, but your input_length is only 113. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=56)


Generating with fine-tuned model...


Your max_length is set to 128, but your input_length is only 67. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=33)
Your max_length is set to 128, but your input_length is only 113. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=56)


In [None]:
def evaluate_model(preds, refs, model_name):
    """Print ROUGE, BLEU and the mean BERTScore F1 for one model's summaries.

    Uses the module-level ``rouge``, ``bleu`` and ``bertscore`` metric objects.

    Args:
        preds: Generated summaries.
        refs: Reference summaries, index-aligned with ``preds``.
        model_name: Label printed in the report header.

    Raises:
        ValueError: If ``preds`` is empty or its length differs from ``refs``.
    """
    # Validate first so a bad call fails before any metric is computed.
    if len(preds) == 0:
        raise ValueError("preds is empty; nothing to evaluate")
    if len(preds) != len(refs):
        raise ValueError(
            f"preds and refs differ in length: {len(preds)} != {len(refs)}"
        )

    print(f"Evaluating {model_name}")
    rouge_scores = rouge.compute(predictions=preds, references=refs)
    bleu_scores = bleu.compute(predictions=preds, references=refs)
    bert_scores = bertscore.compute(predictions=preds, references=refs, lang="en")

    # Convert to built-in float before rounding: rounding a NumPy scalar keeps
    # its type, so the dict would print as np.float64(0.39) instead of 0.39.
    print("ROUGE:", {k: round(float(v), 2) for k, v in rouge_scores.items()})
    print("BLEU:", round(float(bleu_scores["bleu"]), 2))
    f1_values = bert_scores["f1"]
    print("BERTScore (F1 avg):", round(sum(f1_values) / len(f1_values), 4), "\n")

# Score both sets of predictions against the same references.
evaluate_model(preds=base_preds, refs=refs, model_name="T5-small (zero-shot)")
evaluate_model(preds=finetuned_preds, refs=refs, model_name="T5-small (fine-tuned)")


Evaluating T5-small (zero-shot)
ROUGE: {'rouge1': np.float64(0.39), 'rouge2': np.float64(0.25), 'rougeL': np.float64(0.32), 'rougeLsum': np.float64(0.32)}
BLEU: 0.22
BERTScore (F1 avg): 0.8679 

Evaluating T5-small (fine-tuned)
ROUGE: {'rouge1': np.float64(0.56), 'rouge2': np.float64(0.5), 'rougeL': np.float64(0.53), 'rougeLsum': np.float64(0.53)}
BLEU: 0.43
BERTScore (F1 avg): 0.9159 



In [None]:
def show_examples(preds, refs, articles, n=10, baseline_preds=None, seed=None):
    """Print randomly chosen examples side by side for manual inspection.

    Args:
        preds: Summaries printed under "Fine-Tuned Model".
        refs: Reference summaries, index-aligned with ``preds``.
        articles: Source articles, index-aligned with ``preds``.
        n: Number of examples to draw; capped at ``len(preds)``.
        baseline_preds: Summaries printed under "Base Model". When omitted,
            the module-level ``base_preds`` is used.
        seed: Optional seed for a private ``random.Random`` so the same
            examples can be drawn again. ``None`` uses the shared ``random``
            module state.
    """
    if baseline_preds is None:
        # Fall back to the notebook-level variable this function always read.
        baseline_preds = base_preds

    print("Random samples for human evaluation:\n")
    rng = random if seed is None else random.Random(seed)
    # random.sample raises ValueError when asked for more items than exist.
    count = min(n, len(preds))
    for i in rng.sample(range(len(preds)), count):
        print(f"Example {i+1}:")
        print("Article:", articles[i])
        print("Reference Summary:", refs[i])
        print("Base Model:", baseline_preds[i])
        # Print from the argument, not from a global, so the caller decides
        # which predictions are shown.
        print("Fine-Tuned Model:", preds[i])
        print("\n")

# Read the article column so it can be indexed alongside the predictions.
articles = dataset["article"]
show_examples(preds=finetuned_preds, refs=refs, articles=articles)

Random samples for human evaluation:

Example 216:
Article: summarize: (Bloomberg) -- Tesla Inc. was delivered another blow Tuesday by Morgan Stanley analysts who slashed their worse-case scenario for the share price to just $10 over concerns the electric-car market is saturated.
“Demand is at the heart of the problem,” analysts led by Adam Jonas said in a note. “Tesla has grown too big relative to near-term demand, putting great strain on the fundamentals.”
Jonas lowered his “bear case” for Tesla shares from a previous estimate of $97, which assumes Tesla misses its current sales forecast in China by about half, and kept a price target of $230. The stock fell 2.6% to $200 a share in pre-market trading.
Tesla has drawn criticism for weak deliveries. Tesla handed over just 63,000 cars in the first quarter, yet expects to deliver as many as 100,000 cars in the second and four times that for the year. Hitting the full-year target is going to be a “Herculean task,” Wedbush Securities analy

In [None]:
def show_examples(preds, refs, articles, n=10, baseline_preds=None, seed=None):
    """Print randomly chosen examples side by side for manual inspection.

    Args:
        preds: Summaries printed under "Fine-Tuned Model".
        refs: Reference summaries, index-aligned with ``preds``.
        articles: Source articles, index-aligned with ``preds``.
        n: Number of examples to draw; capped at ``len(preds)``.
        baseline_preds: Summaries printed under "Base Model". When omitted,
            the module-level ``base_preds`` is used.
        seed: Optional seed for a private ``random.Random`` so the same
            examples can be drawn again. ``None`` uses the shared ``random``
            module state.
    """
    # NOTE(review): show_examples is also defined in the preceding cell; this
    # later definition replaces it once this cell runs. Keeping a single
    # definition would prevent the two copies from drifting apart.
    if baseline_preds is None:
        # Fall back to the notebook-level variable this function always read.
        baseline_preds = base_preds

    print("Random samples for human evaluation:\n")
    rng = random if seed is None else random.Random(seed)
    # random.sample raises ValueError when asked for more items than exist.
    count = min(n, len(preds))
    for i in rng.sample(range(len(preds)), count):
        print(f"Example {i+1}:")
        print("Article:", articles[i])
        print("Reference Summary:", refs[i])
        print("Base Model:", baseline_preds[i])
        # Print from the argument, not from a global, so the caller decides
        # which predictions are shown.
        print("Fine-Tuned Model:", preds[i])
        print("\n")

# Read the article column so it can be indexed alongside the predictions.
articles = dataset["article"]
show_examples(preds=finetuned_preds, refs=refs, articles=articles)

Random samples for human evaluation:

Example 210:
Article: summarize: (Bloomberg) -- Elon Musk said Tesla Inc. will make about 500,000 cars this year, a prediction that’s difficult to gauge given the chief executive officer’s proclivity for setting stretch goals.
The number Musk put in a Twitter posting late Tuesday is roughly in line with his previous comments, depending on which figures investors paid attention to when Tesla reported earnings Jan. 30.
February 20, 2019
In a letter to shareholders, Musk and his chief financial officer forecast as many as 400,000 total vehicle deliveries this year. But within hours, the CEO told an analyst on Tesla’s earnings call to expect sales for just the Model 3 to reach as many as 500,000 units in 2019.
To Musk’s credit, Tesla did make substantial progress in mass-manufacturing electric vehicles last year, after overcoming the “production hell” he predicted the company would go through starting in the fall of 2017. Total deliveries more than tri