## Zero Shot: 
Zero-shot learning (ZSL) applies a pretrained, state-of-the-art NLP model to a task without any task-specific training. To run such a model we can use Hugging Face’s Pipeline API.

In [1]:
# Colab helper used to attach Google Drive to this runtime.
from google.colab import drive

# Mount Drive at /content/drive; later cells read the dataset from, and write
# results to, paths under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


## Install Libraries

In [2]:

!pip install --upgrade transformers
!pip install datasets
!pip install rouge_score
!pip install rouge

Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 8.6 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 74.2 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 64.2 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 66.9 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 7.4 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Fo

In [3]:
import transformers
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq, pipeline
from datasets import load_dataset, load_metric, Dataset
import torch
import numpy as np
import pandas as pd


## Load Dataset

In [4]:
# Location of the preprocessed Amazon review CSV on the mounted Drive.
path = "/content/drive/MyDrive/NN_Project/amazon_review_dataset_processed.csv"
# Read the CSV and keep only the first 2500 characters of every review
# (original note: truncated because the model has max_length = 1024).
df = pd.read_csv(path).assign(
    reviewText=lambda frame: frame['reviewText'].str[:2500]
)
# Wrap the frame as a Hugging Face Dataset and display its (rows, columns) shape.
amazon = Dataset.from_pandas(df)
amazon.shape

(11848, 3)

In [5]:
# Step 1: hold out 10% of the full dataset as the test split.
NotTest_Test = amazon.train_test_split(seed=42, test_size=0.1)
NotTest, test = NotTest_Test["train"], NotTest_Test["test"]

# Step 2: split the remaining rows again, keeping 10% of them for validation.
Train_Val = NotTest.train_test_split(seed=42, test_size=0.1)
train, val = Train_Val["train"], Train_Val["test"]

## Summarisation Pipeline:
Reference: [Hugging Face `SummarizationPipeline` documentation (v4.17.0)](https://huggingface.co/docs/transformers/v4.17.0/en/main_classes/pipelines#transformers.SummarizationPipeline)

In [6]:
# Build the summarization pipeline from the pretrained facebook/bart-large-cnn
# checkpoint. No fine-tuning happens here: the model is used as downloaded.
# Use the first GPU when one is available; -1 keeps the pipeline on the CPU,
# which is slow for a checkpoint of this size (~1.5 GB download).
generator = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
    device=0 if torch.cuda.is_available() else -1,
)

Downloading:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.51G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

### Generate Zero Shot Summaries

In [7]:
# Generation below samples (do_sample=True); seed the RNGs so the summaries,
# and the ROUGE scores computed from them, are the same on every run.
transformers.set_seed(42)

reviews = test['reviewText']
summaries = test['summary']
# Number of test reviews to summarise; drives the input slice below.
batch_size=200
outputs = generator(
    reviews[:batch_size],
    max_length=25,
    min_length=2,
    num_beams=2,
    do_sample=True,
    # Clip any review that is still longer than the model's input limit
    # instead of failing on it.
    truncation=True,
)
# Each pipeline result is a dict; keep only the generated text.
generated_summaries = [output['summary_text'] for output in outputs]

In [8]:
# Slice inputs and targets to the number of summaries actually generated, so the
# three columns always have equal length (a mismatched hard-coded count would
# make the DataFrame constructor raise).
n_generated = len(generated_summaries)
df_result = pd.DataFrame({
    'review': reviews[:n_generated],
    'generated_summaries': generated_summaries,
    'target_summaries': summaries[:n_generated],
})
# Save the review / generated / target table to Drive.
df_result.to_csv('/content/drive/MyDrive/NN_Project/zero_shot_bart.csv')


### Evaluation


In [10]:
# Load the ROUGE metric object; calc_rouge_scores below reads it as a global.
# NOTE(review): `load_metric` may be deprecated or removed in newer `datasets`
# releases -- confirm against the installed version.
metric = load_metric('rouge')

def calc_rouge_scores(candidates, references):
    """Score generated summaries against reference summaries with ROUGE.

    Uses the module-level ``metric`` object, with stemming enabled.

    Args:
        candidates: Generated summaries, passed to the metric as predictions.
        references: Target summaries, passed to the metric as references.

    Returns:
        dict mapping each key returned by the metric to its ``mid`` F-measure,
        expressed as a percentage rounded to one decimal place.
    """
    raw_scores = metric.compute(
        predictions=candidates, references=references, use_stemmer=True
    )
    percentages = {}
    for name, aggregate in raw_scores.items():
        # Convert the 0-1 F-measure into a percentage with one decimal.
        percentages[name] = round(aggregate.mid.fmeasure * 100, 1)
    return percentages

# Score against exactly as many targets as there are generated summaries, so
# the slice cannot drift out of sync with the number of reviews summarised.
calc_rouge_scores(generated_summaries, summaries[:len(generated_summaries)])

{'rouge1': 13.7, 'rouge2': 4.1, 'rougeL': 12.2, 'rougeLsum': 12.2}