# Abstractive summarization

## T5 transformer

In [1]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

def T5_summarization_model (text):
    # initialize the model architecture and weights
    t5model = T5ForConditionalGeneration.from_pretrained("t5-base")
    # initialize the model tokenizer
    t5tokenizer = T5Tokenizer.from_pretrained("t5-base")
    device = torch.device('cpu')
    
    t5tokenized_text = t5tokenizer.encode("summarize:"+ text,
                                    truncation=True,
                                    return_attention_mask=True,
                                    add_special_tokens=True, 
                                    padding='max_length',     
                                    return_tensors="pt").to(device)


    t5summary_ids =  t5model.generate(input_ids=t5tokenized_text,
                    num_beams=3,  ##modelo olha para 3 possiveis palavras
                    min_length=20, ## number min of tokens
                    max_length=70,  ##number maximo of tokens
                    repetition_penalty=1.0,
                    early_stopping=True)

    output = t5tokenizer.decode(t5summary_ids[0],  
                             skip_special_tokens=True, 
                             clean_up_tokenization_spaces=True)
    return  output



## Summarization with BERT Model

In [3]:
import torch
import json
from summarizer import Summarizer
from summarizer import TransformerSummarizer

In [4]:


def bert_summarization_model (text):
    #Create default summarizer model
    # By default bert-extractive-summarizer uses the ‘bert-large-uncased‘ pretrained model.
    bert_model = Summarizer()

    summary = ''.join(bert_model(text, max_length=50))
    return summary 

In [5]:
for file in os.listdir("../../../extractors/Snopes/extractions")[:5]:
    with open("../../../extractors/Snopes/extractions/" + file) as f:
            data = json.load(f)
            f.close()

    summary = bert_summarization_model (' '.join(data['postText']))
    print(summary)

Downloading: 100%|██████████| 571/571 [00:00<00:00, 244kB/s]
Downloading: 100%|██████████| 1.34G/1.34G [01:54<00:00, 11.8MB/s] 
Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Downloading: 100%|████████

And he called her his “very gentle Valentine.” She first advertised her business in 1852.”


Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).





Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Azrieli Center, 26 Harokmim St., Holon, Israel.”


KeyboardInterrupt: 

## Summarization with GPT2 Model

In [6]:
from summarizer import TransformerSummarizer

In [7]:
def gtp2_summariation_model(text):
    GPT2_model = TransformerSummarizer(transformer_type="GPT2",
                         transformer_model_key="gpt2-medium")
    summary = ''.join(GPT2_model(text,  max_length=50))
    return summary

In [None]:
for file in os.listdir("../../../extractors/Snopes/extractions")[:1]:
    with open("../../../extractors/Snopes/extractions/" + file) as f:
            data = json.load(f)
            f.close()

    summary = gtp2_summariation_model (' '.join(data['postText']))
    print(summary)

## Summarization with XLNet

In [8]:
def XLNet_summarization_model (text):
    xlnet_model = TransformerSummarizer(transformer_type="XLNet",
                     transformer_model_key="xlnet-base-cased")
    summary = ''.join(xlnet_model(text, max_length=50))

    return summary

In [9]:
for file in os.listdir("../../../extractors/Snopes/extractions")[2:3]:
    with open("../../../extractors/Snopes/extractions/" + file) as f:
            data = json.load(f)
            f.close()

    summary = XLNet_summarization_model (' '.join(data['postText']))
    print(summary)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetModel: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Downloading: 100%|██████████| 798k/798k [00:00<00:00, 1.73MB/s]


Azrieli Center, 26 Harokmim St., Holon, Israel.”


## Summaziton with BART MODEL 

#### From META

In [10]:
from transformers import pipeline

In [11]:

def bart_summarization_model(text):
    summarizer = pipeline('summarization', model='facebook/bart-base', tokenizer='facebook/bart-large-cnn')
   
    summary = summarizer(text, max_length = 50)[0]['summary_text']
    return summary

  

## Choose 100 posts for test

In [12]:
import os
import json
import pandas as pd

In [None]:
df = pd.DataFrame()

for file in os.listdir("../../../extractors/Snopes/extractions")[:100]:
    with open("../../../extractors/Snopes/extractions/" + file) as f:
            data = json.load(f)
            f.close()



    summary_T5 = T5_summarization_model (' '.join(data['postText']))
    summary_BERT = bert_summarization_model(' '.join(data['postText']))
    summary_BART = bart_summarization_model(' '.join(data['postText']))
    summary_XLNET =  XLNet_summarization_model(' '.join(data['postText']))
    summary_GTP2 = gtp2_summariation_model (' '.join(data['postText']))
    dict = {'id': data['id'], 'allegation':data['allegation'], 'evaluation':data['evaluation'], 'T5':summary_T5, 'BERT': summary_BERT, 'BART': summary_BART, 'XLNet':summary_XLNET , 'GTP2': summary_GTP2 }

    df = df.append(dict, ignore_index = True)


df.to_csv('abs_summarization.csv')



    