In [21]:
# Loading Relevant Packages 
import torch
import pandas as pd
from transformers import BartForConditionalGeneration, BartTokenizer
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import XLNetConfig, XLNetModel
from transformers import XLNetLMHeadModel, XLNetTokenizer
from tqdm import tqdm

In [22]:
#Loading the dataset 
df = pd.read_csv('news_summary_more.csv', encoding='latin-1', nrows= 1000)
print(df)

                                             headlines  \
0    upGrad learner switches to career in ML & Al w...   
1    Delhi techie wins free food from Swiggy for on...   
2    New Zealand end Rohit Sharma-led India's 12-ma...   
3    Aegon life iTerm insurance plan helps customer...   
4    Have known Hirani for yrs, what if MeToo claim...   
..                                                 ...   
995  Aamir's 'Rubaru Roshni' screened for Sri Sri R...   
996  Playing Meena Thackeray was an honour: Amrita ...   
997  Won't apologise: Kangana on Karni Sena's threa...   
998  Wonderful, humbling feeling: Anupam on meeting...   
999  Erotic drawing by late rapper Tupac sold for $...   

                                                  text  
0    Saurav Kant, an alumnus of upGrad and IIIT-B's...  
1    Kunal Shah's credit card bill payment platform...  
2    New Zealand defeated India by 8 wickets in the...  
3    With Aegon Life iTerm Insurance plan, customer...  
4    Speaking abou

In [4]:
#Checking if the variable distribution is even 
df.count()

headlines    1000
text         1000
dtype: int64

### 'text' Summarizations using BERT using pre-trained model 

In [6]:
texts_to_summarize = df['text']
texts_to_summarize

0      Saurav Kant, an alumnus of upGrad and IIIT-B's...
1      Kunal Shah's credit card bill payment platform...
2      New Zealand defeated India by 8 wickets in the...
3      With Aegon Life iTerm Insurance plan, customer...
4      Speaking about the sexual harassment allegatio...
                             ...                        
995    Aamir Khan screened his upcoming short film 'R...
996    Amrita Rao, who plays Bal Thackeray's wife Mee...
997    Kangana Ranaut, who was threatened by the Karn...
998    Sharing a picture with US talk show host Jimmy...
999    An erotic drawing by late rapper Tupac Shakur ...
Name: text, Length: 1000, dtype: object

In [25]:
model_name = 'facebook/bart-large-cnn'
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

texts_to_summarize=df['text']

# Tokenize and summarize
summaries = []
for index, text_to_summarize in tqdm(texts_to_summarize.items(), total=len(texts_to_summarize), desc="Summarizing"):
    inputs = tokenizer.encode("summarize: " + text_to_summarize, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(inputs, max_length=150, min_length=50, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    summaries.append(summary)

Summarizing: 100%|███████████████████████████████████████████████████████████████| 1000/1000 [2:24:21<00:00,  8.66s/it]


In [26]:
if isinstance(df, pd.DataFrame):
    target_column = 'text'
    target_column_index = df.columns.get_loc(target_column)
    df.insert(target_column_index + 1, 'summary-text_BART', summaries)
else:
    print("Error: 'df' is not a DataFrame.")

In [27]:
df.head(10)

Unnamed: 0,headlines,text,summary-text_BART
0,upGrad learner switches to career in ML & Al w...,"Saurav Kant, an alumnus of upGrad and IIIT-B's...","Saurav Kant, an alumnus of upGrad and IIIT-B's..."
1,Delhi techie wins free food from Swiggy for on...,Kunal Shah's credit card bill payment platform...,Kunal Shah's credit card bill payment platform...
2,New Zealand end Rohit Sharma-led India's 12-ma...,New Zealand defeated India by 8 wickets in the...,New Zealand defeated India by 8 wickets in the...
3,Aegon life iTerm insurance plan helps customer...,"With Aegon Life iTerm Insurance plan, customer...","With Aegon Life iTerm Insurance plan, customer..."
4,"Have known Hirani for yrs, what if MeToo claim...",Speaking about the sexual harassment allegatio...,Rajkumar Hirani has been accused by an assista...
5,Rahat Fateh Ali Khan denies getting notice for...,Pakistani singer Rahat Fateh Ali Khan has deni...,Rahat Fateh Ali Khan has denied receiving any ...
6,"India get all out for 92, their lowest ODI tot...",India recorded their lowest ODI total in New Z...,India recorded their lowest ODI total in New Z...
7,Govt directs Alok Verma to join work 1 day bef...,Weeks after ex-CBI Director Alok Verma told th...,The Home Ministry asked him to join work on th...
8,Called PM Modi 'sir' 10 times to satisfy his e...,Andhra Pradesh CM N Chandrababu Naidu has said...,Andhra Pradesh CM N Chandrababu Naidu addresse...
9,"Cong wins Ramgarh bypoll in Rajasthan, takes t...",Congress candidate Shafia Zubair won the Ramga...,Congress candidate Shafia Zubair won the Ramga...


In [None]:
# Download as feather format (Rdata)
import feather
path = 'BERT_Patents_Similarity_Summary_text.feather'
feather.write_dataframe(df, path)

### 'Text' Summarizations using GPT2

In [12]:
texts_to_summarize_2 = df['text']
texts_to_summarize_2

0      Saurav Kant, an alumnus of upGrad and IIIT-B's...
1      Kunal Shah's credit card bill payment platform...
2      New Zealand defeated India by 8 wickets in the...
3      With Aegon Life iTerm Insurance plan, customer...
4      Speaking about the sexual harassment allegatio...
                             ...                        
995    Aamir Khan screened his upcoming short film 'R...
996    Amrita Rao, who plays Bal Thackeray's wife Mee...
997    Kangana Ranaut, who was threatened by the Karn...
998    Sharing a picture with US talk show host Jimmy...
999    An erotic drawing by late rapper Tupac Shakur ...
Name: text, Length: 1000, dtype: object

In [13]:
model_name = 'gpt2'
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Tokenize and generate text
generated_texts = []
for index, text_to_generate in tqdm(texts_to_summarize_2.items(), total=len(texts_to_summarize_2), desc="Generating Text"):
    inputs = tokenizer.encode(text_to_generate, return_tensors="pt", max_length=1024, truncation=True)
    generated_ids = model.generate(inputs, max_length=150, num_beams=4, early_stopping=True)
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    generated_texts.append(generated_text)

Generating Text:   0%|                                                                        | 0/1000 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:   0%|                                                              | 1/1000 [00:06<1:42:02,  6.13s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:   0%|                                                              | 2/1000 [00:10<1:25:45,  5.16s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:   2%|█▎                                                           | 22/1000 [01:56<1:27:28,  5.37s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:   2%|█▍                                                           | 23/1000 [02:02<1:32:02,  5.65s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:   2%|█▍                                                           | 24/1000 [02:07<1:29:50,  5.52s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:   4%|██▋                                                          | 44/1000 [03:46<1:08:42,  4.31s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:   4%|██▋                                                          | 45/1000 [03:52<1:17:36,  4.88s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:   5%|██▊                                                          | 46/1000 [03:57<1:18:10,  4.92s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:   7%|████                                                         | 66/1000 [05:45<1:24:18,  5.42s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:   7%|████                                                         | 67/1000 [05:49<1:15:51,  4.88s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:   7%|████▏                                                        | 68/1000 [05:53<1:14:25,  4.79s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:   9%|█████▎                                                       | 88/1000 [07:39<1:16:26,  5.03s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:   9%|█████▍                                                       | 89/1000 [07:44<1:14:50,  4.93s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:   9%|█████▍                                                       | 90/1000 [07:50<1:18:45,  5.19s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  11%|██████▌                                                     | 110/1000 [09:35<1:18:09,  5.27s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  11%|██████▋                                                     | 111/1000 [09:37<1:01:00,  4.12s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  11%|██████▋                                                     | 112/1000 [09:42<1:05:53,  4.45s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  13%|███████▉                                                    | 132/1000 [11:29<1:18:53,  5.45s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  13%|███████▉                                                    | 133/1000 [11:35<1:21:15,  5.62s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  13%|████████                                                    | 134/1000 [11:41<1:19:59,  5.54s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  15%|█████████▏                                                  | 154/1000 [13:26<1:15:08,  5.33s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  16%|█████████▎                                                  | 155/1000 [13:31<1:15:58,  5.39s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  16%|█████████▎                                                  | 156/1000 [13:35<1:09:48,  4.96s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  18%|██████████▌                                                 | 176/1000 [15:19<1:10:36,  5.14s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  18%|██████████▌                                                 | 177/1000 [15:24<1:11:44,  5.23s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  18%|██████████▋                                                 | 178/1000 [15:30<1:12:28,  5.29s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  20%|███████████▉                                                | 198/1000 [17:15<1:08:06,  5.10s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  20%|███████████▉                                                | 199/1000 [17:20<1:08:44,  5.15s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  20%|████████████                                                | 200/1000 [17:26<1:09:57,  5.25s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  22%|█████████████▏                                              | 220/1000 [19:11<1:10:19,  5.41s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  22%|█████████████▎                                              | 221/1000 [19:17<1:14:10,  5.71s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  22%|█████████████▎                                              | 222/1000 [19:23<1:15:54,  5.85s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  24%|██████████████▌                                             | 242/1000 [21:14<1:16:29,  6.05s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  24%|██████████████▌                                             | 243/1000 [21:20<1:13:56,  5.86s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  24%|██████████████▋                                             | 244/1000 [21:26<1:15:04,  5.96s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  26%|███████████████▊                                            | 264/1000 [23:08<1:01:05,  4.98s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  26%|███████████████▉                                            | 265/1000 [23:14<1:04:01,  5.23s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  27%|███████████████▉                                            | 266/1000 [23:19<1:05:41,  5.37s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  29%|█████████████████▋                                            | 286/1000 [25:01<55:53,  4.70s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  29%|█████████████████▊                                            | 287/1000 [25:06<55:31,  4.67s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  29%|█████████████████▊                                            | 288/1000 [25:12<59:50,  5.04s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  31%|██████████████████▍                                         | 308/1000 [26:55<1:03:21,  5.49s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  31%|██████████████████▌                                         | 309/1000 [27:00<1:00:44,  5.27s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  31%|███████████████████▏                                          | 310/1000 [27:05<59:36,  5.18s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  33%|████████████████████▍                                         | 330/1000 [28:49<57:02,  5.11s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  33%|████████████████████▌                                         | 331/1000 [28:54<56:08,  5.04s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  33%|████████████████████▌                                         | 332/1000 [28:59<55:49,  5.01s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  35%|█████████████████████▊                                        | 352/1000 [30:38<55:42,  5.16s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  35%|█████████████████████▉                                        | 353/1000 [30:44<58:37,  5.44s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  35%|█████████████████████▉                                        | 354/1000 [30:50<59:31,  5.53s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  37%|███████████████████████▏                                      | 374/1000 [32:36<56:31,  5.42s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  38%|███████████████████████▎                                      | 375/1000 [32:42<57:35,  5.53s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  38%|███████████████████████▎                                      | 376/1000 [32:47<55:59,  5.38s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  40%|████████████████████████▌                                     | 396/1000 [34:34<55:18,  5.49s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  40%|████████████████████████▌                                     | 397/1000 [34:39<53:49,  5.36s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  40%|████████████████████████▋                                     | 398/1000 [34:44<52:44,  5.26s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  42%|█████████████████████████▉                                    | 418/1000 [36:27<41:56,  4.32s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  42%|█████████████████████████▉                                    | 419/1000 [36:32<42:17,  4.37s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  42%|██████████████████████████                                    | 420/1000 [36:37<44:44,  4.63s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  44%|███████████████████████████▎                                  | 440/1000 [38:13<48:09,  5.16s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  44%|███████████████████████████▎                                  | 441/1000 [38:19<50:22,  5.41s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  44%|███████████████████████████▍                                  | 442/1000 [38:25<51:48,  5.57s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  46%|████████████████████████████▋                                 | 462/1000 [40:00<44:47,  5.00s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  46%|████████████████████████████▋                                 | 463/1000 [40:05<45:54,  5.13s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  46%|████████████████████████████▊                                 | 464/1000 [40:11<46:26,  5.20s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  48%|██████████████████████████████                                | 484/1000 [41:59<45:48,  5.33s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  48%|██████████████████████████████                                | 485/1000 [42:03<42:31,  4.95s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  49%|██████████████████████████████▏                               | 486/1000 [42:09<45:22,  5.30s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  51%|███████████████████████████████▎                              | 506/1000 [44:01<47:59,  5.83s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  51%|███████████████████████████████▍                              | 507/1000 [44:07<48:04,  5.85s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  51%|███████████████████████████████▍                              | 508/1000 [44:13<47:37,  5.81s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  53%|████████████████████████████████▋                             | 528/1000 [46:02<46:39,  5.93s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  53%|████████████████████████████████▊                             | 529/1000 [46:08<45:09,  5.75s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  53%|████████████████████████████████▊                             | 530/1000 [46:12<42:39,  5.45s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  55%|██████████████████████████████████                            | 550/1000 [48:01<39:30,  5.27s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  55%|██████████████████████████████████▏                           | 551/1000 [48:06<39:36,  5.29s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  55%|██████████████████████████████████▏                           | 552/1000 [48:11<37:45,  5.06s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  57%|███████████████████████████████████▍                          | 572/1000 [49:57<38:07,  5.34s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  57%|███████████████████████████████████▌                          | 573/1000 [50:02<36:58,  5.20s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  57%|███████████████████████████████████▌                          | 574/1000 [50:08<38:21,  5.40s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  59%|████████████████████████████████████▊                         | 594/1000 [51:44<35:44,  5.28s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  60%|████████████████████████████████████▉                         | 595/1000 [51:47<31:17,  4.64s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  60%|████████████████████████████████████▉                         | 596/1000 [51:53<33:18,  4.95s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  62%|██████████████████████████████████████▏                       | 616/1000 [53:39<32:22,  5.06s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  62%|██████████████████████████████████████▎                       | 617/1000 [53:45<33:06,  5.19s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  62%|██████████████████████████████████████▎                       | 618/1000 [53:51<34:38,  5.44s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  64%|███████████████████████████████████████▌                      | 638/1000 [55:31<30:35,  5.07s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  64%|███████████████████████████████████████▌                      | 639/1000 [55:36<31:35,  5.25s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  64%|███████████████████████████████████████▋                      | 640/1000 [55:42<32:09,  5.36s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  66%|████████████████████████████████████████▉                     | 660/1000 [57:33<30:41,  5.42s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  66%|████████████████████████████████████████▉                     | 661/1000 [57:38<30:56,  5.48s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  66%|█████████████████████████████████████████                     | 662/1000 [57:44<31:24,  5.58s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  68%|██████████████████████████████████████████▎                   | 682/1000 [59:34<29:29,  5.57s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  68%|██████████████████████████████████████████▎                   | 683/1000 [59:39<28:50,  5.46s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  68%|██████████████████████████████████████████▍                   | 684/1000 [59:44<28:15,  5.37s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  70%|██████████████████████████████████████████▏                 | 704/1000 [1:01:36<26:49,  5.44s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  70%|██████████████████████████████████████████▎                 | 705/1000 [1:01:42<27:23,  5.57s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  71%|██████████████████████████████████████████▎                 | 706/1000 [1:01:46<25:31,  5.21s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  73%|███████████████████████████████████████████▌                | 726/1000 [1:03:24<17:53,  3.92s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  73%|███████████████████████████████████████████▌                | 727/1000 [1:03:29<19:27,  4.28s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  73%|███████████████████████████████████████████▋                | 728/1000 [1:03:35<21:40,  4.78s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  75%|████████████████████████████████████████████▉               | 748/1000 [1:05:24<22:16,  5.30s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  75%|████████████████████████████████████████████▉               | 749/1000 [1:05:29<21:19,  5.10s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  75%|█████████████████████████████████████████████               | 750/1000 [1:05:35<22:20,  5.36s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  77%|██████████████████████████████████████████████▏             | 770/1000 [1:07:30<22:06,  5.77s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  77%|██████████████████████████████████████████████▎             | 771/1000 [1:07:35<21:52,  5.73s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  77%|██████████████████████████████████████████████▎             | 772/1000 [1:07:40<21:05,  5.55s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  79%|███████████████████████████████████████████████▌            | 792/1000 [1:09:28<18:58,  5.47s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  79%|███████████████████████████████████████████████▌            | 793/1000 [1:09:34<19:12,  5.57s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  79%|███████████████████████████████████████████████▋            | 794/1000 [1:09:40<19:19,  5.63s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  81%|████████████████████████████████████████████████▊           | 814/1000 [1:11:29<16:31,  5.33s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  82%|████████████████████████████████████████████████▉           | 815/1000 [1:11:33<15:19,  4.97s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  82%|████████████████████████████████████████████████▉           | 816/1000 [1:11:38<15:38,  5.10s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  84%|██████████████████████████████████████████████████▏         | 836/1000 [1:13:19<12:32,  4.59s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  84%|██████████████████████████████████████████████████▏         | 837/1000 [1:13:23<12:24,  4.57s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  84%|██████████████████████████████████████████████████▎         | 838/1000 [1:13:28<12:39,  4.69s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  86%|███████████████████████████████████████████████████▍        | 858/1000 [1:15:24<13:27,  5.69s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  86%|███████████████████████████████████████████████████▌        | 859/1000 [1:15:30<13:33,  5.77s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  86%|███████████████████████████████████████████████████▌        | 860/1000 [1:15:36<13:23,  5.74s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  88%|████████████████████████████████████████████████████▊       | 880/1000 [1:17:26<11:14,  5.62s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  88%|████████████████████████████████████████████████████▊       | 881/1000 [1:17:30<09:59,  5.04s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  88%|████████████████████████████████████████████████████▉       | 882/1000 [1:17:34<09:35,  4.88s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  90%|██████████████████████████████████████████████████████      | 902/1000 [1:19:21<09:04,  5.56s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  90%|██████████████████████████████████████████████████████▏     | 903/1000 [1:19:27<09:24,  5.82s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  90%|██████████████████████████████████████████████████████▏     | 904/1000 [1:19:33<09:29,  5.93s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  92%|███████████████████████████████████████████████████████▍    | 924/1000 [1:21:19<07:16,  5.74s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  92%|███████████████████████████████████████████████████████▌    | 925/1000 [1:21:25<07:00,  5.61s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  93%|███████████████████████████████████████████████████████▌    | 926/1000 [1:21:30<06:44,  5.46s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  95%|████████████████████████████████████████████████████████▊   | 946/1000 [1:23:20<04:37,  5.15s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  95%|████████████████████████████████████████████████████████▊   | 947/1000 [1:23:27<04:52,  5.52s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  95%|████████████████████████████████████████████████████████▉   | 948/1000 [1:23:32<04:39,  5.37s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  97%|██████████████████████████████████████████████████████████  | 968/1000 [1:25:18<03:03,  5.74s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  97%|██████████████████████████████████████████████████████████▏ | 969/1000 [1:25:24<03:02,  5.89s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  97%|██████████████████████████████████████████████████████████▏ | 970/1000 [1:25:29<02:46,  5.56s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

Generating Text:  99%|███████████████████████████████████████████████████████████▍| 990/1000 [1:27:22<00:49,  4.97s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  99%|███████████████████████████████████████████████████████████▍| 991/1000 [1:27:29<00:49,  5.47s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Generating Text:  99%|███████████████████████████████████████████████████████████▌| 992/1000 [1:27:34<00:44,  5.52s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attent

In [28]:
if isinstance(df, pd.DataFrame):
    target_column = 'text'
    target_column_index = df.columns.get_loc(target_column)
    df.insert(target_column_index + 1, 'summary-text_GPT2', generated_texts)
else:
    print("Error: 'df' is not a DataFrame.")

In [29]:
df.head(10)

Unnamed: 0,headlines,text,summary-text_GPT2,summary-text_BART
0,upGrad learner switches to career in ML & Al w...,"Saurav Kant, an alumnus of upGrad and IIIT-B's...","Saurav Kant, an alumnus of upGrad and IIIT-B's...","Saurav Kant, an alumnus of upGrad and IIIT-B's..."
1,Delhi techie wins free food from Swiggy for on...,Kunal Shah's credit card bill payment platform...,Kunal Shah's credit card bill payment platform...,Kunal Shah's credit card bill payment platform...
2,New Zealand end Rohit Sharma-led India's 12-ma...,New Zealand defeated India by 8 wickets in the...,New Zealand defeated India by 8 wickets in the...,New Zealand defeated India by 8 wickets in the...
3,Aegon life iTerm insurance plan helps customer...,"With Aegon Life iTerm Insurance plan, customer...","With Aegon Life iTerm Insurance plan, customer...","With Aegon Life iTerm Insurance plan, customer..."
4,"Have known Hirani for yrs, what if MeToo claim...",Speaking about the sexual harassment allegatio...,Speaking about the sexual harassment allegatio...,Rajkumar Hirani has been accused by an assista...
5,Rahat Fateh Ali Khan denies getting notice for...,Pakistani singer Rahat Fateh Ali Khan has deni...,Pakistani singer Rahat Fateh Ali Khan has deni...,Rahat Fateh Ali Khan has denied receiving any ...
6,"India get all out for 92, their lowest ODI tot...",India recorded their lowest ODI total in New Z...,India recorded their lowest ODI total in New Z...,India recorded their lowest ODI total in New Z...
7,Govt directs Alok Verma to join work 1 day bef...,Weeks after ex-CBI Director Alok Verma told th...,Weeks after ex-CBI Director Alok Verma told th...,The Home Ministry asked him to join work on th...
8,Called PM Modi 'sir' 10 times to satisfy his e...,Andhra Pradesh CM N Chandrababu Naidu has said...,Andhra Pradesh CM N Chandrababu Naidu has said...,Andhra Pradesh CM N Chandrababu Naidu addresse...
9,"Cong wins Ramgarh bypoll in Rajasthan, takes t...",Congress candidate Shafia Zubair won the Ramga...,Congress candidate Shafia Zubair won the Ramga...,Congress candidate Shafia Zubair won the Ramga...


In [None]:
# Download as feather format (Rdata)
import feather
path = 'BERT_Patents_Similarity_Summary.feather'
feather.write_dataframe(df, path)

### 'Text' Summarizations using XLNet

In [5]:
texts_to_summarize3 = df['text']

model_name_xlnet = 'xlnet-large-cased'
model_xlnet = XLNetLMHeadModel.from_pretrained(model_name_xlnet)
tokenizer_xlnet = XLNetTokenizer.from_pretrained(model_name_xlnet)

In [6]:
generated_texts_xlnet = []
for index, text_to_generate in tqdm(texts_to_summarize3.items(), total=len(texts_to_summarize3), desc="Generating Text"):
    inputs_xlnet = tokenizer_xlnet.encode(text_to_generate, return_tensors="pt", max_length=1024, truncation=True)
    generated_ids_xlnet = model_xlnet.generate(inputs_xlnet, max_length=150, num_beams=4, early_stopping=True)
    generated_text_xlnet = tokenizer_xlnet.decode(generated_ids_xlnet[0], skip_special_tokens=True)
    generated_texts_xlnet.append(generated_text_xlnet)

Generating Text:   0%|                                                                        | 0/1000 [00:00<?, ?it/s]This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (-1). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.
Generating Text: 100%|██████████████████████████████████████████████████████████| 1000/1000 [18:47:38<00:00, 67.66s/it]


In [30]:
if isinstance(df, pd.DataFrame):
    target_column = 'text'
    target_column_index = df.columns.get_loc(target_column)
    df.insert(target_column_index + 1, 'summary -text_XLNet', generated_texts_xlnet)
else:
    print("Error: 'df' is not a DataFrame.")

In [31]:
df.head(10)

Unnamed: 0,headlines,text,summary -text_XLNet,summary-text_GPT2,summary-text_BART
0,upGrad learner switches to career in ML & Al w...,"Saurav Kant, an alumnus of upGrad and IIIT-B's...","Saurav Kant, an alumnus of upGrad and IIIT-B's...","Saurav Kant, an alumnus of upGrad and IIIT-B's...","Saurav Kant, an alumnus of upGrad and IIIT-B's..."
1,Delhi techie wins free food from Swiggy for on...,Kunal Shah's credit card bill payment platform...,Kunal Shah's credit card bill payment platform...,Kunal Shah's credit card bill payment platform...,Kunal Shah's credit card bill payment platform...
2,New Zealand end Rohit Sharma-led India's 12-ma...,New Zealand defeated India by 8 wickets in the...,New Zealand defeated India by 8 wickets in the...,New Zealand defeated India by 8 wickets in the...,New Zealand defeated India by 8 wickets in the...
3,Aegon life iTerm insurance plan helps customer...,"With Aegon Life iTerm Insurance plan, customer...","With Aegon Life iTerm Insurance plan, customer...","With Aegon Life iTerm Insurance plan, customer...","With Aegon Life iTerm Insurance plan, customer..."
4,"Have known Hirani for yrs, what if MeToo claim...",Speaking about the sexual harassment allegatio...,Speaking about the sexual harassment allegatio...,Speaking about the sexual harassment allegatio...,Rajkumar Hirani has been accused by an assista...
5,Rahat Fateh Ali Khan denies getting notice for...,Pakistani singer Rahat Fateh Ali Khan has deni...,Pakistani singer Rahat Fateh Ali Khan has deni...,Pakistani singer Rahat Fateh Ali Khan has deni...,Rahat Fateh Ali Khan has denied receiving any ...
6,"India get all out for 92, their lowest ODI tot...",India recorded their lowest ODI total in New Z...,India recorded their lowest ODI total in New Z...,India recorded their lowest ODI total in New Z...,India recorded their lowest ODI total in New Z...
7,Govt directs Alok Verma to join work 1 day bef...,Weeks after ex-CBI Director Alok Verma told th...,Weeks after ex-CBI Director Alok Verma told th...,Weeks after ex-CBI Director Alok Verma told th...,The Home Ministry asked him to join work on th...
8,Called PM Modi 'sir' 10 times to satisfy his e...,Andhra Pradesh CM N Chandrababu Naidu has said...,Andhra Pradesh CM N Chandrababu Naidu has said...,Andhra Pradesh CM N Chandrababu Naidu has said...,Andhra Pradesh CM N Chandrababu Naidu addresse...
9,"Cong wins Ramgarh bypoll in Rajasthan, takes t...",Congress candidate Shafia Zubair won the Ramga...,Congress candidate Shafia Zubair won the Ramga...,Congress candidate Shafia Zubair won the Ramga...,Congress candidate Shafia Zubair won the Ramga...
