In [1]:
# !pip install transformers==4.30.2

### BART fine-tuning for English text summarization

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf 
from unidecode import unidecode
from sklearn.model_selection import train_test_split
from transformers import  BartTokenizerFast, TFAutoModelForSeq2SeqLM
from tensorflow.keras.callbacks import EarlyStopping
import re
import warnings
warnings.filterwarnings("ignore")

In [3]:
def get_data(url: str, n_rows: int, random_state=None):
    """Load the article/summary CSV, clean it, and return a random sample.

    Args:
        url: Path or URL of the CSV file, read with ``pd.read_csv``.
        n_rows: Maximum number of rows to return. If fewer complete rows are
            available, all of them are returned (in shuffled order).
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            draw can be reproduced. ``None`` keeps it non-deterministic.

    Returns:
        A DataFrame with a fresh 0..n-1 index and two columns, ``input`` and
        ``target``, both passed through ``unidecode``.
    """
    # NOTE(review): `names=` without `header=0` makes pandas treat the first
    # line of the file as data. If the CSV ships a header row, that row ends
    # up in the frame as a sample -- confirm against the file.
    data = pd.read_csv(url, names=["id", "input", "target"])
    data = data.drop(columns="id").dropna()
    # Sample only after incomplete rows are gone so the result really has
    # `n_rows` rows, and cap the request so a small file does not raise.
    data = data.sample(n=min(n_rows, len(data)), random_state=random_state)
    data = data.reset_index(drop=True)
    data["input"] = data["input"].map(unidecode)
    data["target"] = data["target"].map(unidecode)
    return data

In [4]:
# Dataset source (Kaggle, CNN/DailyMail newspaper text summarization):
# https://www.kaggle.com/datasets/gowrishankarp/newspaper-text-summarization-cnn-dailymail
# Read the training CSV from the local `cnn_dailymail/` folder and request a
# random sample of 100,000 rows.
data = get_data(url="cnn_dailymail/train.csv", n_rows=100000)

In [5]:
# Peek at the first rows to confirm the `input` / `target` columns loaded.
data.head()

Unnamed: 0,input,target
0,By . James Gordon . If you ever wondered wheth...,Delighted terrier Duffy can't contain his exci...
1,"Dubai, United Arab Emirates (CNN) -- Mourners ...",Mahmoud al-Mabhouh buried Friday after his dea...
2,An American explorer recently became the first...,"Rob Mark 44, of Maplewood, New Jersey reached ..."
3,Bono is sorry he gave you free music. More tha...,U2's Bono apologizes to iTunes users angry ove...
4,(CNN) -- As the number of suspected and confir...,Health officials around world act to prevent s...


In [6]:
# Month names used to recognise datelines such as "10 November 1990".
_MONTH_NAMES = (
    "January|February|March|April|May|June|July|"
    "August|September|October|November|December"
)
_BYLINE_RE = re.compile(r'^By \. [\w\s]+ \. ')                        # By . Ellie Zolfagharifard .
_TIME_RE = re.compile(r'\b\d{1,2}:\d\d [A-Z]{3}\b')                   # 10:30 EST
_DATE_RE = re.compile(r'\b\d{1,2} (?:' + _MONTH_NAMES + r') \d{4}\b') # 10 November 1990
_LONE_PUNCT_RE = re.compile(r' [,.:\';|] ')                           # punctuation with a space on both sides
_LEADING_PUNCT_RE = re.compile(r'^\s*[,.:\';|]')                      # punctuation at the very start


def preprocess(txt):
    """Strip newswire boilerplate from an article or summary.

    Removes the leading "By . <author> ." byline, "HH:MM TZ" timestamps,
    "DD Month YYYY" dates, the "PUBLISHED:" / "UPDATED" markers, the "(CNN)"
    tag, "--" separators and punctuation marks left standing alone, then
    collapses all runs of whitespace into single spaces.

    Args:
        txt: Raw text.

    Returns:
        The cleaned text, without leading or trailing whitespace.
    """
    txt = _BYLINE_RE.sub(' ', txt)
    # The timestamp must end in a whole upper-case zone code. Without that, a
    # clock time followed by prose ("10:30 this morning") lost the first three
    # letters of the next word.
    txt = _TIME_RE.sub(' ', txt)
    # Only real month names count as a date. "12 in 2010" is ordinary text and
    # must survive.
    txt = _DATE_RE.sub(' ', txt)
    txt = txt.replace('PUBLISHED:', ' ')
    txt = txt.replace('UPDATED', ' ')
    txt = _LONE_PUNCT_RE.sub(' ', txt)
    txt = txt.replace(' : ', ' ')
    txt = txt.replace('(CNN)', ' ')
    txt = txt.replace('--', ' ')
    txt = _LEADING_PUNCT_RE.sub(' ', txt)
    # Second pass: matches cannot overlap, so adjacent marks (" . . ") and the
    # ones exposed by the removals above need another sweep.
    txt = _LONE_PUNCT_RE.sub(' ', txt)
    return " ".join(txt.split())

In [7]:
# Clean the articles and the reference summaries with the same rules,
# overwriting both columns.
data['input'] = data['input'].map(preprocess)
data['target'] = data['target'].map(preprocess)

In [8]:
def split_data(data: pd.DataFrame, input_col: str="input", target_col: str="target", test_size: float=0.1):
    """Split a frame into train/test lists of inputs and targets.

    Args:
        data: Frame holding the text columns.
        input_col: Name of the column with the model inputs.
        target_col: Name of the column with the reference outputs.
        test_size: Value forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` as plain Python lists. The
        split uses a fixed ``random_state`` of 42, and the shapes of the four
        parts are printed before conversion.
    """
    parts = train_test_split(
        data[input_col],
        data[target_col],
        random_state=42,
        test_size=test_size,
    )
    x_train, x_test, y_train, y_test = parts

    print(f'x_train.shape: {x_train.shape}, x_test.shape: {x_test.shape}, '+
          f'y_train.shape: {y_train.shape}, y_test.shape: {y_test.shape}')
    return tuple(series.to_list() for series in parts)

In [9]:
# Split with split_data's defaults (test_size=0.1); the test part is later
# used as the validation set during training.
x_train, x_test, y_train, y_test = split_data(data=data)

x_train.shape: (90000,), x_test.shape: (10000,), y_train.shape: (90000,), y_test.shape: (10000,)


In [10]:
# Show the first cleaned training pair: (article, reference summary).
x_train[0], y_train[0]

("Latvia has won approval today to become the 18th state to join the Euro after approval by the European Commission. European Union officials granted approval despite concerns about its banking system and Latvia is expected to replace its national currency, the lat, on January 1, 2014. EU leaders said the Baltic state's willingness to join next year is a vote of confidence for the shared currency. Latvia is expected to replace its currency, the Lat, with the Euro by January 1, 2014 A final decision will be made by eurozone finance ministers July 9 - following further consultation among EU leaders and Parliament. The 17 countries that use the Euro are struggling with a crisis over too much government debt, a recession, and 12.2 per cent unemployment. Olli Rehn, the EU's top economic and monetary official, said today that Latvia's membership bid was 'further evidence that those who predicted the disintegration of the euro area were wrong.' Latvia's Prime Minister Valdis Dombrovskis tweet

In [11]:
# Distribution strategy; its replica count scales the batch size below.
strategy = tf.distribute.MirroredStrategy()
# Hugging Face checkpoint used for both the tokenizer and the model.
CHECKPOINT = "facebook/bart-base"
# Articles are truncated/padded to 400 tokens because of memory constraints.
INPUT_N_TOKENS = 400
# Summaries are truncated/padded to 100 tokens.
TARGET_N_TOKENS = 100
# TODO: ideally derive both limits from the token-length distribution of a
# sample of inputs and targets instead of hard-coding them.
# 16 examples per replica.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

In [12]:
def tokenize(input: list, target: list, input_n_tokens: int, target_n_tokens: int):
    """Tokenize source texts and their targets for seq2seq training.

    Args:
        input: Source texts.
        target: Target texts, aligned with ``input``.
        input_n_tokens: Length every source sequence is truncated/padded to.
        target_n_tokens: Length every target sequence is truncated/padded to.

    Returns:
        The tokenizer output for the sources with an extra ``"labels"`` entry
        holding the target ``input_ids``.
    """
    tokenizer = BartTokenizerFast.from_pretrained(CHECKPOINT)
    # Both sides are cut and padded to a fixed length.
    fixed_length = dict(truncation=True, padding="max_length")
    encoded = tokenizer(text=input, max_length=input_n_tokens, **fixed_length)
    encoded_targets = tokenizer(text_target=target, max_length=target_n_tokens, **fixed_length)
    encoded["labels"] = encoded_targets["input_ids"]
    return encoded

In [13]:
# Smoke test: tokenize two training pairs and inspect the raw output.
tokenize(input=x_train[:2], target=y_train[:2], input_n_tokens=INPUT_N_TOKENS, 
         target_n_tokens=TARGET_N_TOKENS)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

{'input_ids': [[0, 41933, 11409, 34, 351, 2846, 452, 7, 555, 5, 504, 212, 194, 7, 1962, 5, 5122, 71, 2846, 30, 5, 796, 1463, 4, 796, 1332, 503, 4159, 2846, 1135, 1379, 59, 63, 3454, 467, 8, 27749, 16, 421, 7, 3190, 63, 632, 2593, 6, 5, 16619, 6, 15, 644, 112, 6, 777, 4, 1281, 917, 26, 5, 24532, 194, 18, 10640, 7, 1962, 220, 76, 16, 10, 900, 9, 2123, 13, 5, 1373, 2593, 4, 27749, 16, 421, 7, 3190, 63, 2593, 6, 5, 9882, 6, 19, 5, 5122, 30, 644, 112, 6, 777, 83, 507, 568, 40, 28, 156, 30, 24860, 2879, 5118, 550, 361, 111, 511, 617, 9434, 566, 1281, 917, 8, 3879, 4, 20, 601, 749, 14, 304, 5, 5122, 32, 3306, 19, 10, 1486, 81, 350, 203, 168, 1126, 6, 10, 7306, 6, 8, 316, 4, 176, 228, 715, 5755, 4, 384, 10054, 1223, 10245, 6, 5, 1281, 18, 299, 776, 8, 5775, 781, 6, 26, 452, 14, 27749, 18, 6332, 2311, 21, 128, 506, 39212, 1283, 14, 167, 54, 6126, 5, 32654, 8475, 9, 5, 2287, 443, 58, 1593, 955, 27749, 18, 1489, 692, 18852, 354, 211, 5223, 13359, 7771, 354, 2858, 14, 5, 568, 21, 128, 627, 220, 11

In [14]:
# Tokenize the full train and test splits with the same length limits.
tokenized_train = tokenize(input=x_train, target=y_train, input_n_tokens=INPUT_N_TOKENS, 
         target_n_tokens=TARGET_N_TOKENS)
tokenized_test = tokenize(input=x_test, target=y_test, input_n_tokens=INPUT_N_TOKENS, 
         target_n_tokens=TARGET_N_TOKENS)

In [15]:
# Integer indexing returns the per-sample Encoding object (see output below).
tokenized_train[0]

Encoding(num_tokens=400, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])

In [16]:
# Slicing returns a list of Encoding objects, one per sample.
tokenized_train[:5]

[Encoding(num_tokens=400, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]),
 Encoding(num_tokens=400, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]),
 Encoding(num_tokens=400, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]),
 Encoding(num_tokens=400, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]),
 Encoding(num_tokens=400, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])]

In [17]:
def return_tf_tensors(data):
    """Wrap tokenizer output in a prefetching ``tf.data.Dataset``.

    Args:
        data: Mapping-like tokenizer output; it is converted with ``dict()``
            and sliced along the first axis of every value.

    Returns:
        An unbatched dataset yielding one feature dict per example.
    """
    features = dict(data)
    return tf.data.Dataset.from_tensor_slices(features).prefetch(tf.data.AUTOTUNE)

In [18]:
# The raw training strings are not referenced again below; release them.
del(x_train, y_train)

In [19]:
# Turn both tokenized splits into tf.data datasets.
train_tf_data = return_tf_tensors(tokenized_train)
test_tf_data = return_tf_tensors(tokenized_test)

In [20]:
# Pull a single element from the dataset to inspect its features.
for i in train_tf_data.take(1):
    print(i)

{'input_ids': <tf.Tensor: shape=(400,), dtype=int32, numpy=
array([    0, 41933, 11409,    34,   351,  2846,   452,     7,   555,
           5,   504,   212,   194,     7,  1962,     5,  5122,    71,
        2846,    30,     5,   796,  1463,     4,   796,  1332,   503,
        4159,  2846,  1135,  1379,    59,    63,  3454,   467,     8,
       27749,    16,   421,     7,  3190,    63,   632,  2593,     6,
           5, 16619,     6,    15,   644,   112,     6,   777,     4,
        1281,   917,    26,     5, 24532,   194,    18, 10640,     7,
        1962,   220,    76,    16,    10,   900,     9,  2123,    13,
           5,  1373,  2593,     4, 27749,    16,   421,     7,  3190,
          63,  2593,     6,     5,  9882,     6,    19,     5,  5122,
          30,   644,   112,     6,   777,    83,   507,   568,    40,
          28,   156,    30, 24860,  2879,  5118,   550,   361,   111,
         511,   617,  9434,   566,  1281,   917,     8,  3879,     4,
          20,   601,   749,   

In [21]:
# Training consumes the tf.data datasets built above, so the tokenizer
# outputs can be released.
del(tokenized_train, tokenized_test)

In [22]:
from tensorflow.keras.optimizers.schedules import PolynomialDecay

In [23]:
def fit_model(train_data, val_data, epochs=3, eta=1e-4, early_stopping_patience=1, batch_size=BATCH_SIZE):
    """Fine-tune the ``CHECKPOINT`` seq2seq model and return it.

    The model and optimizer are created inside the module-level ``strategy``
    scope. No loss is passed to ``compile``, so the loss the model computes
    from the ``labels`` feature is used.

    Args:
        train_data: Unbatched ``tf.data.Dataset`` of training examples with a
            known length.
        val_data: Unbatched ``tf.data.Dataset`` of validation examples.
        epochs: Maximum number of epochs.
        eta: Initial learning rate; it decays linearly to 0 over the run.
        early_stopping_patience: Epochs without ``val_loss`` improvement that
            are tolerated before training stops.
        batch_size: Global batch size applied to both datasets.

    Returns:
        The fitted Keras model.
    """
    n_train = len(train_data)
    # The optimizer advances once per *batch*. Counting samples here (as
    # before) made the schedule `batch_size` times too long, so the learning
    # rate never got close to `end_learning_rate`.
    steps_per_epoch = -(-n_train // batch_size)  # ceiling division

    with strategy.scope():
        model = TFAutoModelForSeq2SeqLM.from_pretrained(CHECKPOINT)
        learning_schedule = PolynomialDecay(initial_learning_rate=eta, decay_steps=steps_per_epoch * epochs, end_learning_rate=0)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_schedule))

    # summary() prints by itself and returns None; wrapping it in print()
    # only added a stray "None" line.
    model.summary()
    early_stop = EarlyStopping(monitor="val_loss", patience=early_stopping_patience, mode="min")
    # Shuffling the validation set does not help evaluation, so it is only
    # batched.
    model.fit(train_data.shuffle(n_train).batch(batch_size), validation_data=val_data.batch(batch_size),
          epochs=epochs, callbacks=[early_stop])
    return model

In [24]:
# Fine-tune with fit_model's defaults; the held-out split serves as the
# validation set.
model = fit_model(train_data=train_tf_data, val_data=test_tf_data)

Downloading model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBartForConditionalGeneration.

All the weights of TFBartForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBartForConditionalGeneration for predictions without further training.


Model: "tf_bart_for_conditional_generation"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model (TFBartMainLayer)     multiple                  139420416 
                                                                 
 final_logits_bias (BiasLaye  multiple                 50265     
 r)                                                              
                                                                 
Total params: 139,470,681
Trainable params: 139,420,416
Non-trainable params: 50,265
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3


**INFERENCE**

In [25]:
def inference_tokenize(input: list, n_tokens: int):
    """Tokenize texts for generation and hand back the tokenizer as well.

    Args:
        input: Texts to encode.
        n_tokens: Length every sequence is truncated/padded to.

    Returns:
        ``(tokenizer, encoded)`` where ``encoded`` holds TensorFlow tensors.
    """
    tokenizer = BartTokenizerFast.from_pretrained(CHECKPOINT)
    encoded = tokenizer(
        text=input,
        max_length=n_tokens,
        truncation=True,
        padding="max_length",
        return_tensors="tf",
    )
    return tokenizer, encoded

In [29]:
def inference(txt: str):
    """Summarize one article with the freshly trained ``model``.

    Args:
        txt: Raw article text; it is cleaned with ``preprocess`` first.

    Returns:
        ``(cleaned_text, summary)``. Both are also printed.
    """
    txt = preprocess(txt)
    inference_tokenizer, tokenized_data = inference_tokenize(input=[txt], n_tokens=INPUT_N_TOKENS)
    pred = model.generate(**tokenized_data, max_new_tokens=TARGET_N_TOKENS)
    # Let the tokenizer drop its own special tokens. The previous "<.*?>"
    # regex also deleted any legitimate text between angle brackets.
    result = inference_tokenizer.decode(pred[0], skip_special_tokens=True).strip()
    print(f"NEWS:\n{txt}\n\nSUMMARY:\n{result}")
    return (txt, result)

In [30]:
txt = '''
Doctors are struggling to treat patients with severe injuries under dire 
conditions in Gaza as Israel maintains its bombardment of the enclave, 
a medic with aid group Doctors Without Borders said.
Speaking from Amman, Jordan, Dr. Tanya Haj-Hassan said 
the lack of medical supplies in Gaza meant doctors have been 
"completely stripped of all the tools of modern medicine" to 
treat patients — mostly women and children — with severe injuries and burns.
'''
txt, result = inference(txt)

NEWS:
Doctors are struggling to treat patients with severe injuries under dire conditions in Gaza as Israel maintains its bombardment of the enclave, a medic with aid group Doctors Without Borders said. Speaking from Amman, Jordan, Dr. Tanya Haj-Hassan said the lack of medical supplies in Gaza meant doctors have been "completely stripped of all the tools of modern medicine" to treat patients — mostly women and children — with severe injuries and burns.

SUMMARY:
Dr. Tanya Haj-Hassan says doctors have been stripped of all the tools of modern medicine in Gaza. Doctors are struggling to treat patients with severe injuries and burns, she says.


In [31]:
txt = '''
Donald Trump’s eldest son, Donald Trump Jr., testified on Wednesday that he 
was not involved in the preparation of his father’s financial statements at 
any point in time – including after his father became president in 2017 and he 
was appointed trustee on Donald Trump’s revocable trust.
Trump Jr. testified for 90 minutes in the civil fraud trial against the family 
and their business. He will continue on the stand on Thursday, followed by his brother, Eric Trump.
During his testimony Wednesday, Assistant Attorney General Colleen Faherty showed Trump Jr. the 2017 
statement of financial condition, which Judge Arthur Engoron has already ruled is fraudulent. 
The former president’s son said again that he didn’t help prepare the statement that year.
'''
txt, result = inference(txt)

NEWS:
Donald Trump’s eldest son, Donald Trump Jr., testified on Wednesday that he was not involved in the preparation of his father’s financial statements at any point in time – including after his father became president in 2017 and he was appointed trustee on Donald Trump’s revocable trust. Trump Jr. testified for 90 minutes in the civil fraud trial against the family and their business. He will continue on the stand on Thursday, followed by his brother, Eric Trump. During his testimony Wednesday, Assistant Attorney General Colleen Faherty showed Trump Jr. the 2017 statement of financial condition, which Judge Arthur Engoron has already ruled is fraudulent. The former president’s son said again that he didn’t help prepare the statement that year.

SUMMARY:
Donald Trump Jr. testified for 90 minutes in the civil fraud trial against the family and their business. He will continue on the stand on Thursday, followed by his brother, Eric Trump. The former president's son said again that he

In [32]:
txt = '''
Hong Kong
CNN
 — 
Blackpink star Lisa’s Chinese social media page on Weibo appears to have been taken down,
weeks after she performed a burlesque routine in Paris that sparked a huge debate on China’s tightly regulated internet.

On Wednesday afternoon, the Thai star’s verified account @lalalalisa_m was 
no longer searchable on Weibo, one of China’s most popular social media networks.

“The account can longer be viewed because it has received complaints of 
breaches of law and regulations, as well as relevant rules from the Weibo 
Community Management Regulations,” her replacement page stated.

It is not clear what types of complaint she has received but China’s 
web companies routinely suspend or remove pages that breach the country’s myriad 
censorship rules – or that simply generate too much controversy.

The suspension of her account comes after Lisa performed five shows at 
the Crazy Horse Paris in September. The venue is one of the city’s top burlesque 
performance spaces but her shows stirred controversy in some parts of Asia 
 where more conservative attitudes can prevail.
'''
txt, result = inference(txt)

NEWS:
Hong Kong CNN — Blackpink star Lisa’s Chinese social media page on Weibo appears to have been taken down, weeks after she performed a burlesque routine in Paris that sparked a huge debate on China’s tightly regulated internet. On Wednesday afternoon, the Thai star’s verified account @lalalalisa_m was no longer searchable on Weibo, one of China’s most popular social media networks. “The account can longer be viewed because it has received complaints of breaches of law and regulations, as well as relevant rules from the Weibo Community Management Regulations,” her replacement page stated. It is not clear what types of complaint she has received but China’s web companies routinely suspend or remove pages that breach the country’s myriad censorship rules – or that simply generate too much controversy. The suspension of her account comes after Lisa performed five shows at the Crazy Horse Paris in September. The venue is one of the city’s top burlesque performance spaces but her shows 

**SAVE MODEL**

In [33]:
# Persist only the fine-tuned weights, in HDF5 format.
model.save_weights("bart_en_summarizer.h5", save_format="h5")

**LOAD SAVED MODEL**

In [34]:
# Same values as in the training section, repeated here.
# NOTE(review): presumably so this section can run without the training
# cells; it still needs the imports and `preprocess` from above -- confirm.
CHECKPOINT = "facebook/bart-base"
INPUT_N_TOKENS = 400
TARGET_N_TOKENS = 100

In [35]:
def prod_inference_tokenize(input: list, n_tokens: int):
    """Tokenize texts for generation with the reloaded model.

    Args:
        input: Texts to encode.
        n_tokens: Length every sequence is truncated/padded to.

    Returns:
        ``(tokenizer, encoded)`` where ``encoded`` holds TensorFlow tensors.
    """
    tokenizer = BartTokenizerFast.from_pretrained(CHECKPOINT)
    encoded = tokenizer(
        text=input,
        max_length=n_tokens,
        truncation=True,
        padding="max_length",
        return_tensors="tf",
    )
    return tokenizer, encoded

In [38]:
# Rebuild the base model from the checkpoint, then load the fine-tuned
# weights from the HDF5 file over it.
loaded_model = TFAutoModelForSeq2SeqLM.from_pretrained(CHECKPOINT)
loaded_model.load_weights("bart_en_summarizer.h5", by_name=True)

All PyTorch model weights were used when initializing TFBartForConditionalGeneration.

All the weights of TFBartForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBartForConditionalGeneration for predictions without further training.


In [39]:
# Print the layer/parameter summary of the reloaded model.
loaded_model.summary()

Model: "tf_bart_for_conditional_generation_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model (TFBartMainLayer)     multiple                  139420416 
                                                                 
 final_logits_bias (BiasLaye  multiple                 50265     
 r)                                                              
                                                                 
Total params: 139,470,681
Trainable params: 139,420,416
Non-trainable params: 50,265
_________________________________________________________________


In [40]:
def loaded_model_inference(txt: str):
    """Summarize one article with ``loaded_model`` (weights read from disk).

    Args:
        txt: Raw article text; it is cleaned with ``preprocess`` first.

    Returns:
        ``(cleaned_text, summary)``. Both are also printed.
    """
    txt = preprocess(txt)
    inference_tokenizer, tokenized_data = prod_inference_tokenize(input=[txt], n_tokens=INPUT_N_TOKENS)
    pred = loaded_model.generate(**tokenized_data, max_new_tokens=TARGET_N_TOKENS)
    # Let the tokenizer drop its own special tokens. The previous "<.*?>"
    # regex also deleted any legitimate text between angle brackets.
    result = inference_tokenizer.decode(pred[0], skip_special_tokens=True).strip()
    print(f"NEWS:\n{txt}\n\nSUMMARY:\n{result}")
    return (txt, result)

In [41]:
txt = '''
Donald Trump’s eldest son, Donald Trump Jr., testified on Wednesday that he 
was not involved in the preparation of his father’s financial statements at 
any point in time – including after his father became president in 2017 and he 
was appointed trustee on Donald Trump’s revocable trust.
Trump Jr. testified for 90 minutes in the civil fraud trial against the family 
and their business. He will continue on the stand on Thursday, followed by his brother, Eric Trump.
During his testimony Wednesday, Assistant Attorney General Colleen Faherty showed Trump Jr. the 2017 
statement of financial condition, which Judge Arthur Engoron has already ruled is fraudulent. 
The former president’s son said again that he didn’t help prepare the statement that year.
'''
txt, result = loaded_model_inference(txt)

NEWS:
Donald Trump’s eldest son, Donald Trump Jr., testified on Wednesday that he was not involved in the preparation of his father’s financial statements at any point in time – including after his father became president in 2017 and he was appointed trustee on Donald Trump’s revocable trust. Trump Jr. testified for 90 minutes in the civil fraud trial against the family and their business. He will continue on the stand on Thursday, followed by his brother, Eric Trump. During his testimony Wednesday, Assistant Attorney General Colleen Faherty showed Trump Jr. the 2017 statement of financial condition, which Judge Arthur Engoron has already ruled is fraudulent. The former president’s son said again that he didn’t help prepare the statement that year.

SUMMARY:
Donald Trump Jr. testified for 90 minutes in the civil fraud trial against the family and their business. He will continue on the stand on Thursday, followed by his brother, Eric Trump. The former president's son said again that he

In [42]:
txt = '''
In October 1952, five years after the Indian subcontinent won independence from its 
British colonial rulers, a young Pakistan cricket team stepped foot in the newly formed Republic of India.

They had arrived to play a highly anticipated test series – the first for 
Pakistan after the country’s creation in 1947.

For many of the players, the drive from Lahore in the Muslim-majority 
nation of Pakistan, to Amritsar in Hindu-majority India, brought back 
painful memories of a bloody partition – one that hastily divided the 
former colony along religious lines with devastating results and gave 
rise to a fierce geopolitical rivalry.

In the 76 years since, India and Pakistan have fought three wars and 
introduced heavy restrictions on exchanges of goods or civilians, despite
the two countries sharing a border, a culture and a deeply intertwined history.
'''
txt, result = loaded_model_inference(txt)

NEWS:
In October 1952, five years after the Indian subcontinent won independence from its British colonial rulers, a young Pakistan cricket team stepped foot in the newly formed Republic of India. They had arrived to play a highly anticipated test series – the first for Pakistan after the country’s creation in 1947. For many of the players, the drive from Lahore in the Muslim-majority nation of Pakistan, to Amritsar in Hindu-majority India, brought back painful memories of a bloody partition – one that hastily divided the former colony along religious lines with devastating results and gave rise to a fierce geopolitical rivalry. In the 76 years since, India and Pakistan have fought three wars and introduced heavy restrictions on exchanges of goods or civilians, despite the two countries sharing a border, a culture and a deeply intertwined history.

SUMMARY:
India and Pakistan have fought three wars and introduced heavy restrictions on exchanges of goods or civilians. The two countries 

In [43]:
txt = '''
Australian police have brought murder charges against a woman who 
served a lunch earlier this year that led to the deaths of three 
people from suspected death cap mushroom poisoning.

Victoria Police confirmed a 49-year-old woman was charged with 
three counts of murder and five counts of attempted murder after
being arrested in connection with the case Thursday morning.

Local media, including CNN regional affiliates and national 
broadcaster ABC, identified the woman as Erin Patterson, 49, 
who police have previously identified as the person who cooked 
and served the meal that ended in the deaths. When asked to confirm 
the woman’s identity, Victoria Police declined to comment.
'''
txt, result = loaded_model_inference(txt)

NEWS:
Australian police have brought murder charges against a woman who served a lunch earlier this year that led to the deaths of three people from suspected death cap mushroom poisoning. Victoria Police confirmed a 49-year-old woman was charged with three counts of murder and five counts of attempted murder after being arrested in connection with the case Thursday morning. Local media, including CNN regional affiliates and national broadcaster ABC, identified the woman as Erin Patterson, 49, who police have previously identified as the person who cooked and served the meal that ended in the deaths. When asked to confirm the woman’s identity, Victoria Police declined to comment.

SUMMARY:
A 49-year-old woman has been charged with three counts of murder and five counts of attempted murder. Local media identified the woman as Erin Patterson, 49, who police have previously identified as the person who cooked and served the meal that ended in the deaths.


In [44]:
txt = '''

The rise of Rep. Mike Johnson in the House, coupled with the fall of 
former Vice President Mike Pence and the dominance of ex-President 
Donald Trump, shows that 2020 election denialism is a prerequisite for winning Republican power.

Johnson, who played a leading role in the effort to block the certification 
of President Joe Biden’s 2020 election win, benefited in his ascent to the 
speakership last week from the approval of pro-Trump lawmakers.

Pence suspended his run for the White House over the weekend after months 
mired in the single digits in surveys. His tortured explanations of his 
constitutionality correct decision – that he lacked power to overturn Biden’s 
2020 election win in Congress – failed to convince grassroots Republicans bought
into the ex-president’s false claims that he won the election. Pence now joins the
swelling ranks of Republicans, from former Wyoming Rep. Liz Cheney to retiring Utah Sen.
Mitt Romney, whose careers were eviscerated by contradicting Trump’s lies.
'''
txt, result = loaded_model_inference(txt)

NEWS:
The rise of Rep. Mike Johnson in the House, coupled with the fall of former Vice President Mike Pence and the dominance of ex-President Donald Trump, shows that 2020 election denialism is a prerequisite for winning Republican power. Johnson, who played a leading role in the effort to block the certification of President Joe Biden’s 2020 election win, benefited in his ascent to the speakership last week from the approval of pro-Trump lawmakers. Pence suspended his run for the White House over the weekend after months mired in the single digits in surveys. His tortured explanations of his constitutionality correct decision – that he lacked power to overturn Biden’s 2020 election win in Congress – failed to convince grassroots Republicans bought into the ex-president’s false claims that he won the election. Pence now joins the swelling ranks of Republicans, from former Wyoming Rep. Liz Cheney to retiring Utah Sen. Mitt Romney, whose careers were eviscerated by contradicting Trump’s 

In [45]:
txt = '''
Hong Kong
CNN
 — 
Blackpink star Lisa’s Chinese social media page on Weibo appears to have been taken down,
weeks after she performed a burlesque routine in Paris that sparked a huge debate on China’s tightly regulated internet.

On Wednesday afternoon, the Thai star’s verified account @lalalalisa_m was 
no longer searchable on Weibo, one of China’s most popular social media networks.

“The account can longer be viewed because it has received complaints of 
breaches of law and regulations, as well as relevant rules from the Weibo 
Community Management Regulations,” her replacement page stated.

It is not clear what types of complaint she has received but China’s 
web companies routinely suspend or remove pages that breach the country’s myriad 
censorship rules – or that simply generate too much controversy.

The suspension of her account comes after Lisa performed five shows at 
the Crazy Horse Paris in September. The venue is one of the city’s top burlesque 
performance spaces but her shows stirred controversy in some parts of Asia 
 where more conservative attitudes can prevail.
'''
txt, result = loaded_model_inference(txt)

NEWS:
Hong Kong CNN — Blackpink star Lisa’s Chinese social media page on Weibo appears to have been taken down, weeks after she performed a burlesque routine in Paris that sparked a huge debate on China’s tightly regulated internet. On Wednesday afternoon, the Thai star’s verified account @lalalalisa_m was no longer searchable on Weibo, one of China’s most popular social media networks. “The account can longer be viewed because it has received complaints of breaches of law and regulations, as well as relevant rules from the Weibo Community Management Regulations,” her replacement page stated. It is not clear what types of complaint she has received but China’s web companies routinely suspend or remove pages that breach the country’s myriad censorship rules – or that simply generate too much controversy. The suspension of her account comes after Lisa performed five shows at the Crazy Horse Paris in September. The venue is one of the city’s top burlesque performance spaces but her shows 

In [47]:
txt = '''
London
CNN
 -- 
The Bank of England downgraded its forecasts for UK economic growth Thursday
and said it would take longer than previously expected for inflation to come
back down to its 2% target.

The central bank now expects gross domestic product to have flatlined in 
the July-to-September quarter and to grow just 0.1% in the current quarter, 
gloomier forecasts than its projections in August.

Despite the economic slowdown, inflation will return to target only at the 
end of 2025, roughly six months later than previously forecast.

The Bank of England kept interest rates unchanged for the second time in 
a row as data shows the economy is weakening and inflation easing.

The decision to hold fire again, after a pause in September that followed 
14 successive rate hikes, keeps the main borrowing cost for commercial banks
in the United Kingdom at 5.25%. That is the highest level since February 2008.

The Federal Reserve also kept rates on hold Wednesday, while the European 
Central Bank paused its rate-hiking campaign for the first time in 15 months last week.
'''
txt, result = loaded_model_inference(txt)

NEWS:
London CNN The Bank of England downgraded its forecasts for UK economic growth Thursday and said it would take longer than previously expected for inflation to come back down to its 2% target. The central bank now expects gross domestic product to have flatlined in the July-to-September quarter and to grow just 0.1% in the current quarter, gloomier forecasts than its projections in August. Despite the economic slowdown, inflation will return to target only at the end of 2025, roughly six months later than previously forecast. The Bank of England kept interest rates unchanged for the second time in a row as data shows the economy is weakening and inflation easing. The decision to hold fire again, after a pause in September that followed 14 successive rate hikes, keeps the main borrowing cost for commercial banks in the United Kingdom at 5.25%. That is the highest level since February 2008. The Federal Reserve also kept rates on hold Wednesday, while the European Central Bank pause

In [48]:
txt = '''
CNN
 —- 
Scientists widely agree that an ancient planet likely smashed 
into Earth as it was forming billions of years ago, spewing debris 
that coalesced into the moon that decorates our night sky today.

The theory, called the giant-impact hypothesis, explains many fundamental 
features of the moon and Earth.

But one glaring mystery at the center of this hypothesis has endured: 
What ever happened to Theia? Direct evidence of its existence has remained 
elusive. No leftover fragments from the planet have been found in the solar 
system. And many scientists assumed any debris Theia left behind on Earth was 
blended in the fiery cauldron of our planet’s interior.
'''
txt, result = loaded_model_inference(txt)

NEWS:
CNN —- Scientists widely agree that an ancient planet likely smashed into Earth as it was forming billions of years ago, spewing debris that coalesced into the moon that decorates our night sky today. The theory, called the giant-impact hypothesis, explains many fundamental features of the moon and Earth. But one glaring mystery at the center of this hypothesis has endured: What ever happened to Theia? Direct evidence of its existence has remained elusive. No leftover fragments from the planet have been found in the solar system. And many scientists assumed any debris Theia left behind on Earth was blended in the fiery cauldron of our planet’s interior.

SUMMARY:
Theia likely smashed into Earth as it was forming billions of years ago. No evidence of its existence has remained elusive. Scientists assumed any debris left behind on Earth was blended in the fiery cauldron of our planet's interior.


In [49]:
txt = '''
American teenager Isabeau Levito has won her first senior major international 
figure staking competition, claiming the Grand Prix de France title on Saturday 
to set up a potential US clean sweep this weekend.

Despite finishing third in the Free Skate, the 16-year-old’s overall total of 
203.22 was enough for victory and qualification to December’s Grand Prix Final.

Belgium’s Nina Pinzarrone was second with 198.80 points, while Rion Sumiyoshi 
of Japan was third with 197.79 points.

“I’m very proud I was able to improve from last season result-wise. It was my 
goal to win one of my Grands Prix at least to really secure that I would make it to the 
Final. I’m really happy I improved in this aspect,” Levito said, per Olympics.com
'''
txt, result = loaded_model_inference(txt)

NEWS:
American teenager Isabeau Levito has won her first senior major international figure staking competition, claiming the Grand Prix de France title on Saturday to set up a potential US clean sweep this weekend. Despite finishing third in the Free Skate, the 16-year-old’s overall total of 203.22 was enough for victory and qualification to December’s Grand Prix Final. Belgium’s Nina Pinzarrone was second with 198.80 points, while Rion Sumiyoshi of Japan was third with 197.79 points. “I’m very proud I was able to improve from last season result-wise. It was my goal to win one of my Grands Prix at least to really secure that I would make it to the Final. I’m really happy I improved in this aspect,” Levito said, per Olympics.com

SUMMARY:
Isabeau Levito has won her first senior major international figure staking competition. The 16-year-old won the Grand Prix de France title on Saturday. Levito's overall total of 203.22 was enough for victory and qualification to December's Grand Prix F