In [None]:
import pandas as pd
import numpy as np
from unidecode import unidecode
import tensorflow as tf 
from sklearn.model_selection import train_test_split
from transformers import T5TokenizerFast, TFAutoModelForSeq2SeqLM
from tensorflow.keras.callbacks import EarlyStopping
import re
import warnings
warnings.filterwarnings("ignore")

In [2]:
def get_data(url: str, n_rows: int, random_state: int = 42):
    """Load article/summary pairs from a CSV file and return them shuffled.

    Only the first ``n_rows`` rows are read. The first CSV column is
    discarded and the two remaining columns are renamed to ``input`` and
    ``target``; the file must therefore contain exactly three columns.
    Rows with a missing value in either column are dropped.

    Args:
        url: Path, URL or file-like object accepted by ``pd.read_csv``.
        n_rows: Number of leading rows to read from the file.
        random_state: Seed for the row shuffle so repeated runs yield the same
            ordering. Pass ``None`` for a non-deterministic shuffle.

    Returns:
        A ``pd.DataFrame`` with columns ``input`` and ``target``.
    """
    # Let the parser stop after n_rows instead of loading the whole file and
    # slicing afterwards.
    frame = pd.read_csv(url, nrows=n_rows).iloc[:, 1:]
    frame.columns = ["input", "target"]
    frame = frame.sample(frac=1, random_state=random_state)
    return frame.dropna(subset=["input", "target"]).copy()

In [3]:
# Load the first 20,000 rows of the training CSV (path is relative to the
# notebook's working directory).
data = get_data(url="data/train.csv", n_rows=20000)

In [4]:
data.head()

Unnamed: 0,input,target
5798,JERUSALEM (CNN) -- Clashes between Hamas milit...,NEW: Fighting nears densely populated Gaza Cit...
4640,"By . Sara Malm . PUBLISHED: . 05:36 EST, 25 Fe...",One died and four were injured at a wedding in...
9924,"Seoul, South Korea (CNN) -- North Korea says i...","SK minister: ""The agreement must be kept""\nThe..."
2451,An underground website that was dealing cocain...,"Silk Road owner, Ross William Ulbricht, 29, kn..."
6359,By . Associated Press . PUBLISHED: . 08:32 EST...,Steve Capus announced his resignation on Frida...


In [5]:
def preprocess_data(data: pd.DataFrame):
    """Return a copy of ``data`` with ASCII-transliterated text columns.

    Both ``input`` and ``target`` are passed through ``unidecode``. The
    ``input`` column is additionally prefixed with ``"summarize: "``.
    The frame passed in is left unmodified.
    """
    ascii_inputs = data["input"].map(unidecode)
    ascii_targets = data["target"].map(unidecode)
    return data.assign(input="summarize: " + ascii_inputs, target=ascii_targets)

In [6]:
data = preprocess_data(data=data)

In [7]:
def split_data(data: pd.DataFrame, input_col: str="input", target_col: str="target", test_size: float=0.1):
    """Split the input/target columns into train and test lists.

    The split uses ``train_test_split`` with a fixed ``random_state`` of 42.
    The shapes of the four resulting series are printed before they are
    converted to plain Python lists.

    Returns:
        ``(x_train, x_test, y_train, y_test)``, each a list.
    """
    parts = train_test_split(
        data[input_col],
        data[target_col],
        random_state=42,
        test_size=test_size,
    )
    x_train, x_test, y_train, y_test = parts

    print(f'x_train.shape: {x_train.shape}, x_test.shape: {x_test.shape}, '+
          f'y_train.shape: {y_train.shape}, y_test.shape: {y_test.shape}')
    return tuple(series.to_list() for series in parts)

In [8]:
x_train, x_test, y_train, y_test = split_data(data=data)

x_train.shape: (18000,), x_test.shape: (2000,), y_train.shape: (18000,), y_test.shape: (2000,)


In [9]:
x_train[0], y_train[0]

("summarize: Back in the UK: John Cleese and his wife Jennifer Wade . The crippling cost of divorce has already forced John Cleese into crisis measures, such as launching his stand-up comedy 'Alimony Tour', and even moving to another country. But it would seem he is still looking for ways to keep his finances afloat. The Monty Python star has embarked on a sale of film props and signed photos he accumulated during his career. The items include a fibre-glass helmet used in the film Monty Python And The Holy Grail, which is being offered at PS999. There is also a 1970 photo which shows the Monty Python stars doing silly walks, priced at PS29.99, and a signed photo of the classic Fawlty Towers scene in which Cleese as Basil Fawlty thrashes his red Austin 1300 with a tree branch. The photos are among seven of Cleese performing in Python and Fawlty Towers sketches which, with the other items, are for sale on the Original Memorabilia Company website. It is the same website the 73-year-old ac

In [10]:
# Distribution strategy; fit_model creates and compiles the model inside its scope.
strategy = tf.distribute.MirroredStrategy()
# Hugging Face checkpoint used for both the tokenizer and the model.
CHECKPOINT = "t5-small"
# Inputs and targets are truncated/padded to these fixed token lengths.
INPUT_N_TOKENS = 300 # considering only 300 tokens due to memory constraints
TARGET_N_TOKENS = 150 # considering only 150 tokens due to memory constraints
# 16 examples per replica, scaled by the number of replicas in the strategy.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

In [11]:
def tokenize(input: list, target: list, input_n_tokens: int, target_n_tokens: int):
    """Tokenize source texts and their target summaries for seq2seq training.

    Args:
        input: Source texts (already carrying the ``"summarize: "`` prefix).
        target: Reference summaries, aligned one-to-one with ``input``.
        input_n_tokens: Fixed length the sources are truncated/padded to.
        target_n_tokens: Fixed length the targets are truncated/padded to.

    Returns:
        The tokenizer output for ``input`` (``input_ids``, ``attention_mask``)
        with an extra ``labels`` entry holding the target token ids. Padding
        positions in ``labels`` are set to ``-100``.
    """
    tokenizer = T5TokenizerFast.from_pretrained(CHECKPOINT)
    tokenized_data = tokenizer(text=input, max_length=input_n_tokens, truncation=True, padding="max_length")
    label_ids = tokenizer(text_target=target, max_length=target_n_tokens, truncation=True, padding="max_length")["input_ids"]

    # Targets are padded to a fixed length. Left as the pad id, those
    # positions would be scored by the loss and the model would be trained to
    # emit padding. -100 is the label value the Hugging Face loss ignores.
    pad_id = tokenizer.pad_token_id
    tokenized_data["labels"] = [
        [token if token != pad_id else -100 for token in sequence]
        for sequence in label_ids
    ]
    return tokenized_data

In [12]:
tokenize(input=x_train[:2], target=y_train[:2], input_n_tokens=INPUT_N_TOKENS, 
         target_n_tokens=TARGET_N_TOKENS)

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

{'input_ids': [[21603, 10, 3195, 16, 8, 1270, 10, 1079, 205, 109, 15, 7, 15, 11, 112, 2512, 13560, 26765, 3, 5, 37, 31777, 53, 583, 13, 7759, 65, 641, 5241, 1079, 205, 109, 15, 7, 15, 139, 5362, 3629, 6, 224, 38, 3, 14138, 112, 1518, 18, 413, 12373, 3, 31, 188, 40, 23, 21208, 3351, 31, 6, 11, 237, 1735, 12, 430, 684, 5, 299, 34, 133, 1727, 3, 88, 19, 341, 479, 21, 1155, 12, 453, 112, 14272, 3, 9, 12660, 5, 37, 5788, 63, 20737, 2213, 65, 17046, 15, 26, 30, 3, 9, 1048, 13, 814, 6377, 7, 11, 3814, 1302, 3, 88, 3, 22148, 383, 112, 1415, 5, 37, 1173, 560, 3, 9, 10851, 18, 15548, 18691, 261, 16, 8, 814, 5788, 63, 20737, 275, 37, 6679, 350, 12977, 6, 84, 19, 271, 1860, 44, 5610, 19446, 5, 290, 19, 92, 3, 9, 7434, 1202, 84, 1267, 8, 5788, 63, 20737, 4811, 692, 17056, 10681, 6, 10565, 44, 5610, 357, 21316, 6, 11, 3, 9, 3814, 1202, 13, 8, 2431, 1699, 210, 40, 17, 63, 10677, 7, 3112, 16, 84, 205, 109, 15, 7, 15, 38, 23711, 1699, 210, 40, 17, 63, 3, 189, 52, 23604, 112, 1131, 8513, 209, 5426, 28, 

In [13]:
# Tokenize both splits with the same fixed input/target lengths.
tokenized_train = tokenize(
    input=x_train,
    target=y_train,
    input_n_tokens=INPUT_N_TOKENS,
    target_n_tokens=TARGET_N_TOKENS,
)
tokenized_test = tokenize(
    input=x_test,
    target=y_test,
    input_n_tokens=INPUT_N_TOKENS,
    target_n_tokens=TARGET_N_TOKENS,
)

In [14]:
tokenized_train[0]

Encoding(num_tokens=300, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])

In [15]:
tokenized_train[:5]

[Encoding(num_tokens=300, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]),
 Encoding(num_tokens=300, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]),
 Encoding(num_tokens=300, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]),
 Encoding(num_tokens=300, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]),
 Encoding(num_tokens=300, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])]

In [16]:
def return_tf_tensors(data):
    """Wrap tokenized features in an unbatched, prefetching ``tf.data.Dataset``.

    ``data`` is converted to a plain dict and sliced along its first axis, so
    each dataset element is a dict holding one example per key.
    """
    feature_columns = dict(data)
    return tf.data.Dataset.from_tensor_slices(feature_columns).prefetch(tf.data.AUTOTUNE)

In [17]:
train_tf_data = return_tf_tensors(tokenized_train)
test_tf_data = return_tf_tensors(tokenized_test)

In [18]:
for i in train_tf_data.take(1):
    print(i)

{'input_ids': <tf.Tensor: shape=(300,), dtype=int32, numpy=
array([21603,    10,  3195,    16,     8,  1270,    10,  1079,   205,
         109,    15,     7,    15,    11,   112,  2512, 13560, 26765,
           3,     5,    37, 31777,    53,   583,    13,  7759,    65,
         641,  5241,  1079,   205,   109,    15,     7,    15,   139,
        5362,  3629,     6,   224,    38,     3, 14138,   112,  1518,
          18,   413, 12373,     3,    31,   188,    40,    23, 21208,
        3351,    31,     6,    11,   237,  1735,    12,   430,   684,
           5,   299,    34,   133,  1727,     3,    88,    19,   341,
         479,    21,  1155,    12,   453,   112, 14272,     3,     9,
       12660,     5,    37,  5788,    63, 20737,  2213,    65, 17046,
          15,    26,    30,     3,     9,  1048,    13,   814,  6377,
           7,    11,  3814,  1302,     3,    88,     3, 22148,   383,
         112,  1415,     5,    37,  1173,   560,     3,     9, 10851,
          18, 15548, 18691,   

In [19]:
def fit_model(train_data, val_data, epochs=2, eta=1e-4, early_stopping_patience=1, batch_size=BATCH_SIZE):
    """Fine-tune the ``CHECKPOINT`` seq2seq model and return it.

    Args:
        train_data: Unbatched ``tf.data.Dataset`` of training examples.
        val_data: Unbatched ``tf.data.Dataset`` used for validation.
        epochs: Maximum number of training epochs.
        eta: Learning rate for the Adam optimizer.
        early_stopping_patience: Epochs without ``val_loss`` improvement
            tolerated before training stops.
        batch_size: Global batch size applied to both datasets.

    Returns:
        The trained model, carrying the weights of the best ``val_loss`` epoch
        when early stopping triggers.
    """
    # Variables must be created under the strategy scope to be distributed.
    with strategy.scope():
        model = TFAutoModelForSeq2SeqLM.from_pretrained(CHECKPOINT)
        # No loss is passed to compile(); the optimizer is the only argument.
        model.compile(optimizer=tf.keras.optimizers.Adam(eta))

    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (that emitted a stray "None").
    model.summary()

    early_stop = EarlyStopping(
        monitor="val_loss",
        patience=early_stopping_patience,
        mode="min",
        # Without this the returned model keeps the weights of the last epoch,
        # which is the worse one whenever early stopping fires.
        restore_best_weights=True,
    )

    # Shuffle only the training data; validation order does not matter.
    train_batches = train_data.shuffle(len(train_data)).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    val_batches = val_data.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    model.fit(train_batches, validation_data=val_batches, epochs=epochs, callbacks=[early_stop])
    return model

In [20]:
# Fine-tune with default hyper-parameters; the held-out test split doubles as
# the validation set monitored by early stopping.
model = fit_model(train_data=train_tf_data, val_data=test_tf_data)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


Model: "tft5_for_conditional_generation"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 shared (Embedding)          multiple                  16449536  
                                                                 
 encoder (TFT5MainLayer)     multiple                  35330816  
                                                                 
 decoder (TFT5MainLayer)     multiple                  41625344  
                                                                 
Total params: 60,506,624
Trainable params: 60,506,624
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/2
Epoch 2/2


**INFERENCE**

In [21]:
def inference_tokenize(input: list, n_tokens: int):
    """Tokenize texts for generation and return the tokenizer with the encoding.

    Texts are truncated/padded to ``n_tokens`` and returned as TensorFlow
    tensors. The tokenizer is returned too so the caller can decode the
    generated ids.

    Returns:
        ``(tokenizer, tokenized_data)``.
    """
    tokenizer = T5TokenizerFast.from_pretrained(CHECKPOINT)
    encode_options = dict(max_length=n_tokens, truncation=True, padding="max_length", return_tensors="tf")
    return tokenizer, tokenizer(text=input, **encode_options)

In [23]:
def inference(txt: str):
    """Summarize ``txt`` with the in-memory fine-tuned ``model``.

    Prints the input text and the generated summary.

    Args:
        txt: Raw article text.

    Returns:
        ``(txt, result)``: the unmodified input and the generated summary.
    """
    # Training inputs were transliterated with unidecode in preprocess_data;
    # apply the same normalisation here so the model sees the same kind of
    # text (e.g. curly quotes become ASCII quotes).
    test_data = ["summarize: " + unidecode(txt)]
    inference_tokenizer, tokenized_data = inference_tokenize(input=test_data, n_tokens=INPUT_N_TOKENS)
    pred = model.generate(**tokenized_data, max_new_tokens=TARGET_N_TOKENS)
    # Let the tokenizer drop its special tokens instead of regex-stripping
    # every "<...>" span, which would also delete legitimate angle-bracketed
    # text from the summary.
    result = inference_tokenizer.decode(pred[0], skip_special_tokens=True).strip()
    print(f"INPUT TEXT:\n{txt}\n\nSUMMARY:\n{result}")
    return (txt, result)

In [24]:
txt = '''
Heavy rainfall in several parts of north India has plunged the region into chaos, with more than 28 reported dead in the past three days. Cities and towns are grappling with the aftermath as roads and buildings remain submerged in knee-deep water, including the capital Delhi where the situation is expected to worsen as the weather department predicts more downpour in the coming days.

All schools in Delhi and Gurugram have been closed today due to the heavy rain-induced waterlogging. Videos show Delhi’s Connaught Place submerged in water prompting a Twitter user to brand it “Connaught River”.
'''
txt, result = inference(txt)

INPUT TEXT:

Heavy rainfall in several parts of north India has plunged the region into chaos, with more than 28 reported dead in the past three days. Cities and towns are grappling with the aftermath as roads and buildings remain submerged in knee-deep water, including the capital Delhi where the situation is expected to worsen as the weather department predicts more downpour in the coming days.

All schools in Delhi and Gurugram have been closed today due to the heavy rain-induced waterlogging. Videos show Delhi’s Connaught Place submerged in water prompting a Twitter user to brand it “Connaught River”.


SUMMARY:
Heavy rainfall in several parts of north India has plunged the region into chaos. More than 28 people have died in the past three days. All schools in Delhi and Gurugram have been closed due to the heavy rain-induced waterlogging.


In [25]:
txt = '''
Tata Consultancy Services (TCS) is holding up onboarding of lateral hires with 1.8 to up to 15 years of experience by three months amidst project commencement delays, multiple sources in the know told Moneycontrol. This is coming at a time when the Indian IT sector has already taken a hit by macroeconomic headwinds and is facing project deferrals and ramp-downs as their clients cut down on tech budgets.

Over 200 lateral recruits across cities including Bangalore, Pune, Kochi, Bhubaneswar, Delhi NCR, and Indore to name a few are impacted by the delays.

These joiners were hired between January and April and were initially facing onboarding delays by a month. Many of them got two to three subsequent new joining dates. On July 10, however, many received emails stating that their joining dates are getting pushed to October.

The information about a deferral in onboarding date hasn’t even been communicated proactively to those waiting to join the company, according to multiple people Moneycontrol spoke to.'''
txt, result = inference(txt)

INPUT TEXT:

Tata Consultancy Services (TCS) is holding up onboarding of lateral hires with 1.8 to up to 15 years of experience by three months amidst project commencement delays, multiple sources in the know told Moneycontrol. This is coming at a time when the Indian IT sector has already taken a hit by macroeconomic headwinds and is facing project deferrals and ramp-downs as their clients cut down on tech budgets.

Over 200 lateral recruits across cities including Bangalore, Pune, Kochi, Bhubaneswar, Delhi NCR, and Indore to name a few are impacted by the delays.

These joiners were hired between January and April and were initially facing onboarding delays by a month. Many of them got two to three subsequent new joining dates. On July 10, however, many received emails stating that their joining dates are getting pushed to October.

The information about a deferral in onboarding date hasn’t even been communicated proactively to those waiting to join the company, according to multiple

In [26]:
txt = '''
The Union government has collected Rs 4.75 lakh crore in direct taxes in the first quarter of 2023-24, the Finance Ministry said on July 9.

As per the ministry, as on July 9, the direct tax collected was 15.87 percent higher compared to the same period last financial year. This collection is 26.05 percent of the total budget estimates of direct taxes for FY 2023-24.

Refunds amounting to Rs. 42,000 crore have been issued during April 1 to July 9, which are 2.55 percent higher than refunds issued during the same period in the preceding year, the ministry said.

The overall rate of growth is faster than what the government had anticipated. In the Union Budget for 2023-24, presented by Finance Minister Nirmala Sitharaman on February 1, direct tax collections were seen rising 10.5 percent from 2022-23.
'''
txt, result = inference(txt)

INPUT TEXT:

The Union government has collected Rs 4.75 lakh crore in direct taxes in the first quarter of 2023-24, the Finance Ministry said on July 9.

As per the ministry, as on July 9, the direct tax collected was 15.87 percent higher compared to the same period last financial year. This collection is 26.05 percent of the total budget estimates of direct taxes for FY 2023-24.

Refunds amounting to Rs. 42,000 crore have been issued during April 1 to July 9, which are 2.55 percent higher than refunds issued during the same period in the preceding year, the ministry said.

The overall rate of growth is faster than what the government had anticipated. In the Union Budget for 2023-24, presented by Finance Minister Nirmala Sitharaman on February 1, direct tax collections were seen rising 10.5 percent from 2022-23.


SUMMARY:
The Union government has collected Rs 4.75 lakh crore in direct taxes in the first quarter of 2023-24. The direct tax collected was 15.87 percent higher compared to 

**SAVE MODEL**

In [27]:
# Create the output directory first: writing the HDF5 file fails when
# "models/" does not exist (e.g. on a fresh checkout).
tf.io.gfile.makedirs("models")
model.save_weights("models/t5_news_summarizer.h5", save_format="h5")

**LOAD SAVED MODEL**

In [2]:
# Constants repeated for the load-and-predict section. The values match the
# ones used in the training section above and should be kept in sync with them.
CHECKPOINT = "t5-small"
INPUT_N_TOKENS = 300
TARGET_N_TOKENS = 150

In [3]:
def prod_inference_tokenize(input: list, n_tokens: int):
    """Encode ``input`` for the reloaded model.

    Loads the ``CHECKPOINT`` tokenizer, truncates/pads every text to
    ``n_tokens`` and returns TensorFlow tensors.

    Returns:
        ``(tokenizer, tokenized_data)`` so the caller can both generate from
        the encoding and decode the generated ids.
    """
    t5_tokenizer = T5TokenizerFast.from_pretrained(CHECKPOINT)
    encoded = t5_tokenizer(
        text=input,
        max_length=n_tokens,
        truncation=True,
        padding="max_length",
        return_tensors="tf",
    )
    return t5_tokenizer, encoded

In [5]:
# Rebuild the architecture from the base checkpoint, then overwrite its
# weights with the fine-tuned ones saved to HDF5 earlier in the notebook.
loaded_model = TFAutoModelForSeq2SeqLM.from_pretrained(CHECKPOINT)
# The model is not compiled here; below it is only used through generate().
# NOTE(review): by_name=True matches saved weights to layers by name, so
# confirm the fine-tuned weights really replaced the pretrained ones, e.g. by
# comparing a few weights or outputs against the in-memory trained model.
loaded_model.load_weights("models/t5_news_summarizer.h5", by_name=True)

All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [6]:
loaded_model.summary()

Model: "tft5_for_conditional_generation_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 shared (Embedding)          multiple                  16449536  
                                                                 
 encoder (TFT5MainLayer)     multiple                  35330816  
                                                                 
 decoder (TFT5MainLayer)     multiple                  41625344  
                                                                 
Total params: 60,506,624
Trainable params: 60,506,624
Non-trainable params: 0
_________________________________________________________________


In [92]:
def loaded_model_inference(txt: str):
    """Summarize ``txt`` with the reloaded ``loaded_model``.

    Prints the input text and the generated summary.

    Args:
        txt: Raw article text.

    Returns:
        ``(txt, result)``: the unmodified input and the generated summary.
    """
    # The model was fine-tuned on unidecode-transliterated inputs, so the same
    # normalisation is applied before tokenizing (e.g. curly quotes become
    # ASCII quotes).
    test_data = ["summarize: " + unidecode(txt)]
    inference_tokenizer, tokenized_data = prod_inference_tokenize(input=test_data, n_tokens=INPUT_N_TOKENS)
    pred = loaded_model.generate(**tokenized_data, max_new_tokens=TARGET_N_TOKENS)
    # Let the tokenizer drop its special tokens instead of regex-stripping
    # every "<...>" span, which would also delete legitimate angle-bracketed
    # text from the summary.
    result = inference_tokenizer.decode(pred[0], skip_special_tokens=True).strip()
    print(f"INPUT TEXT:\n{txt}\n\nSUMMARY:\n{result}")
    return (txt, result)

In [93]:
txt = '''
Heavy rainfall in several parts of north India has plunged the region into chaos, with more than 28 reported dead in the past three days. Cities and towns are grappling with the aftermath as roads and buildings remain submerged in knee-deep water, including the capital Delhi where the situation is expected to worsen as the weather department predicts more downpour in the coming days.

All schools in Delhi and Gurugram have been closed today due to the heavy rain-induced waterlogging. Videos show Delhi’s Connaught Place submerged in water prompting a Twitter user to brand it “Connaught River”.
'''
txt, result = loaded_model_inference(txt)

INPUT TEXT:

Heavy rainfall in several parts of north India has plunged the region into chaos, with more than 28 reported dead in the past three days. Cities and towns are grappling with the aftermath as roads and buildings remain submerged in knee-deep water, including the capital Delhi where the situation is expected to worsen as the weather department predicts more downpour in the coming days.

All schools in Delhi and Gurugram have been closed today due to the heavy rain-induced waterlogging. Videos show Delhi’s Connaught Place submerged in water prompting a Twitter user to brand it “Connaught River”.


SUMMARY:
Heavy rainfall in several parts of north India has plunged the region into chaos. More than 28 people have died in the past three days. All schools in Delhi and Gurugram have been closed due to the heavy rain-induced waterlogging.


In [10]:
txt = '''
Tata Consultancy Services (TCS) is holding up onboarding of lateral hires with 1.8 to up to 15 years of experience by three months amidst project commencement delays, multiple sources in the know told Moneycontrol. This is coming at a time when the Indian IT sector has already taken a hit by macroeconomic headwinds and is facing project deferrals and ramp-downs as their clients cut down on tech budgets.

Over 200 lateral recruits across cities including Bangalore, Pune, Kochi, Bhubaneswar, Delhi NCR, and Indore to name a few are impacted by the delays.

These joiners were hired between January and April and were initially facing onboarding delays by a month. Many of them got two to three subsequent new joining dates. On July 10, however, many received emails stating that their joining dates are getting pushed to October.

The information about a deferral in onboarding date hasn’t even been communicated proactively to those waiting to join the company, according to multiple people Moneycontrol spoke to.'''
txt, result = loaded_model_inference(txt)

INPUT TEXT:

Tata Consultancy Services (TCS) is holding up onboarding of lateral hires with 1.8 to up to 15 years of experience by three months amidst project commencement delays, multiple sources in the know told Moneycontrol. This is coming at a time when the Indian IT sector has already taken a hit by macroeconomic headwinds and is facing project deferrals and ramp-downs as their clients cut down on tech budgets.

Over 200 lateral recruits across cities including Bangalore, Pune, Kochi, Bhubaneswar, Delhi NCR, and Indore to name a few are impacted by the delays.

These joiners were hired between January and April and were initially facing onboarding delays by a month. Many of them got two to three subsequent new joining dates. On July 10, however, many received emails stating that their joining dates are getting pushed to October.

The information about a deferral in onboarding date hasn’t even been communicated proactively to those waiting to join the company, according to multiple

In [11]:
txt = '''
The Union government has collected Rs 4.75 lakh crore in direct taxes in the first quarter of 2023-24, the Finance Ministry said on July 9.

As per the ministry, as on July 9, the direct tax collected was 15.87 percent higher compared to the same period last financial year. This collection is 26.05 percent of the total budget estimates of direct taxes for FY 2023-24.

Refunds amounting to Rs. 42,000 crore have been issued during April 1 to July 9, which are 2.55 percent higher than refunds issued during the same period in the preceding year, the ministry said.

The overall rate of growth is faster than what the government had anticipated. In the Union Budget for 2023-24, presented by Finance Minister Nirmala Sitharaman on February 1, direct tax collections were seen rising 10.5 percent from 2022-23.
'''
txt, result = loaded_model_inference(txt)

INPUT TEXT:

The Union government has collected Rs 4.75 lakh crore in direct taxes in the first quarter of 2023-24, the Finance Ministry said on July 9.

As per the ministry, as on July 9, the direct tax collected was 15.87 percent higher compared to the same period last financial year. This collection is 26.05 percent of the total budget estimates of direct taxes for FY 2023-24.

Refunds amounting to Rs. 42,000 crore have been issued during April 1 to July 9, which are 2.55 percent higher than refunds issued during the same period in the preceding year, the ministry said.

The overall rate of growth is faster than what the government had anticipated. In the Union Budget for 2023-24, presented by Finance Minister Nirmala Sitharaman on February 1, direct tax collections were seen rising 10.5 percent from 2022-23.


SUMMARY:
The Union government has collected Rs 4.75 lakh crore in direct taxes in the first quarter of 2023-24. The direct tax collected was 15.87 percent higher compared to 

In [12]:
txt = '''
India, the world's second-largest sugar producer and a major exporter in recent years, will likely have a smaller role in the sugar export market going forward as its government-led ethanol program continues to expand, a report said on Monday.

According to the report Asia Biofuel Outlook, produced by research firm BMI, a unit of Fitch Solutions, India's pursuit of increased ethanol blending in gasoline, as a way to cut the oil products' import bill and reduce carbon emissions, will continue to support global sugar prices.

BMI says that there is currently a fast development of additional capacity to produce ethanol in India, where the biofuel is made mainly from sugarcane.

As more ethanol plants start production, more of the country's sugarcane crop will be used to make the fuel, limiting the amount of sugar that will be produced.
'''
txt, result = loaded_model_inference(txt)

INPUT TEXT:

India, the world's second-largest sugar producer and a major exporter in recent years, will likely have a smaller role in the sugar export market going forward as its government-led ethanol program continues to expand, a report said on Monday.

According to the report Asia Biofuel Outlook, produced by research firm BMI, a unit of Fitch Solutions, India's pursuit of increased ethanol blending in gasoline, as a way to cut the oil products' import bill and reduce carbon emissions, will continue to support global sugar prices.

BMI says that there is currently a fast development of additional capacity to produce ethanol in India, where the biofuel is made mainly from sugarcane.

As more ethanol plants start production, more of the country's sugarcane crop will be used to make the fuel, limiting the amount of sugar that will be produced.


SUMMARY:
India will likely have a smaller role in the sugar export market, a report says. The report is produced by research firm BMI. The r

In [13]:
txt = '''
On July 4, police arrested a Pakistani woman who entered Indian illegally in May and was found living in Greater Noida with a man she met online via a gaming app, PUBG. The 27-year-old woman had searched YouTube for ways to gain entry into India. However, the duo were released and the woman has requested the government to grant her and her kids the citizenship of India as she is in love with Sachin Meena. Seema says that she has adopted Hinduism and alleged that her ex-husband beats her in Pakistan. Here's the full story of a love story which started on PUBG.'''
txt, result = loaded_model_inference(txt)

INPUT TEXT:

On July 4, police arrested a Pakistani woman who entered Indian illegally in May and was found living in Greater Noida with a man she met online via a gaming app, PUBG. The 27-year-old woman had searched YouTube for ways to gain entry into India. However, the duo were released and the woman has requested the government to grant her and her kids the citizenship of India as she is in love with Sachin Meena. Seema says that she has adopted Hinduism and alleged that her ex-husband beats her in Pakistan. Here's the full story of a love story which started on PUBG.

SUMMARY:
Pakistani woman entered Indian illegally in May and was found living in Greater Noida. The 27-year-old had searched YouTube for ways to gain entry into India. The duo were released and the woman has requested the government to grant her and her kids the citizenship of India. Seema says that she has adopted Hinduism and alleged that her ex-husband beat her in Pakistan.


In [94]:
txt = '''
Google's Med-PaLM 2 is reportedly being tested in various hospitals since April of this year.

The AI chatbot designed to answer medical questions is based on Google's PaLM 2, the Large Language Model (LLM) it announced at Google I/O in May. The model will also power Bard, its conversational and generative AI chatbot.
As reported by The Wall Street Journal, which managed to get their hands on an internal mail, Med-PaLM 2 was trained on a set of expert medical demonstrations and has been designed to be used for Healthcare related problems.
'''
txt, result = loaded_model_inference(txt)

INPUT TEXT:

Google's Med-PaLM 2 is reportedly being tested in various hospitals since April of this year.

The AI chatbot designed to answer medical questions is based on Google's PaLM 2, the Large Language Model (LLM) it announced at Google I/O in May. The model will also power Bard, its conversational and generative AI chatbot.
As reported by The Wall Street Journal, which managed to get their hands on an internal mail, Med-PaLM 2 was trained on a set of expert medical demonstrations and has been designed to be used for Healthcare related problems.


SUMMARY:
Med-PaLM 2 is being tested in various hospitals since April of this year. The AI chatbot is based on Google's PaLM 2, the Large Language Model (LLM) it announced at Google I/O in May. The model will also power Bard, its conversational and generative AI chatbot.
