In [1]:
# # # !pip install transformers accelerate
# !pip install datasets

In [2]:
from datasets import load_dataset

In [3]:
multinews = load_dataset("multi_news", split = 'test',trust_remote_code=True)

In [4]:
multinews.to_pandas()

Unnamed: 0,document,summary
0,GOP Eyes Gains As Voters In 11 States Pick Gov...,– It's a race for the governor's mansion in 11...
1,\n \n \n \n UPDATE: 4/19/2001 Read Richard Met...,– It turns out Facebook is only guilty of abou...
2,It's the Golden State's latest version of the ...,– Not a big fan of Southern California? Neithe...
3,The seed for this crawl was a list of every ho...,– Why did Microsoft buy Nokia's phone business...
4,After a year in which liberals scored impressi...,– The Supreme Court is facing a docket of high...
...,...,...
5617,Tweet with a location \n \n You can add locati...,– The traditional end-of-summit group photo at...
5618,Loic Venance/AFP/Getty Images \n \n The awards...,– Sofia Coppola scored a historic victory at t...
5619,(CNN) A federal criminal investigation into a ...,– The duck boat sinking that killed 17 on a Mi...
5620,An archive of the public statements deleted by...,– Note to tweeting politicians: Watch what you...


In [5]:
from transformers import AutoTokenizer

In [6]:
tokenizer = AutoTokenizer.from_pretrained('t5-small')

In [7]:
multi_news = multinews.train_test_split(test_size=0.2)

In [8]:
prefix = 'summarize'
def process_function(examples):
    input = [prefix + doc for doc in examples['document']]
    model_input = tokenizer(input , max_length = 1024, truncation = True)
    labels = tokenizer(text = examples['summary'], max_length = 128)
    model_input['labels'] = labels['input_ids']

    return model_input

In [9]:
tokenized_multi_news = multi_news.map(process_function , batched = True)

Map:   0%|          | 0/4497 [00:00<?, ? examples/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Map:   0%|          | 0/1125 [00:00<?, ? examples/s]

In [10]:
from transformers import DataCollatorForSeq2Seq , AutoModelForSeq2SeqLM ,Seq2SeqTrainingArguments, Seq2SeqTrainer

In [11]:
data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer , model = 't5_small')
model = AutoModelForSeq2SeqLM.from_pretrained('t5-small')

In [12]:
trainings_args = Seq2SeqTrainingArguments(
    output_dir = "./results",
    eval_strategy = 'epoch',
    learning_rate = 2e-5,
    per_device_train_batch_size = 10,
    per_device_eval_batch_size = 10,
    weight_decay = 0.1,
    save_total_limit = 3,
    num_train_epochs = 10,
    fp16 = True
)

In [13]:
trainer = Seq2SeqTrainer(
    model = model, 
    args = trainings_args,
    train_dataset = tokenized_multi_news['train'],
    eval_dataset = tokenized_multi_news['test'],
    tokenizer = tokenizer,
    data_collator = data_collator,
)

In [None]:
trainer.train()

Epoch,Training Loss,Validation Loss


In [None]:
text = """
    Deep learning is a subset of machine learning that uses multilayered neural networks, called deep neural networks, to simulate the complex decision-making power of the human brain. Some form of deep learning powers most of the artificial intelligence (AI) applications in our lives today.

The chief difference between deep learning and machine learning is the structure of the underlying neural network architecture. “Nondeep,” traditional machine learning models use simple neural networks with one or two computational layers. Deep learning models use three or more layers—but typically hundreds or thousands of layers—to train the models.

While supervised learning models require structured, labeled input data to make accurate outputs, deep learning models can use unsupervised learning. With unsupervised learning, deep learning models can extract the characteristics, features and relationships they need to make accurate outputs from raw, unstructured data. Additionally, these models can even evaluate and refine their outputs for increased precision.

Deep learning is an aspect of data science that drives many applications and services that improve automation, performing analytical and physical tasks without human intervention. This enables many everyday products and services—such as digital assistants, voice-enabled TV remotes, credit card fraud detection, self-driving cars and generative AI. 

Neural networks, or artificial neural networks, attempt to mimic the human brain through a combination of data inputs, weights and bias—all acting as silicon neurons. These elements work together to accurately recognize, classify and describe objects within the data.

Deep neural networks consist of multiple layers of interconnected nodes, each building on the previous layer to refine and optimize the prediction or categorization. This progression of computations through the network is called forward propagation. The input and output layers of a deep neural network are called visible layers. The input layer is where the deep learning model ingests the data for processing, and the output layer is where the final prediction or classification is made.

Another process called backpropagation uses algorithms, such as gradient descent, to calculate errors in predictions, and then adjusts the weights and biases of the function by moving backwards through the layers to train the model. Together, forward propagation and backpropagation enable a neural network to make predictions and correct for any errors . Over time, the algorithm becomes gradually more accurate.

Deep learning requires a tremendous amount of computing power. High-performance graphical processing units (GPUs) are ideal because they can handle a large volume of calculations in multiple cores with copious memory available. Distributed cloud computing might also assist. This level of computing power is necessary to train deep algorithms through deep learning. However, managing multiple GPUs on premises can create a large demand on internal resources and be incredibly costly to scale. For software requirements, most deep learning apps are coded with one of these three learning frameworks: JAX, PyTorch or TensorFlow.
"""

In [None]:
inputs_ids = tokenize(text,max_length=1024, truncation = True, return_tensors= 'pt').input_ids
input_ids = input_ids.to('cuda')

In [None]:
import torch
with torch.no_grad():
    if model.device.type == 'cuda':
        output = model.generate(input_ids, max_length=128, num_beams = 5)

summary_ids = output[0].tolist()

summary = tokenizer.decode(summary_ids, skip_special_tokens = True)
print(summary)

In [None]:
ref_summary  = """
    Deep learning is a subset of machine learning that 
    uses multilayered neural networks to simulate the complex decision-making power of the human brain. It powers most artificial intelligence (AI) applications in our lives today. Deep learning models use hundreds or thousands of layers, typically hundreds or thousands, to train the models. They can use unsupervised learning to extract characteristics, features, and relationships from raw, unstructured data, and can evaluate and refine their outputs for increased precision. Deep learning drives many applications and services that improve automation, performing analytical and physical tasks without human intervention. It requires a significant amount of computing power, such as high-performance GPUs or distributed cloud computing, to train deep algorithms through deep learning. Most deep learning apps are coded with one of
    three learning frameworks: JAX, PyTorch, or TensorFlow.

"""

In [None]:
from rouge import Rouge

rouge = Rouge()
scores = rouge.get_scores(summary, ref_summary)
scores

In [None]:
trainer.save_model()

In [None]:
model.save_pretrained("summarizer")