In [25]:
!pip install transformers



In [27]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
from torch.utils.data import DataLoader, Dataset

In [28]:
# Sample data for training
sample_data = [
    {'input_text': 'This is an example input text.', 'target_text': 'Example summary.'},
    {'input_text': 'Another input text for summarization.', 'target_text': 'Summary of the second text.'},
    # Add more samples as needed
]

In [29]:
# Define your dataset class
class CustomDataset(Dataset):
    def __init__(self, data):
        self.data = data
        self.tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Access input and target text from the sample dictionary
        sample = self.data[idx]
        input_text = sample['input_text']
        target_text = sample['target_text']

        # Tokenize input and target text using the tokenizer
        input_encoding = self.tokenizer(input_text, truncation=True, padding='max_length', max_length=512)
        target_encoding = self.tokenizer(target_text, truncation=True, padding='max_length', max_length=64)

        # Convert tokenized sequences to tensors
        input_ids = torch.tensor(input_encoding['input_ids'])
        attention_mask = torch.tensor(input_encoding['attention_mask'])
        target_ids = torch.tensor(target_encoding['input_ids'])  # Use input_ids for target since it's an autoregressive model

        return input_ids, attention_mask, target_ids

In [30]:
# Create an instance of CustomDataset with the sample data
dataset = CustomDataset(sample_data)

In [31]:
# Define hyperparameters and variables
batch_size = 2
learning_rate = 5e-5
num_epochs = 5

In [32]:
# Load model
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')

In [33]:
# Define dataloader
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [34]:
# Define optimizer and scheduler
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

In [35]:
# Training loop
for epoch in range(num_epochs):
    model.train()
    for batch in dataloader:
        input_ids, attention_mask, target_ids = batch
        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, decoder_input_ids=target_ids[:, :-1])
        logits = outputs.logits
        loss = torch.nn.functional.cross_entropy(logits.view(-1, logits.shape[-1]), target_ids[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()
    scheduler.step()

In [37]:
# Inference function
def generate_summary(input_text):
    model.eval()
    inputs = dataset.tokenizer([input_text], max_length=1024, return_tensors='pt', truncation=True)
    summary_ids = model.generate(inputs['input_ids'], num_beams=4, max_length=150, early_stopping=True)
    summary = dataset.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

In [38]:
input_text = "The rapid advancement of artificial intelligence (AI) has transformed various industries, including healthcare, finance, and transportation. AI technologies such as machine learning and natural language processing are being used to analyze large datasets, automate tasks, and improve decision-making processes. For instance, in healthcare, AI-powered systems can assist doctors in diagnosing diseases more accurately and quickly. In finance, AI algorithms are used for fraud detection and risk assessment. Moreover, in transportation, AI plays a crucial role in the development of autonomous vehicles, leading to safer and more efficient transportation systems. Overall, AI continues to revolutionize industries and has the potential to drive significant innovation and growth in the future"
summary = generate_summary(input_text)

# Print the generated summary
print("Generated Summary:")
print(summary)

Generated Summary:
The rapid advancement of artificial intelligence has transformed various industries, including healthcare, finance, and transportation. In healthcare, AI-powered systems can assist doctors in diagnosing diseases more accurately and quickly. In finance, AI algorithms are used for fraud detection and risk assessment. In transportation, AI plays a crucial role in the development of autonomous vehicles.


In [39]:
len(input_text)

790

In [40]:
len(summary)

402