# In [3]:
from transformers import pipeline

# Build the summarization pipeline once at module load; generate_summary()
# reuses this single `summarizer` object for every chunk it processes.
# NOTE(review): device=0 presumably pins the pipeline to the first GPU —
# confirm this module is never expected to run on a CPU-only host.
MODEL_NAME = "facebook/bart-large-cnn"
summarizer = pipeline("summarization", model=MODEL_NAME, device=0)

def chunk_text(text, max_chunk_size=1024):
    """Split *text* into chunks of whole words, each at most *max_chunk_size* characters.

    The text is split on whitespace and the words are re-joined with single
    spaces, so runs of whitespace (including newlines) are normalized.
    Words are never broken: a single word longer than *max_chunk_size*
    is emitted as its own (oversized) chunk.

    Args:
        text: The string to split.
        max_chunk_size: Maximum length, in characters, of each joined chunk.

    Returns:
        A list of non-empty chunk strings in original word order; an empty
        list when *text* contains no words.
    """
    chunks = []
    current_chunk = []
    current_size = 0  # always equals len(' '.join(current_chunk))
    for word in text.split():
        # A separating space is only needed when the chunk already has a word.
        separator = 1 if current_chunk else 0
        candidate_size = current_size + separator + len(word)
        # Only flush a non-empty chunk: an oversized first word must not
        # produce an empty-string chunk ahead of it.
        if current_chunk and candidate_size > max_chunk_size:
            chunks.append(' '.join(current_chunk))
            current_chunk, current_size = [word], len(word)
        else:
            current_chunk.append(word)
            current_size = candidate_size
    if current_chunk:
        chunks.append(' '.join(current_chunk))
    return chunks

def generate_summary(text):
    """Summarize *text* chunk by chunk and return the joined summaries.

    The text is split with ``chunk_text``; chunks containing fewer than
    10 whitespace-separated words are skipped. Each remaining chunk is
    passed to the module-level ``summarizer`` and the ``'summary_text'``
    of its first result is collected.

    Args:
        text: The string to summarize.

    Returns:
        The per-chunk summaries joined with single spaces, or an empty
        string when no chunk has at least 10 words.
    """
    partial_summaries = [
        summarizer(
            segment,
            max_length=150,
            min_length=30,
            do_sample=False,
            num_beams=4,
            # Passed explicitly to avoid a warning.
            clean_up_tokenization_spaces=True,
        )[0]['summary_text']
        for segment in chunk_text(text)
        if len(segment.split()) >= 10
    ]
    return " ".join(partial_summaries)
