In [1]:
import torch 
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
# Checkpoint used for abstractive summarization throughout the notebook.
MODEL_NAME = "facebook/bart-large-cnn"

# Load the matching tokenizer and seq2seq model; `.to()` returns the module
# itself, so the model can be placed on `device` in the same statement.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)

# Switch to inference mode (disables dropout); as the last expression of the
# cell this also displays the model architecture.
model.eval()

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
        

In [4]:
def summarize_text(
    text: str,
    max_input_length: int = 1024,
    max_summary_length: int = 150,
    min_summary_length: int = 40
) -> str:
    """Summarize a single piece of text with the notebook's seq2seq model.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: The text to summarize. Input longer than ``max_input_length``
            tokens is truncated by the tokenizer.
        max_input_length: Maximum number of input tokens kept after
            truncation.
        max_summary_length: ``max_length`` passed to ``model.generate``.
        min_summary_length: ``min_length`` passed to ``model.generate``.

    Returns:
        The decoded summary with special tokens removed, or an empty string
        when ``text`` is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the model instead of forcing it to generate
    # at least `min_summary_length` tokens from an empty input.
    if not text or not text.strip():
        return ""

    # Only one sequence is encoded per call, so padding is unnecessary.
    # Padding to `max_length` would make every call process
    # `max_input_length` positions even for a two-sentence input.
    encoded = tokenizer(
        text,
        truncation=True,
        max_length=max_input_length,
        return_tensors="pt",
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Inference only: no gradients are needed for generation.
    with torch.no_grad():
        summary_ids = model.generate(
            **encoded,
            max_length=max_summary_length,
            min_length=min_summary_length,
            num_beams=4,
            length_penalty=2.0,
            early_stopping=True,
        )

    # A single input yields a single output sequence; decode that one.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [5]:
# Short sample review used to smoke-test single-pass summarization.
text = """
A very well made movie. Loved every minute of it.
It was a roller coaster ride. The acting was great,
the direction was sharp, and the story kept me hooked.
There were a few slow moments, but overall it was a
fantastic experience that I would happily watch again.
"""

print(summarize_text(text=text))

A very well made movie. Loved every minute of it. The acting was great, the direction was sharp, and the story kept me hooked. There were a few slow moments, but overall it was aantastic experience that I would happily watch again.


In [6]:
def chunk_text(
    text: str,
    tokenizer,
    max_tokens: int = 1024,
    overlap: int = 50
):
    """Split text into token-based chunks with overlap.

    The text is encoded without special tokens, cut into windows of at most
    ``max_tokens`` tokens, and each window is decoded back to a string.
    Consecutive windows share ``overlap`` tokens to preserve context across
    chunk boundaries.

    Args:
        text: The text to split.
        tokenizer: Object exposing ``encode(text, add_special_tokens=False)``
            and ``decode(tokens)``.
        max_tokens: Maximum number of tokens per chunk; must be positive.
        overlap: Number of tokens shared between consecutive chunks; must
            satisfy ``0 <= overlap < max_tokens``.

    Returns:
        A list of decoded chunk strings, in document order. Empty when the
        text encodes to no tokens.

    Raises:
        ValueError: If ``max_tokens`` is not positive or ``overlap`` is
            outside ``[0, max_tokens)``.
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")
    # With overlap >= max_tokens the window would never advance and the
    # loop would spin forever, so reject it up front.
    if not 0 <= overlap < max_tokens:
        raise ValueError("overlap must satisfy 0 <= overlap < max_tokens")

    tokens = tokenizer.encode(text, add_special_tokens=False)
    stride = max_tokens - overlap

    chunks = []
    for start in range(0, len(tokens), stride):
        end = start + max_tokens
        chunks.append(tokenizer.decode(tokens[start:end]))
        # Once a window reaches the end of the text, stop: another window
        # would contain only tokens already covered by this one.
        if end >= len(tokens):
            break

    return chunks


In [None]:
def summarize_chunk(
    text: str,
    max_input_length: int = 1024,
    max_summary_length: int = 150,
    min_summary_length: int = 40
) -> str:
    """Summarize one chunk of a longer document.

    This is the per-chunk entry point used by the long-text pipeline. It
    performs exactly the same tokenization, generation and decoding as
    ``summarize_text``, so it delegates to that function rather than keeping
    a second copy of the same code that could drift out of sync.

    Args:
        text: The chunk to summarize.
        max_input_length: Maximum number of input tokens kept after
            truncation.
        max_summary_length: Upper bound on the generated summary length.
        min_summary_length: Lower bound on the generated summary length.

    Returns:
        The decoded summary of the chunk.
    """
    return summarize_text(
        text,
        max_input_length=max_input_length,
        max_summary_length=max_summary_length,
        min_summary_length=min_summary_length,
    )


In [8]:
def summarize_long_text(
    text: str,
    max_tokens: int = 1024,
    overlap: int = 50,
    final_summary: bool = True
) -> str:
    """Summarize text that may exceed the model's input window.

    The text is split into overlapping token chunks, each chunk is
    summarized on its own, and the chunk summaries are joined with spaces.
    When more than one chunk was produced and ``final_summary`` is true, the
    joined summaries are summarized once more into a single shorter summary.

    Relies on the module-level ``tokenizer`` and ``model`` and on the
    ``chunk_text`` and ``summarize_chunk`` helpers.

    Args:
        text: The document to summarize.
        max_tokens: Requested maximum number of content tokens per chunk.
            It is capped so that a chunk plus the tokenizer's special tokens
            still fits into the model input.
        overlap: Number of tokens shared between consecutive chunks.
        final_summary: Whether to run a second summarization pass over the
            joined chunk summaries.

    Returns:
        The final summary, or an empty string when the text yields no chunks.

    Raises:
        ValueError: If ``overlap`` is negative or not smaller than the
            effective chunk size.
    """
    # Chunks are cut from tokens encoded WITHOUT special tokens, but
    # summarize_chunk re-tokenizes each chunk WITH them and truncates at the
    # input limit. Reserve room for the special tokens so the tail of a full
    # chunk is not silently cut off.
    # NOTE(review): falls back to 1024 (summarize_chunk's default) when the
    # model config exposes no `max_position_embeddings` - confirm if the
    # checkpoint is changed.
    input_limit = getattr(model.config, "max_position_embeddings", 1024)
    special_tokens = tokenizer.num_special_tokens_to_add(pair=False)
    chunk_budget = min(max_tokens, input_limit - special_tokens)

    # An overlap that is not smaller than the chunk size would keep the
    # chunking window from advancing.
    if not 0 <= overlap < chunk_budget:
        raise ValueError(
            "overlap must be >= 0 and smaller than the effective chunk size "
            f"({chunk_budget} tokens)"
        )

    chunks = chunk_text(
        text,
        tokenizer=tokenizer,
        max_tokens=chunk_budget,
        overlap=overlap
    )

    chunk_summaries = [
        summarize_chunk(chunk, max_input_length=input_limit)
        for chunk in chunks
    ]
    merged_summary = " ".join(chunk_summaries)

    # Optional second-pass summarization; pointless for a single chunk,
    # whose summary is already the final result.
    if final_summary and len(chunks) > 1:
        merged_summary = summarize_chunk(
            merged_summary,
            max_input_length=input_limit,
            max_summary_length=180,
            min_summary_length=60
        )

    return merged_summary


In [9]:
# Multi-paragraph sample review used to exercise the chunked
# (long-document) summarization pipeline.
long_text = """
I went into this movie with fairly high expectations, mostly because of the cast and the amount of praise it had been receiving online. From the very first scene, it was clear that a lot of effort had gone into the visual presentation. The cinematography was impressive, with well-composed shots and a color palette that set the mood effectively. The background score also complemented the scenes nicely, never overpowering the dialogue but still adding emotional weight where needed.

The story itself starts off strong, introducing the main characters and their motivations in a way that feels natural rather than forced. The first act does a good job of building intrigue and making you care about what happens next. The lead actor delivers a convincing performance, showing a good range of emotions and making the character feel believable. Supporting characters also have their moments, especially one standout performance that adds depth to what could have been a very generic role.

However, as the movie progresses into the second act, the pacing begins to slow down noticeably. Certain scenes feel unnecessarily stretched, and there are moments where the narrative seems to lose focus. While some of these slower moments help in character development, others feel repetitive and could have been trimmed without affecting the overall story. This is where the movie tests the viewer’s patience, especially for those who prefer tighter storytelling.

The screenplay has its strengths, particularly in its dialogue. Many conversations feel authentic and grounded, avoiding overly dramatic or unrealistic exchanges. That said, there are also a few lines that feel cliché and predictable, which slightly detracts from the otherwise solid writing. The themes explored in the movie—such as ambition, regret, and personal growth—are handled with a decent level of maturity, even if they aren’t particularly groundbreaking.

One of the highlights of the film is its direction. The director clearly has a strong vision and isn’t afraid to let scenes breathe. There are several moments of silence that speak louder than words, allowing the audience to absorb the emotions of the characters. The use of close-up shots during key emotional moments is especially effective and adds intimacy to the storytelling.

As the film moves toward its climax, it regains some of the momentum it lost earlier. The conflicts introduced in the first half finally come to a head, and the stakes feel real. The emotional payoff, while not perfect, is satisfying enough to justify the buildup. The final act ties up most loose ends, though a few questions are left unanswered, which some viewers may find frustrating.

The ending itself is bittersweet and stays true to the tone of the movie. It avoids taking the easy route and instead opts for a conclusion that feels realistic, even if it’s not entirely uplifting. This choice may divide audiences, but it fits the story the film was trying to tell.

Overall, this movie is a well-made piece of cinema with strong performances, good direction, and high production values. While it does suffer from pacing issues and a few predictable moments, it still manages to leave a lasting impression. It’s not a perfect film, but it’s one that stays with you after the credits roll. I wouldn’t call it a masterpiece, but it’s definitely worth watching, especially if you appreciate character-driven stories and thoughtful filmmaking.
"""

print(summarize_long_text(text=long_text))


The story itself starts off strong, introducing the main characters and their motivations in a way that feels natural rather than forced. The lead actor delivers a convincing performance, showing a good range of emotions and making the character feel believable. The themes explored in the movie are handled with a decent level of maturity, even if they aren’t particularly groundbreaking.
