<a href="https://colab.research.google.com/github/rohithr2511/FaceTextSummarization/blob/main/FaceTextSummarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
## Necessary libraries
import argparse
import os
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from tqdm import tqdm

In [8]:
# Loading model and tokenizer
model_name = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)

Device set to use cpu


In [9]:
# Chunking text to fit model's input limits
def chunk_text(text, max_tokens=800):
    words = text.split()
    for i in range(0, len(words), max_tokens):
        yield " ".join(words[i:i + max_tokens])

In [10]:
# Summarization function
def summarize_text(text):
    chunks = list(chunk_text(text))
    summary = ""
    for chunk in tqdm(chunks, desc="Summarizing"):
        result = summarizer(chunk, max_length=200, min_length=100, do_sample=False)[0]
        summary += result['summary_text'] + " "
    return summary.strip()

In [15]:
# Example: File I/O
input_path = "/content/input1.txt"   # input1.txt path
output_path = "/content/output_summary.txt"

In [16]:
# Reading input text
with open(input_path, 'r', encoding='utf-8') as file:
    text = file.read()

In [17]:
# Generating summary
summary = summarize_text(text)

Summarizing:   0%|          | 0/1 [00:00<?, ?it/s]Your max_length is set to 200, but your input_length is only 179. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=89)
Summarizing: 100%|██████████| 1/1 [00:29<00:00, 29.33s/it]


In [18]:
# Saving summary to output file
with open(output_path, 'w', encoding='utf-8') as file:
    file.write(summary)

print(f"✅ Summary saved to: {output_path}")

✅ Summary saved to: /content/output_summary.txt
