In [2]:
# Fix compatibility issues and optimize performance
import warnings
import os

# Configure process-wide environment variables and warning filters for the notebook.
# NOTE(review): presumably this cell has to run before transformers/TensorFlow are
# imported for the variables to take effect — confirm against the libraries' docs.

# Suppress TensorFlow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE: this blanket filter silences every Python warning, including deprecation notices.
warnings.filterwarnings('ignore')

# Set environment variables for better performance
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

# Keep downloaded checkpoints in a directory relative to the current working directory.
# HF_HUB_CACHE is the cache variable read by current huggingface_hub releases.
# TRANSFORMERS_CACHE is deprecated in Transformers; it is still set so that older
# installs keep using the same directory.
MODEL_CACHE_DIR = './model_cache'
os.environ['HF_HUB_CACHE'] = MODEL_CACHE_DIR
os.environ['TRANSFORMERS_CACHE'] = MODEL_CACHE_DIR

print("✅ Environment optimized for better performance")

✅ Environment optimized for better performance


# Small Model: DistilBART with T5-Small Fallback

In [6]:
# Optimized Fast Summarization with Smaller Models
import json
from transformers import pipeline, set_seed
import time

# Benchmark a small summarization model on a few dialogues from test.json:
# load the pipeline (DistilBART, falling back to T5-small), summarize each sample,
# then print per-sample and aggregate timings.

# Set seed for reproducibility
set_seed(42)

print("🚀 Loading optimized summarization model...")
start_time = time.perf_counter()  # perf_counter is the clock intended for measuring durations

# Settings shared by the primary and the fallback pipeline; only the checkpoint differs.
pipeline_kwargs = dict(
    device=-1,  # Force CPU usage (set to 0 for GPU if available)
    batch_size=1,
    max_length=512,
    truncation=True,
)

# Use a smaller, faster model for better performance
try:
    # Option 1: Faster BART model
    summarizer = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-6-6",  # Smaller, faster version of BART
        **pipeline_kwargs,
    )
    model_name = "DistilBART (Optimized)"
except Exception as e:
    # Deliberately broad: any failure to load the first checkpoint triggers the fallback.
    print(f"⚠️ DistilBART not available, falling back to T5: {e}")
    # Fallback to T5-small
    summarizer = pipeline(
        "summarization",
        model="t5-small",
        **pipeline_kwargs,
    )
    model_name = "T5-Small (Fallback)"

load_time = time.perf_counter() - start_time
print(f"✅ Model loaded in {load_time:.2f} seconds: {model_name}")

# Load test data: the file holds one JSON object per line. Blank lines (for example a
# trailing newline at the end of the file) are skipped instead of raising JSONDecodeError.
with open("test.json", "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Use first 5 samples for faster testing
samples = data[:5]
print(f"📊 Processing {len(samples)} samples for performance testing...")

# Process samples with timing
total_start = time.perf_counter()
results = []

for i, sample in enumerate(samples, 1):
    sample_start = time.perf_counter()
    dialogue = sample["dialogue"]

    # Optimized summarization parameters
    try:
        summary_result = summarizer(
            dialogue,
            max_length=50,      # Reduced for speed
            min_length=10,
            do_sample=False,    # Deterministic output
            truncation=True,
            clean_up_tokenization_spaces=True
        )
        summary = summary_result[0]["summary_text"]
    except Exception as e:
        # Best effort: report the failure and record a placeholder so the run continues.
        print(f"⚠️ Error processing sample {i}: {e}")
        summary = "Error in summarization"

    # Measured before printing so console output is not counted in the per-sample time.
    sample_time = time.perf_counter() - sample_start

    print(f"\n📋 Sample {i} (processed in {sample_time:.2f}s):")
    print(f"Original: {dialogue[:100]}...")
    print(f"Generated: {summary}")
    print(f"Reference: {sample['summary']}")

    results.append({
        'sample': i,
        'processing_time': sample_time,
        'generated_summary': summary,
        'reference_summary': sample['summary']
    })

total_time = time.perf_counter() - total_start
# Guard the averages: an empty test file would otherwise raise ZeroDivisionError.
avg_time = total_time / len(samples) if samples else 0.0

print(f"\n⏱️ Performance Summary:")
print(f"Total time: {total_time:.2f} seconds")
print(f"Average per sample: {avg_time:.2f} seconds")
if avg_time > 0:
    print(f"Samples per minute: {60/avg_time:.1f}")
print(f"Model used: {model_name}")

🚀 Loading optimized summarization model...


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/460M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/460M [00:00<?, ?B/s]

⚠️ DistilBART not available, falling back to T5: Could not load model sshleifer/distilbart-cnn-6-6 with any of the following classes: (<class 'transformers.models.auto.modeling_auto.AutoModelForSeq2SeqLM'>, <class 'transformers.models.auto.modeling_tf_auto.TFAutoModelForSeq2SeqLM'>, <class 'transformers.models.bart.modeling_bart.BartForConditionalGeneration'>, <class 'transformers.models.bart.modeling_tf_bart.TFBartForConditionalGeneration'>). See the original errors:

while loading with AutoModelForSeq2SeqLM, an error is thrown:
Traceback (most recent call last):
  File "C:\Users\vmcsa\AppData\Roaming\Python\Python312\site-packages\transformers\pipelines\base.py", line 292, in infer_framework_load_model
    model = model_class.from_pretrained(model, **kwargs)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\vmcsa\AppData\Roaming\Python\Python312\site-packages\transformers\models\auto\auto_factory.py", line 600, in from_pretrained
    return model_class.from_pr

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/460M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu


✅ Model loaded in 27.72 seconds: T5-Small (Fallback)
📊 Processing 5 samples for performance testing...

📋 Sample 1 (processed in 3.51s):
Original: Hannah: Hey, do you have Betty's number?
Amanda: Lemme check
Hannah: <file_gif>
Amanda: Sorry, can't...
Generated: Amanda: Lemme check Hannah: file_gif> Amanda: Sorry, can't find it. he called her last time we were at the park together Hannah: I don't know him well.
Reference: Hannah needs Betty's number but Amanda doesn't have it. She needs to contact Larry.

📋 Sample 2 (processed in 2.26s):
Original: Eric: MACHINE!
Rob: That's so gr8!
Eric: I know! And shows how Americans see Russian ;)
Rob: And it'...
Generated: Rob: I know! I especially like the train part! Rob: Hahaha! No one talks to the machine like that! I'll watch some of his stand-ups on youtube.
Reference: Eric and Rob are going to watch a stand-up on youtube.

📋 Sample 3 (processed in 2.12s):
Original: Lenny: Babe, can you help me with something?
Bob: Sure, what's up?
Lenny: Whic

# Fixing NumPy Compatibility Issues With BART Summarization

In [10]:
import json
from transformers import pipeline

# Summarize the first 10 dialogues of test.json with facebook/bart-large-cnn and print
# each model summary next to the reference summary.

# Load test data from test.json: one JSON object per line. Blank lines (for example a
# trailing newline at the end of the file) are skipped instead of raising JSONDecodeError.
with open("test.json", "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Use only the first 10 dialogues
samples = data[:10]

# Load summarization pipeline using Facebook's BART model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Summarize and print results
for i, sample in enumerate(samples, 1):
    dialogue = sample["dialogue"]
    summary = summarizer(
        dialogue,
        max_length=60,
        min_length=10,
        do_sample=False,
        truncation=True,  # same input handling as the other pipeline cells in this notebook
    )[0]["summary_text"]
    print(f"\nSample {i}:")
    print("Original Dialogue:", dialogue)
    print("Model Summary   :", summary)
    print("Reference Summary:", sample["summary"])


Device set to use cpu



Sample 1:
Original Dialogue: Hannah: Hey, do you have Betty's number?
Amanda: Lemme check
Hannah: <file_gif>
Amanda: Sorry, can't find it.
Amanda: Ask Larry
Amanda: He called her last time we were at the park together
Hannah: I don't know him well
Hannah: <file_gif>
Amanda: Don't be shy, he's very nice
Hannah: If you say so..
Hannah: I'd rather you texted him
Amanda: Just text him 🙂
Hannah: Urgh.. Alright
Hannah: Bye
Amanda: Bye bye
Model Summary   : Hannah asks Amanda for Betty's number. Amanda can't find it. Hannah asks Larry. Amanda asks Larry to call Betty.
Reference Summary: Hannah needs Betty's number but Amanda doesn't have it. She needs to contact Larry.

Sample 2:
Original Dialogue: Eric: MACHINE!
Rob: That's so gr8!
Eric: I know! And shows how Americans see Russian ;)
Rob: And it's really funny!
Eric: I know! I especially like the train part!
Rob: Hahaha! No one talks to the machine like that!
Eric: Is this his only stand-up?
Rob: Idk. I'll check.
Eric: Sure.
Rob: Turns out 

# Using the SAMSum Dataset with the Pegasus Model

In [11]:
import json
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Summarize the first 10 dialogues of test.json with the pegasus-samsum checkpoint,
# calling the tokenizer and model.generate directly, and print each summary next to
# the reference summary.

# Load tokenizer and model
PEGASUS_CHECKPOINT = "transformersbook/pegasus-samsum"
tokenizer = AutoTokenizer.from_pretrained(PEGASUS_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(PEGASUS_CHECKPOINT)

# Load test data from test.json: one JSON object per line. Blank lines (for example a
# trailing newline at the end of the file) are skipped instead of raising JSONDecodeError.
with open("test.json", "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Use only the first 10 dialogues
samples = data[:10]

# Generate and print summaries
for i, sample in enumerate(samples, 1):
    dialogue = sample["dialogue"]

    # Tokenize input (one dialogue at a time, returned as PyTorch tensors)
    inputs = tokenizer(dialogue, truncation=True, padding="longest", return_tensors="pt")

    # Generate summary with beam search
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=60,
        min_length=10,
        num_beams=4,
        length_penalty=2.0,
        early_stopping=True
    )

    # Decode the first (only) generated sequence, dropping special tokens
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Print results
    print(f"\nSample {i}:")
    print("Original Dialogue:", dialogue)
    print("Model Summary    :", summary)
    print("Reference Summary:", sample["summary"])



Sample 1:
Original Dialogue: Hannah: Hey, do you have Betty's number?
Amanda: Lemme check
Hannah: <file_gif>
Amanda: Sorry, can't find it.
Amanda: Ask Larry
Amanda: He called her last time we were at the park together
Hannah: I don't know him well
Hannah: <file_gif>
Amanda: Don't be shy, he's very nice
Hannah: If you say so..
Hannah: I'd rather you texted him
Amanda: Just text him 🙂
Hannah: Urgh.. Alright
Hannah: Bye
Amanda: Bye bye
Model Summary    : Amanda can't find Betty's number. Larry called her last time they were at the park together.
Reference Summary: Hannah needs Betty's number but Amanda doesn't have it. She needs to contact Larry.

Sample 2:
Original Dialogue: Eric: MACHINE!
Rob: That's so gr8!
Eric: I know! And shows how Americans see Russian ;)
Rob: And it's really funny!
Eric: I know! I especially like the train part!
Rob: Hahaha! No one talks to the machine like that!
Eric: Is this his only stand-up?
Rob: Idk. I'll check.
Eric: Sure.
Rob: Turns out no! There are some 

# Using Pipeline for Summarization

In [1]:
from transformers import pipeline
import json

# Summarize the first 10 dialogues of test.json through the high-level summarization
# pipeline (pegasus-samsum checkpoint) and print each summary next to the reference.

# Load the summarization pipeline for pegasus-samsum
summarizer = pipeline("summarization", model="transformersbook/pegasus-samsum")

# Load the JSON file (test.json): one JSON object per line. Blank lines (for example a
# trailing newline at the end of the file) are skipped instead of raising JSONDecodeError.
with open("test.json", "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Take first 10 samples
samples = data[:10]

# Run summarization
# NOTE(review): the cell output still shows "Your max_length is set to 128" warnings.
# That value is not set in this cell — presumably it comes from the checkpoint's
# generation config; confirm before relying on the length limit.
for i, sample in enumerate(samples, 1):
    dialogue = sample["dialogue"]
    summary = summarizer(
        dialogue,
        max_new_tokens=60,    # Use max_new_tokens instead of max_length
        min_length=15,
        do_sample=False,
        truncation=True
    )[0]["summary_text"]

    print(f"\nSample {i}:")
    print("Original Dialogue:", dialogue)
    print("Model Summary    :", summary)
    print("Reference Summary:", sample["summary"])




Device set to use cuda:0
Your max_length is set to 128, but your input_length is only 122. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=61)



Sample 1:
Original Dialogue: Hannah: Hey, do you have Betty's number?
Amanda: Lemme check
Hannah: <file_gif>
Amanda: Sorry, can't find it.
Amanda: Ask Larry
Amanda: He called her last time we were at the park together
Hannah: I don't know him well
Hannah: <file_gif>
Amanda: Don't be shy, he's very nice
Hannah: If you say so..
Hannah: I'd rather you texted him
Amanda: Just text him 🙂
Hannah: Urgh.. Alright
Hannah: Bye
Amanda: Bye bye
Model Summary    : Amanda can't find Betty's number. Larry called her last time they were at the park together.
Reference Summary: Hannah needs Betty's number but Amanda doesn't have it. She needs to contact Larry.

Sample 2:
Original Dialogue: Eric: MACHINE!
Rob: That's so gr8!
Eric: I know! And shows how Americans see Russian ;)
Rob: And it's really funny!
Eric: I know! I especially like the train part!
Rob: Hahaha! No one talks to the machine like that!
Eric: Is this his only stand-up?
Rob: Idk. I'll check.
Eric: Sure.
Rob: Turns out no! There are some 

Your max_length is set to 128, but your input_length is only 113. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=56)



Sample 7:
Original Dialogue: Max: Know any good sites to buy clothes from?
Payton: Sure :) <file_other> <file_other> <file_other> <file_other> <file_other> <file_other> <file_other>
Max: That's a lot of them!
Payton: Yeah, but they have different things so I usually buy things from 2 or 3 of them.
Max: I'll check them out. Thanks. 
Payton: No problem :)
Max: How about u?
Payton: What about me?
Max: Do u like shopping?
Payton: Yes and no.
Max: How come?
Payton: I like browsing, trying on, looking in the mirror and seeing how I look, but not always buying.
Max: Y not?
Payton: Isn't it obvious? ;)
Max: Sry ;)
Payton: If I bought everything I liked, I'd have nothing left to live on ;)
Max: Same here, but probably different category ;)
Payton: Lol
Max: So what do u usually buy?
Payton: Well, I have 2 things I must struggle to resist!
Max: Which are?
Payton: Clothes, ofc ;)
Max: Right. And the second one?
Payton: Books. I absolutely love reading!
Max: Gr8! What books do u read?
Payton: Ever

Your max_length is set to 128, but your input_length is only 111. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=55)



Sample 9:
Original Dialogue: Beatrice: I am in town, shopping. They have nice scarfs in the shop next to the church. Do you want one?
Leo: No, thanks
Beatrice: But you don't have a scarf.
Leo: Because I don't need it.
Beatrice: Last winter you had a cold all the time. A scarf could help.
Leo: I don't like them.
Beatrice: Actually, I don't care. You will get a scarf.
Leo: How understanding of you!
Beatrice: You were complaining the whole winter that you're going to die. I've had enough.
Leo: Eh.
Model Summary    : Leo doesn't need a scarf because he had a cold all the time last winter. Beatrice will buy him one.
Reference Summary: Beatrice wants to buy Leo a scarf, but he doesn't like scarves. She cares about his health and will buy him a scarf no matter his opinion.

Sample 10:
Original Dialogue: Ivan: hey eric
Eric: yeah man
Ivan: so youre coming to the wedding
Eric: your brother's
Ivan: yea
Eric: i dont know mannn
Ivan: YOU DONT KNOW??
Eric: i just have a lot to do at home, plus i d