# In [None]:
import pandas as pd
from transformers import pipeline

# Define the summarization model
# BART (used here) and T5 are both strong choices for abstractive summarization.
# This identifier is passed as the `model` argument when summarize_feedback()
# builds its Hugging Face pipeline.
MODEL_NAME = "facebook/bart-large-cnn" 

# --- Helper Functions ---

def load_data(filepath='cleaned_customer_feedback.csv', min_words=30, max_samples=5):
    """Load cleaned feedback and pick the longer entries worth summarizing.

    Args:
        filepath: Path to a CSV file containing a ``cleaned_text`` column.
        min_words: Entries with strictly more than this many
            whitespace-separated words are preferred.
        max_samples: Maximum number of texts to return.

    Returns:
        A list of at most ``max_samples`` feedback strings. When no entry is
        longer than ``min_words`` words, the first ``max_samples`` non-missing
        entries are returned instead. Returns ``[]`` when ``filepath`` does
        not exist.

    Raises:
        KeyError: If the CSV has no ``cleaned_text`` column.
    """
    # Keep the try body to the one call that can raise FileNotFoundError.
    try:
        df = pd.read_csv(filepath)
    except FileNotFoundError:
        print(f"Error: {filepath} not found. Please run data_preprocessing.py first.")
        return []

    # Empty cells come back as NaN. Drop them so they are neither counted as
    # the one-word string 'nan' nor handed to the summarizer as floats, and
    # coerce whatever remains to str so every returned item is real text.
    feedback = df['cleaned_text'].dropna().astype(str)
    word_counts = feedback.str.split().str.len()
    long_feedback = feedback[word_counts > min_words].head(max_samples)

    if long_feedback.empty:
        print(f"Note: No feedback records found with more than {min_words} words. Using shorter samples.")
        long_feedback = feedback.head(max_samples)

    return long_feedback.tolist()

def summarize_feedback(texts):
    """Produce a short and a detailed abstractive summary for each text.

    Uses the Hugging Face ``summarization`` pipeline with ``MODEL_NAME``; the
    pipeline handles tokenization and model inference.

    Args:
        texts: Iterable of feedback strings to summarize.

    Returns:
        A list with one dict per input text, holding the keys
        ``'Original Text'``, ``'Short Summary'`` and ``'Detailed Summary'``.
        Returns ``[]`` when ``texts`` is empty (the model is not loaded in
        that case) and ``None`` when the pipeline cannot be initialized.
    """
    # Materialize once so generators and other one-shot iterables work, and
    # bail out before paying for a model load when there is nothing to do.
    texts = list(texts)
    if not texts:
        return []

    # Initialize the summarization pipeline
    try:
        summarizer = pipeline("summarization", model=MODEL_NAME)
    except Exception as e:
        print(f"Error initializing pipeline: {e}. Ensure you have transformers and torch installed.")
        return None
    else:
        print(f"--- Summarization Pipeline Initialized with {MODEL_NAME} ---")

    # min_length / max_length bound the size of each generated summary.
    # Insertion order fixes the key order of every result dict.
    length_profiles = {
        'Short Summary': {'min_length': 10, 'max_length': 30},
        'Detailed Summary': {'min_length': 30, 'max_length': 60},
    }

    results = []
    for text in texts:
        entry = {'Original Text': text}
        for label, limits in length_profiles.items():
            # truncation=True clips inputs that exceed the model's maximum
            # input length instead of letting inference fail on them.
            output = summarizer(text, truncation=True, do_sample=False, **limits)
            entry[label] = output[0]['summary_text']
        results.append(entry)

    return results

# --- Main Execution ---

if __name__ == '__main__':
    # A single long, hand-written review so the demo runs without the CSV on disk.
    long_review_1 = (
        "I recently purchased the new smart thermostat from your company, and while the installation process was incredibly simple and the design is sleek, I am quite disappointed with the mobile application integration. "
        "It constantly disconnects from the Wi-Fi network, and I have to manually restart the device every morning. "
        "This is an unacceptable performance for a premium product. "
        "I expect a fix immediately or I will have to return it. "
        "I have been a loyal customer for five years and this is the first time I've been truly frustrated. "
        "The customer support chat was also very slow to respond to my initial query, taking over 48 hours to get an actual human response."
    )

    # Wrap the review in a one-column DataFrame and read it back as a list,
    # the same column-to-list step load_data() performs on the CSV.
    test_df = pd.DataFrame({'cleaned_text': [long_review_1]})
    test_texts = test_df['cleaned_text'].tolist()

    # The hardcoded sample keeps the demo immediate; to use the file instead:
    # sample_texts = load_data()

    print("\n--- Sample Input-Output Examples for Summarization ---")
    summaries = summarize_feedback(test_texts)

    divider = "=" * 50
    # `or []` skips the report when summarization returned None or nothing.
    for item in summaries or []:
        preview = item['Original Text'][:150]
        report_lines = [
            "\n" + divider,
            f"ORIGINAL: {preview}...",
            f"SHORT SUMMARY: {item['Short Summary']}",
            f"DETAILED SUMMARY: {item['Detailed Summary']}",
            divider,
        ]
        print("\n".join(report_lines))

    # Deliverable is the notebook/script showing these input-output examples.
