# In [None]:

import transformers
import json
from transformers import pipeline
import os

class ArticleSummarizer:
    """Summarize article text with a Hugging Face pipeline, using character-based limits."""

    # Rule-of-thumb average for English text under BPE tokenizers. It is only
    # used to convert character budgets into token budgets for generation, so
    # an approximate value is fine: the final length is enforced in characters.
    CHARS_PER_TOKEN = 4

    # Marker appended to summaries that had to be cut to fit max_chars.
    ELLIPSIS = '...'

    def __init__(self, model_name="facebook/bart-large-cnn"):
        """
        Initialize the summarization pipeline.

        Args:
            model_name (str): Hugging Face model name for summarization.
        """
        self.summarizer = pipeline("summarization", model=model_name, tokenizer=model_name, framework="pt")

    def summarize_text(self, text, min_chars=200, max_chars=300):
        """
        Summarize the given text if it meets the character length criteria.

        Args:
            text (str): The original article content.
            min_chars (int): Minimum number of characters to qualify for
                summarization; also used as the approximate lower bound for
                the generated summary.
            max_chars (int): Maximum number of characters for the summary,
                including the trailing ellipsis when the summary is cut.

        Returns:
            str: The summary, or ``text`` unchanged when it is not a string,
            is shorter than ``min_chars``, or summarization fails.
        """
        # Non-string content (e.g. a JSON null) cannot be measured or
        # summarized; hand it back untouched instead of raising.
        if not isinstance(text, str) or len(text) < min_chars:
            return text  # Skip summarization

        # Convert the character budgets into token budgets, keeping
        # 0 <= min_tokens <= max_tokens and max_tokens >= 1 so the generation
        # call never receives contradictory bounds.
        max_tokens = max(int(max_chars / self.CHARS_PER_TOKEN), 1)
        min_tokens = min(max(int(min_chars / self.CHARS_PER_TOKEN), 0), max_tokens)

        try:
            summary = self.summarizer(
                text,
                max_length=max_tokens,
                min_length=min_tokens,
                do_sample=False,
                # Clip inputs longer than the model's context window instead
                # of failing and falling back to the full original text.
                truncation=True,
            )[0]['summary_text']
        except Exception as e:
            # Best-effort: a failed summary must not abort the caller's batch.
            print(f"Error during summarization: {e}")
            return text  # Return original text in case of error

        # The token budget is only an estimate, so enforce the hard limit in
        # characters, reserving room for the ellipsis so the result never
        # exceeds max_chars.
        if len(summary) > max_chars:
            budget = max_chars - len(self.ELLIPSIS)
            if budget <= 0:
                # No room for an ellipsis at all: fall back to a hard cut.
                summary = summary[:max(max_chars, 0)]
            else:
                # Drop the trailing (possibly partial) word before marking the cut.
                summary = summary[:budget].rsplit(' ', 1)[0].rstrip() + self.ELLIPSIS
        return summary

def extract_and_summarize(json_input_path: str, json_output_path: str) -> None:
    """
    Extracts content from the input JSON, summarizes it, and saves to the output JSON.

    The input must be a JSON array. For every object in it, the string under
    'content' is summarized and stored under a new 'summary' key; entries that
    are not objects are reported and left unchanged. Problems with the input
    file are reported on stdout and the function returns without writing the
    output file.

    Args:
        json_input_path (str): Path to the input JSON file.
        json_output_path (str): Path to save the output JSON file with summaries.
    """
    # isfile (not exists) so that a directory path is rejected here instead
    # of failing inside open().
    if not os.path.isfile(json_input_path):
        print(f"Input file {json_input_path} does not exist.")
        return

    # Load articles from JSON
    with open(json_input_path, 'r', encoding='utf-8') as f:
        try:
            articles = json.load(f)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
            return

    # Iterating a JSON object would yield its string keys, so insist on an array.
    if not isinstance(articles, list):
        print(f"Expected a JSON array of articles, got {type(articles).__name__}.")
        return

    # Building the summarizer loads the model; skip it when there is nothing to do.
    summarizer = ArticleSummarizer() if articles else None

    # Iterate through articles and summarize
    for idx, article in enumerate(articles):
        if not isinstance(article, dict):
            print(f"Article {idx + 1}: Skipped (not a JSON object).")
            continue

        content = article.get('content')
        if not isinstance(content, str):
            # Missing, null or non-text content is treated like the
            # missing-key default: an empty string, giving an empty summary.
            content = ''

        article['summary'] = summarizer.summarize_text(content)
        print(f"Article {idx + 1}: Summary generated.")

    # Save the updated articles to the output JSON
    with open(json_output_path, 'w', encoding='utf-8') as f:
        json.dump(articles, f, ensure_ascii=False, indent=4)
    print(f"Summarized articles saved to {json_output_path}")

if __name__ == "__main__":
    # Script entry point: read articles from input_json and write the
    # summarized copy to output_json. Replace both paths with your own files.
    input_json, output_json = (
        "news_articles.json",
        "summarized_news_articles.json",
    )

    extract_and_summarize(
        json_input_path=input_json,
        json_output_path=output_json,
    )
