In [None]:
!pip install transformers --quiet

In [None]:
import json
from transformers import pipeline

# input_path: JSON file of summarized articles that is read for classification.
# output_path: JSON file the category-tagged articles are written to.
input_path = "cleaned_AI_summarized.json"
output_path = "cleaned_AI_summarized_tagged.json"

In [None]:
# Read the summarized articles from disk. json.load parses the open file
# handle; json.dump is the serializer and would raise a TypeError here.
with open(input_path, "r", encoding="utf-8") as f:
    articles = json.load(f)

print(f"Loaded {len(articles)} articles for classification")

In [None]:
# Categories offered to the zero-shot classifier; the top-ranked one is
# stored as each article's 'category'.
candidate_labels = [
    "Technology",
    "Business",
    "Health",
    "Sports",
    "Science",
    "Politics",
]

In [None]:
# Zero-shot classification pipeline backed by the facebook/bart-large-mnli
# checkpoint.
# NOTE(review): device_map="auto" presumably relies on the `accelerate`
# package being installed alongside transformers -- confirm in the target
# environment.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device_map="auto",
)

In [None]:
def classify_articles(articles, candidate_labels, batch_size=8):
    """Tag articles with their top-ranked zero-shot category.

    Each article's 'summary' is sent to the module-level ``classifier`` in
    batches of ``batch_size``. The first entry of the returned 'labels' list
    is stored on the article under the 'category' key.

    The article objects are modified in place; the returned list holds
    references to those same objects.

    Processing is best-effort: an article without a text 'summary' is skipped
    with a message, and a batch whose classification raises is reported and
    left out of the result instead of aborting the whole run.

    Args:
        articles: Sequence of dict-like articles, each expected to carry a
            string under 'summary'.
        candidate_labels: Labels passed to the classifier to choose between.
        batch_size: Number of summaries sent to the classifier per call.
            Must be at least 1.

    Returns:
        List of the articles that were successfully tagged, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Filter up front so one malformed record cannot abort the whole run and
    # so the error message below can always slice a real string.
    pending = []
    for position, article in enumerate(articles):
        if isinstance(article.get('summary'), str):
            pending.append(article)
        else:
            print(f"Skipping article at index {position}: missing or non-text 'summary'")

    classified = []

    for start in range(0, len(pending), batch_size):
        batch = pending[start:start + batch_size]
        batch_summaries = [article['summary'] for article in batch]

        try:
            batch_results = classifier(batch_summaries, candidate_labels)

            # The pipeline is documented to return either a dict or a list of
            # dicts; normalise so a one-item batch is not iterated by key.
            if isinstance(batch_results, dict):
                batch_results = [batch_results]

            for article, result in zip(batch, batch_results):
                article['category'] = result['labels'][0]
                classified.append(article)

        except Exception as e:
            print(f"Error processing batch starting with '{batch_summaries[0][:50]}' | {str(e)}")

    return classified

In [None]:
# Classify every loaded article; only successfully tagged ones are returned.
tagged_articles = classify_articles(articles, candidate_labels)

# Write the tagged articles as indented UTF-8 JSON, keeping non-ASCII
# characters unescaped.
with open(output_path, "w", encoding="utf-8") as out_file:
    json.dump(tagged_articles, out_file, indent=2, ensure_ascii=False)

print(f"Tagged {len(tagged_articles)} articles with categories.")