In [None]:
from celery import Celery
import feedparser
from sqlalchemy import create_engine, Column, Integer, String, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import logging
import nltk
from nltk.tokenize import word_tokenize
from nltk.classify import NaiveBayesClassifier
from collections import defaultdict

# Set up Celery app
# The app is created under the name 'tasks' with an AMQP broker on localhost
# (guest user); the @app.task functions in this module are registered on it.
app = Celery('tasks', broker='amqp://guest@localhost//')

# Set up database connection
# NOTE(review): this URL looks like an unfilled template ('host', 'port' and
# 'dbname' are literal words) -- confirm it is replaced with real connection
# details before the module is imported.
engine = create_engine('postgresql://user:password@host:port/dbname')
# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()

class Article(Base):
    """ORM model for one stored news article, mapped to the ``articles`` table."""
    __tablename__ = 'articles'
    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # Headline; store_article() looks up existing rows by this value.
    title = Column(String)
    # Text that classify_article() is run on (parse_feeds fills it from the
    # feed entry's summary).
    content = Column(String)
    # Declared as DateTime; parse_feeds fills it from the entry's `published`
    # field.  NOTE(review): confirm that value is converted to a datetime
    # somewhere before insertion.
    publication_date = Column(DateTime)
    # Link of the feed entry the article came from.
    source_url = Column(String)
    # Label assigned by process_article() from the classifier's output.
    category = Column(String)

# Issue table creation for every model declared on Base, using the engine
# above.  This runs at import time.
Base.metadata.create_all(engine)

# Session factory bound to the engine, plus a single module-level session
# that store_article() uses for its lookups and inserts.
Session = sessionmaker(bind=engine)
session = Session()

# Set up logging
# Root logger writes to 'app.log' with the threshold set to INFO.
logging.basicConfig(filename='app.log', level=logging.INFO)

def log_event(event):
    """Record *event* on the root logger at INFO severity."""
    logging.log(logging.INFO, event)

def handle_error(error):
    """Record *error* on the root logger at ERROR severity."""
    logging.log(logging.ERROR, error)

# Load NLTK data
# Called at import time so the tokenizer data used by word_tokenize is
# present.  NOTE(review): newer NLTK releases may require the 'punkt_tab'
# resource instead -- confirm against the installed NLTK version.
nltk.download('punkt')

# Define categories
# These strings are the classifier's labels; train_classifier() also uses
# each one to build the training file name '<category>.txt'.
# NOTE(review): two of the names contain '/', which open() treats as a path
# separator -- confirm the training files are laid out accordingly.
categories = ['Terrorism / protest / political unrest / riot',
               'Positive/Uplifting',
               'Natural Disasters',
               'Others']

# Train Naive Bayes classifier
def train_classifier():
    """Build a Naive Bayes classifier from one text file per category.

    For every name in ``categories`` the file ``<name>.txt`` is read. Each
    line is tokenized and becomes one training sample whose feature dict maps
    every token to ``True`` and whose label is the category name.

    NOTE(review): some category names contain '/', which ``open`` treats as a
    path separator -- confirm the training files are laid out that way.

    Returns:
        The classifier produced by ``NaiveBayesClassifier.train``.
    """
    labelled_samples = []
    for label in categories:
        with open(f'{label}.txt', 'r') as handle:
            # One (features, label) pair per line of the category file.
            labelled_samples.extend(
                (dict.fromkeys(word_tokenize(line), True), label)
                for line in handle
            )
    return NaiveBayesClassifier.train(labelled_samples)

# Trained once when the module is imported; classify_article() reads this
# module-level object.
classifier = train_classifier()

# Define task to process article
@app.task
def process_article(article):
    """Classify one article, store it, and log the outcome.

    Args:
        article: Dict with at least ``'content'`` and ``'title'`` keys, plus
            any other ``Article`` column values. A ``'category'`` key is
            added to it in place before it is stored.

    Returns:
        None. Failures are logged through ``handle_error`` rather than
        raised, so one bad article does not propagate an exception.
    """
    try:
        # Category classification using NLTK
        article['category'] = classify_article(article['content'])
        store_article(article)
        log_event(f'Article {article["title"]} processed successfully')
    except Exception as e:
        # Look the title up defensively: if the failure was caused by a
        # missing key or a malformed article, indexing article["title"] here
        # would raise again and hide the original error.
        if isinstance(article, dict):
            title = article.get('title', '<unknown>')
        else:
            title = '<unknown>'
        handle_error(f'Error processing article {title}: {str(e)}')

def classify_article(content):
    """Predict the category label for an article's text.

    Args:
        content: The article text to classify.

    Returns:
        The label chosen by the module-level ``classifier``.
    """
    # Encode features exactly as train_classifier does (token -> True).
    # The classifier matches feature values by equality, so the previous
    # per-token counts only agreed with the trained value for tokens that
    # occurred once (1 == True); any repeated token was scored as a value
    # never seen in training.
    features = dict.fromkeys(word_tokenize(content), True)
    return classifier.classify(features)

def store_article(article):
    """Insert *article* unless a row with the same title already exists.

    Args:
        article: Dict whose keys are ``Article`` column names; it is expanded
            into the ``Article`` constructor.

    Raises:
        Exception: Whatever the query, constructor or commit raised, after
            the shared session has been rolled back.
    """
    try:
        existing_article = (
            session.query(Article).filter_by(title=article['title']).first()
        )
        if existing_article is None:
            session.add(Article(**article))
            session.commit()
    except Exception:
        # The session is module-level and reused by every call. Without a
        # rollback, one failed flush/commit leaves it unusable and every
        # later article would fail as well.
        session.rollback()
        raise

# Define task to parse feeds and extract articles
@app.task
def parse_feeds(feeds):
    """Download the given feeds and turn their entries into article dicts.

    Args:
        feeds: Iterable of feed URLs handed to ``feedparser.parse``.

    Returns:
        A list of dicts with the keys ``'title'``, ``'content'``,
        ``'publication_date'`` and ``'source_url'``. Entries without a title
        are skipped. A missing summary becomes ``''``, and a missing
        published date or link becomes ``None``.
    """
    articles = []
    for feed in feeds:
        parsed_feed = feedparser.parse(feed)
        for entry in parsed_feed.entries:
            # Feed elements are optional; attribute access on a missing one
            # raises AttributeError and would abort the whole run, so read
            # them with .get() instead.
            title = entry.get('title')
            if not title:
                # store_article() de-duplicates by title, so an untitled
                # entry cannot be stored meaningfully.
                log_event(f'Skipping entry without a title from feed {feed}')
                continue
            articles.append({
                'title': title,
                'content': entry.get('summary', ''),
                'publication_date': entry.get('published'),
                'source_url': entry.get('link'),
            })
    return articles

# Define task to send articles to task queue
@app.task
def send_articles_to_queue(articles):
    """Enqueue one ``process_article`` task for each article in *articles*."""
    enqueue = process_article.delay
    for item in articles:
        enqueue(item)

# Run the application
if __name__ == '__main__':
    # Feed URLs fetched on each run of the script.
    feeds = [
        'http://rss.cnn.com/rss/cnn_topstories.rss',
        'http://qz.com/feed',
        'http://feeds.foxnews.com/foxnews/politics',
        'http://feeds.reuters.com/reuters/businessNews',
        'http://feeds.feedburner.com/NewshourWorld',
        'https://feeds.bbci.co.uk/news/world/asia/india/rss.xml'
    ]
    # Parse every feed first, then queue one processing task per article.
    send_articles_to_queue(parse_feeds(feeds))