# core

> Main codebase for fetching and saving RSS feeds

In [None]:
# | default_exp core


In [None]:
# | hide
from nbdev.showdoc import *


In [None]:
# | export

import reader
from typing import *
from bs4 import BeautifulSoup
from rich import print
from rich.panel import Panel
from rich.text import Text
import logging

logger = logging.getLogger(__name__)


def strip_html(s: Optional[str]) -> str:
    """Return the plain text contained in an HTML fragment.

    Args:
        s (str, optional): HTML markup to strip. ``None`` and the empty
            string are accepted, because feed entries do not always carry
            a summary.

    Returns:
        str: The text of ``s`` with all markup removed, or ``""`` when there
            is nothing to parse.
    """
    if not s:
        # Nothing to parse; BeautifulSoup would fail on None.
        return ""
    soup = BeautifulSoup(s, features="lxml")
    return soup.text


In [None]:
# | export

class Feed:
    """Description of an RSS feed: its URL, an optional display name and tags.

    Args:
        url (str): URL of the feed.
        name (str, optional): Display name to use instead of the title
            published by the feed. Defaults to None.
        tags (List[str], optional): Initial tags. The list is copied, so the
            caller's list is never modified. Defaults to no tags.
    """

    def __init__(self, url: str, name: Optional[str] = None, tags: Optional[List[str]] = None):
        self.url = url
        self.name = name
        # A fresh list per instance: a shared default list would make every
        # Feed created without explicit tags see each other's tags.
        self.tags = [] if tags is None else [*tags]

    def add_tag(self, tag: str):
        """Add ``tag`` to the feed unless it is already present."""
        if tag not in self.tags:
            self.tags.append(tag)

    def remove_tag(self, tag: str):
        """Remove ``tag`` from the feed; do nothing when it is not present."""
        if tag in self.tags:
            self.tags.remove(tag)


In [None]:
# | export

class PyNewsReader:
    """Convenience wrapper around a ``reader.Reader`` database of RSS feeds.

    Adds optional per-feed display names and pretty-printed listing and
    searching of entries on top of the underlying reader.
    """

    def __init__(self, dbpath=None, feeds: Optional[List[Feed]] = None):
        """Open the feed database and enable full-text search.

        Args:
            dbpath (str, optional): Path of the database file. Defaults to
                ``db.sqlite`` in the current working directory.
            feeds (List[Feed], optional): Feeds to register right away.
                Defaults to None (no feeds are added).
        """
        if dbpath is None:
            logger.info("Database path not specified, using ./db.sqlite")
            dbpath = "db.sqlite"

        self._reader = reader.make_reader(
            dbpath, plugins=["reader.enclosure_dedupe", "reader.entry_dedupe"]
        )

        self._reader.enable_search()
        # Maps feed URL -> user supplied display name.
        self._feed_names = {}

        for feed in feeds or ():
            self.add_feed(feed)

    def _print_entries(self, entries: List[reader.Entry], mark_as_read: bool = True, limit: int = 10):
        """Pretty print entries, skipping entries whose link was already shown.

        Args:
            entries (List[reader.Entry]): Entries to display. ``None`` items
                are ignored.
            mark_as_read (bool, optional): Mark displayed entries as read.
                Defaults to True.
            limit (int, optional): Stop after this many entries have been
                displayed. Defaults to 10.
        """
        displayed_links = set()
        for e in entries:
            if e is None:
                # A search hit that could not be resolved to an entry.
                continue

            if e.link in displayed_links:
                # Don't display duplicates; they are marked as read
                # regardless of `mark_as_read` so they do not show up again.
                self._reader.mark_entry_as_read(e)
                continue

            displayed_links.add(e.link)
            if e.published:
                published_date = Text(
                    "Date: " + e.published.isoformat()[:10], justify="center"
                )
            else:
                published_date = Text("Date: Unknown", justify="center")
            if mark_as_read:
                self._reader.mark_entry_as_read(e)

            feed_title = self._get_feed_title(e.original_feed_url)
            print(
                Panel(
                    published_date
                    + "\n\n"
                    # Entries without a summary are shown with an empty body.
                    + Text(strip_html(e.summary or "") + "\n", justify=None),
                    title=f"[link={e.link}]{e.title}[/link]",
                    subtitle=feed_title,
                )
            )
            print()
            if len(displayed_links) == limit:
                return

    def _search_to_entry(self, search_result):
        """Resolve a search result to its full entry.

        Args:
            search_result: Object exposing ``feed_url`` and ``id``.

        Returns:
            reader.Entry: The matching entry, or None if it does not exist.
        """
        # Direct lookup by (feed URL, entry id) instead of scanning all entries.
        return self._reader.get_entry(
            (search_result.feed_url, search_result.id), None
        )

    def _get_feed_title(self, url: str):
        """Get display title for pynewsreader feed

        Args:
            url (str): URL of pynewsreader feed

        Returns:
            str: The user supplied name if there is one, otherwise the title
                stored for the feed, otherwise the feed URL.
        """
        if url in self._feed_names:
            return self._feed_names[url]

        feed = self._reader.get_feed(url, None)
        if feed is None:
            # Unknown to the reader: the URL is the best label available.
            return url
        return feed.title or feed.url

    def update(self):
        """Update feeds and search"""
        self._reader.update_feeds()
        self._reader.update_search()

    def _get_entries(self, important: bool = None, read: Union[None, bool] = None, limit: int = 10):
        """Get entries in reader.Entry format

        Args:
            important (bool, optional): Filter on `important` status (None, True, False). Defaults to None.
            read (bool, optional): Filter on `read` status (None, True, False). Defaults to None.
            limit (int, optional): Number of entries to return. Defaults to 10.

        Returns:
            Iterable[reader.Entry]: Matching entries
        """
        return self._reader.get_entries(read=read, limit=limit, important=important)

    def add_feed(self, feed: Feed):
        """Add feed to pynewsreader

        Adding a feed that already exists is not an error.

        Args:
            feed (Feed): pynewsreader Feed to add
        """
        self._reader.add_feed(feed.url, exist_ok=True)
        if feed.name:
            self._feed_names[feed.url] = feed.name

    def remove_feed(self, feed: Feed):
        """Remove feed from pynewsreader instance

        Args:
            feed (Feed): Feed to remove
        """
        self._reader.delete_feed(feed.url)
        # Forget the display name too, so it cannot outlive the feed.
        self._feed_names.pop(feed.url, None)

    def feeds(self):
        """List pynewsreader feeds

        Returns:
            List[str]: List of names of current pynewsreader feeds
        """
        return [self._get_feed_title(i.url) for i in self._reader.get_feeds()]

    def list(self, limit: int = 5, read: bool = None, important: bool = None, mark_as_read: bool = False):
        """Pretty print entries

        Args:
            limit (int, optional): Number of entries to show. Defaults to 5.
            read (bool, optional): Show read entries (True), unread entries (False), or all entries (None). Defaults to None.
            important (bool, optional): Show important entries (True), unimportant entries (False), or all entries (None). Defaults to None.
            mark_as_read (bool, optional): Mark displayed entries as read. Defaults to False.
        """
        # Fetch twice as many entries as requested to leave room for the
        # duplicates that _print_entries drops.
        candidates = self._get_entries(
            read=read, important=important, limit=limit * 2
        )
        self._print_entries(candidates, limit=limit, mark_as_read=mark_as_read)

    def search(self, query: str, mark_as_read: bool = True, limit: int = 10):
        """Search entries and pretty print results

        Args:
            query (str): Search query
            mark_as_read (bool, optional): Mark results as read? Defaults to True.
            limit (int, optional): Number of results to show. Defaults to 10.
        """
        matches = [
            self._search_to_entry(hit)
            for hit in self._reader.search_entries(query)
        ]
        self._print_entries(matches, mark_as_read=mark_as_read, limit=limit)

    def mark_important(self, entry: reader.Entry):
        """Mark entry as important

        Args:
            entry (reader.Entry): Entry to mark as important
        """
        self._reader.mark_entry_as_important(entry)

    def mark_unimportant(self, entry: reader.Entry):
        """Mark entry as unimportant

        Args:
            entry (reader.Entry): Entry to mark as unimportant
        """
        self._reader.mark_entry_as_unimportant(entry)

    def tag(self, entry: reader.Entry, tag_key: str, tag_value: Dict = None):
        """Tag an entry

        Args:
            entry (reader.Entry): Entry to tag
            tag_key (str): Key of tag
            tag_value (Dict, optional): Value of tag. Defaults to None.
        """
        self._reader.set_tag(entry, tag_key, tag_value)


In [None]:
# Create a reader with default arguments; the database file is ./db.sqlite
r = PyNewsReader()


## Add Feeds

In [None]:
# Feeds registered under an explicit display name
named_feeds = [
    Feed(url='https://ricochet.media/en/feed', name="Richochet Media"),
    Feed(url='https://thetyee.ca/rss2.xml', name="The Tyee"),
    Feed(
        url="https://www.thestar.com/content/thestar/feed.RSSManagerServlet.articles.topstories.rss",
        name="Toronto Star | Top Stories",
    ),
    Feed(
        url="https://www.thestar.com/content/thestar/feed.RSSManagerServlet.articles.news.investigations.rss",
        name="Toronto Star | Investigations",
    ),
    Feed(
        url="https://www.thestar.com/content/thestar/feed.RSSManagerServlet.articles.opinion.editorials.rss",
        name="Toronto Star | Editorials",
    ),
    Feed(url="https://www.macleans.ca/feed/", name="Macleans"),
]
for named_feed in named_feeds:
    r.add_feed(named_feed)

# Feeds without a name fall back to the title published by the feed itself
unnamed_feed_urls = (
    "https://rss.cbc.ca/lineup/topstories.xml",
    "https://rss.cbc.ca/lineup/world.xml",
    "https://rss.cbc.ca/lineup/canada.xml",
    "https://rss.cbc.ca/lineup/business.xml",
    "https://rss.cbc.ca/lineup/technology.xml",
    "https://www.cbc.ca/cmlink/rss-canada-ottawa",
    "https://thenarwhal.ca/feed/",
)
for feed_url in unnamed_feed_urls:
    r.add_feed(Feed(feed_url))


## List Feeds

In [None]:
# Display titles of all feeds known to the reader
r.feeds()


['Richochet Media',
 'CBC | Business News',
 'CBC | Canada News',
 'CBC | Ottawa News',
 'CBC | Technology News',
 'CBC | Top Stories News',
 'CBC | World News',
 'Macleans',
 'The Narwhal',
 'The Tyee',
 'Toronto Star | Investigations',
 'Toronto Star | Editorials',
 'Toronto Star | Top Stories']

## Update Feeds

In [None]:
# Update all feeds, then update the search index
r.update()


## Display Entries

`read` = None shows all entries

`read` = False shows only unread entries

`read` = True shows only read entries

In [None]:
# Print up to 5 unread entries and mark the displayed ones as read
r.list(read=False, limit=5, mark_as_read=True)


## Search Entries

In [None]:
# Pretty print entries matching "fusion"; results are marked as read by default
r.search("fusion")


In [None]:
# | hide

# To Do:
# * Command line interface
# * User interface
# * Tags


In [None]:
# | hide
import nbdev

# NOTE(review): presumably writes the `# | export` cells to the module named by `default_exp` - confirm against the nbdev docs
nbdev.nbdev_export()
