# core

> Main codebase for fetching and saving RSS feeds

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
# | export

import reader
from typing import *
from bs4 import BeautifulSoup
from rich import print
from rich.panel import Panel
from rich.text import Text
import logging

logger = logging.getLogger(__name__)


def strip_html(s: Optional[str]) -> str:
    """Return the plain text of *s* with all HTML markup removed.

    Args:
        s: An HTML fragment (for example a feed entry summary). ``None`` and
            the empty string are accepted, since feeds may omit summaries.

    Returns:
        The text content of the fragment, or ``""`` when there is no input.
    """
    if not s:
        return ""
    # Name the parser explicitly: without it BeautifulSoup warns and picks
    # whichever parser is installed, so output could differ between machines.
    return BeautifulSoup(s, "html.parser").text


class PyNewsReader(reader.Reader):
    """A ``reader.Reader`` that seeds itself with feeds and pretty-prints entries.

    The instance is built from the components of a reader created with
    ``reader.make_reader`` (also kept as ``self.reader``), so every
    ``reader.Reader`` method is available directly on the instance.

    NOTE(review): construction relies on private attributes of the underlying
    reader (``_storage``, ``_search``, ``_parser``, ``_reserved_name_scheme``);
    confirm these still exist when upgrading the ``reader`` package.
    """

    # Feeds used to seed a brand-new database when the caller supplies none.
    DEFAULT_FEEDS = (
        "https://rss.cbc.ca/lineup/topstories.xml",
        "https://rss.cbc.ca/lineup/world.xml",
        "https://rss.cbc.ca/lineup/canada.xml",
        "https://rss.cbc.ca/lineup/business.xml",
        "https://rss.cbc.ca/lineup/technology.xml",
        "https://www.cbc.ca/cmlink/rss-canada-ottawa",
        "https://www.thestar.com/content/thestar/feed.RSSManagerServlet.articles.topstories.rss",
        "https://www.thestar.com/content/thestar/feed.RSSManagerServlet.articles.news.investigations.rss",
        "https://www.thestar.com/content/thestar/feed.RSSManagerServlet.articles.opinion.editorials.rss",
        "https://www.macleans.ca/feed/",
        "https://thetyee.ca/rss2.xml",
        "https://thenarwhal.ca/feed/",
        "https://ricochet.media/en/feed",
    )

    def __init__(self, dbpath=None, feeds=None):
        """Open (or create) the database and make sure it has feeds.

        Args:
            dbpath: Path of the SQLite database. Defaults to ``"db.sqlite"``
                in the current directory.
            feeds: Feed URL, or iterable of feed URLs, to subscribe to. Feeds
                already present are left alone. When omitted, an empty
                database is seeded with ``DEFAULT_FEEDS`` and a database that
                already has feeds is left unchanged.
        """
        if dbpath is None:
            logger.info("Database path not specified, using ./db.sqlite")
            dbpath = "db.sqlite"

        # Open the database the caller asked for, not a fixed file name.
        self.reader = reader.make_reader(
            dbpath, plugins=["reader.enclosure_dedupe", "reader.entry_dedupe"]
        )

        if feeds is None:
            # Defaults are only a starting point for an empty database; they
            # must never replace feeds the caller passed in explicitly.
            if self.reader.get_feed_counts().total == 0:
                logger.info(
                    "Feeds not specified, starting you off with some Canadian news"
                )
                feeds = self.DEFAULT_FEEDS
            else:
                feeds = ()
        elif isinstance(feeds, str):
            # A lone URL would otherwise be iterated character by character.
            feeds = (feeds,)

        for feed in feeds:
            self.reader.add_feed(feed, exist_ok=True)

        super().__init__(
            self.reader._storage,
            self.reader._search,
            self.reader._parser,
            self.reader._reserved_name_scheme,
        )
        self.enable_search()

    def _print_entries(
        self,
        entries: Iterable[reader.Entry],
        mark_as_read: bool = True,
        limit: Optional[int] = 10,
    ):
        """Pretty print entries as rich panels, skipping duplicates.

        Args:
            entries: Entries to display; ``None`` items are ignored.
            mark_as_read: Mark every displayed entry as read.
            limit: Maximum number of entries to display; ``None`` means no
                limit.

        Entries whose link was already displayed are not shown again and are
        always marked as read, whatever ``mark_as_read`` says.
        """
        if limit is not None and limit <= 0:
            return

        displayed = set()
        for e in entries:
            if e is None:
                continue

            # Entries without a link fall back to their (feed, id) identity so
            # they are not all treated as duplicates of one another.
            key = e.link if e.link is not None else (e.feed_url, e.id)
            if key in displayed:
                # Don't display duplicates
                self.mark_entry_as_read(e)
                continue
            displayed.add(key)

            if e.published:
                published_date = Text(
                    "Date: " + e.published.isoformat()[:10], justify="center"
                )
            else:
                published_date = Text("Date: Unknown", justify="center")

            if mark_as_read:
                self.mark_entry_as_read(e)

            # Title and link are optional in feeds: never render "None", and
            # only emit link markup when there is a link to point at.
            title = e.title or e.link or e.id
            if e.link:
                title = f"[link={e.link}]{title}[/link]"

            print(
                Panel(
                    published_date
                    + "\n\n"
                    + Text(strip_html(e.summary or "") + "\n", justify=None),
                    title=title,
                    subtitle=e.feed.title or e.feed_url,
                )
            )
            print()

            if limit is not None and len(displayed) >= limit:
                return

    def _search_to_entry(self, search_result):
        """Return the full entry for a search result, or ``None`` if it is gone."""
        # Keyed lookup by (feed URL, entry id) instead of scanning every entry.
        return self.get_entry(
            (search_result.feed_url, search_result.id), default=None
        )

    def show(self, limit: int = 5, read: Optional[bool] = None, **kwargs) -> None:
        """Pretty print entries - supports reader.Reader.get_entries arguments

        Args:
            limit: Maximum number of entries to display.
            read: ``None`` for all entries, ``False`` for unread only,
                ``True`` for read only.
            **kwargs: Passed through to ``get_entries``.

        Displayed entries are marked as read.
        """
        # Fetch twice the limit so skipped duplicates can still be replaced.
        self._print_entries(
            self.get_entries(**kwargs, read=read, limit=limit * 2), limit=limit
        )

    def search(self, query: str, mark_as_read: bool = True, limit: int = 10):
        """Search entries and pretty print results

        Args:
            query: Full-text search query.
            mark_as_read: Mark the displayed results as read; when ``False``
                only skipped duplicates are marked.
            limit: Maximum number of results to display.
        """
        entries = []
        for hit in self.search_entries(query):
            entry = self._search_to_entry(hit)
            # The search index can reference entries that no longer exist.
            if entry is not None:
                entries.append(entry)
        self._print_entries(entries, mark_as_read=mark_as_read, limit=limit)

    def update(self):
        """Update feeds and search"""
        self.update_feeds()
        self.update_search()

In [None]:
# Build a reader with the defaults: neither dbpath nor feeds is passed.
r = PyNewsReader()



## List feeds

In [None]:
# Label each feed by its title, falling back to its URL when the title is empty.
[i.title or i.url for i in r.get_feeds()]

['https://ricochet.media/en/feed',
 'CBC | Business News',
 'CBC | Canada News',
 'CBC | Ottawa News',
 'CBC | Technology News',
 'CBC | Top Stories News',
 'CBC | World News',
 'Macleans.ca',
 'The Narwhal',
 "The Tyee | BC's home for today's news, analysis and updates",
 'TORONTO STAR',
 'TORONTO STAR',
 'TORONTO STAR']

## Update Feeds

In [None]:
# PyNewsReader.update() calls update_feeds() and then update_search().
r.update()

## Get Entries

In [None]:
# Print one raw entry object; `print` here is rich's print, imported in the
# exported cell above.
for i in r.get_entries(limit=1):
    print(i)

## Display Entries

`read` = None shows all entries

`read` = False shows only unread entries

`read` = True shows only read entries

In [None]:
# Pretty-print up to five entries regardless of read state; show() marks the
# entries it displays as read.
r.show(limit=5, read=None)

## Search Entries

In [None]:
# Search entries for "fusion" and pretty-print the matches.
r.search("fusion")

In [None]:
#| hide

## To Do:
## * Change title of feed
## * Command line interface
## * User interface
## * Share / favourite entries
## * Tags

In [None]:
#| hide
import nbdev

# Run nbdev's export step (presumably writes the `#| export` cells to the
# module named by `default_exp` — confirm against the project's nbdev config).
nbdev.nbdev_export()