# Data Visualization with Plotly

## Day 1

In [35]:
# Import modules - Standard Library
from collections import Counter
from datetime import datetime, timedelta
from pprint import pprint as pp
import re

In [2]:
# Import modules - Third-Party
import feedparser
import plotly
import plotly.graph_objs as go

In [58]:
# Constants
# Maps the first path segment of a pybit.es link to the category label
BLOG_CATEGORIES = dict(
    articles='article',
    codechallenge='challenge',
    guest='guest',
    special='special',
    twitter='twitter',
)
# Parsed feed; the URL is fetched when this cell runs
BLOG_FEED = feedparser.parse('https://pybit.es/feed')
# Sample values used as default arguments by the helper functions in this notebook
DATE_STRING = 'Thu, 30 Jun 2022 06:56:11 +0000'
URL_STRING = 'https://pybit.es/articles/the-importance-of-disconnecting-as-a-developer/'

In [18]:
# Assign blog entries to a variable
entries = BLOG_FEED['entries']

In [19]:
# Display the first blog entry
entries[0]

{'title': 'The Importance of Disconnecting as a Developer',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'https://pybit.es/feed/',
  'value': 'The Importance of Disconnecting as a Developer'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://pybit.es/articles/the-importance-of-disconnecting-as-a-developer/'}],
 'link': 'https://pybit.es/articles/the-importance-of-disconnecting-as-a-developer/',
 'comments': 'https://pybit.es/articles/the-importance-of-disconnecting-as-a-developer/#respond',
 'authors': [{'name': 'Hugh Tipping'}],
 'author': 'Hugh Tipping',
 'author_detail': {'name': 'Hugh Tipping'},
 'published': 'Thu, 30 Jun 2022 06:56:11 +0000',
 'published_parsed': time.struct_time(tm_year=2022, tm_mon=6, tm_mday=30, tm_hour=6, tm_min=56, tm_sec=11, tm_wday=3, tm_yday=181, tm_isdst=0),
 'tags': [{'term': 'Tips', 'scheme': None, 'label': None},
  {'term': 'Mindset', 'scheme': None, 'label': None},
  {'term': 'Productivity', 'scheme'

### Parsed Feed Notes

- The `published` key uses a string value for a date:
    `'Thu, 30 Jun 2022 06:56:11 +0000'`
- The `published_parsed` key uses a `time.struct_time` object to store a structured date:
    `time.struct_time(tm_year=2022, tm_mon=6, tm_mday=30, tm_hour=6, tm_min=56, tm_sec=11, tm_wday=3, tm_yday=181, tm_isdst=0)`

#### Working with date/time objects is easiest with `datetime.datetime` objects

In [60]:
# Create a helper function to convert strings to datetime objects
def convert_to_datetime(
    date_string: str = DATE_STRING
) -> datetime:
    """ Convert a string to a datetime object.

        The timezone offset (everything from the first '+' onwards) is
        discarded, so the returned datetime is naive.

        Args:
            date_string (str):
                A string representing a date, extracted from
                entries['published'], e.g.
                'Thu, 30 Jun 2022 06:56:11 +0000'.  Default is DATE_STRING.

        Returns:
            date_time (datetime):
                A datetime object representing the converted date_string.

        Raises:
            ValueError:
                If the text left of the '+' does not match the format
                '%a, %d %b %Y %H:%M:%S'.
    """

    # Remove the timezone offset from date_string.  Use the argument, not
    # the DATE_STRING constant, so the caller's value is actually converted.
    date = date_string.split(
        sep='+'  # Split the string to a list object on the '+' character
    )[0].strip()  # Keep the first index (everything to the left of the '+') and remove the trailing whitespace

    return datetime.strptime(date, '%a, %d %b %Y %H:%M:%S')


# Convert the default date string (DATE_STRING) to a datetime object
date_time = convert_to_datetime()

In [41]:
# check the date_time object type
type(date_time)

datetime.datetime

In [42]:
# Add 5 days to the date_time value
date_time + timedelta(
    days=5
)

datetime.datetime(2022, 7, 5, 6, 56, 11)

In [59]:
# Extract an entry's category from its `link` key
def get_category(
    link: str = URL_STRING
) -> str:
    """ Extract an entry's category from its `link` key.

        Args:
            link (str):
                A link with a category embedded in the URL, extracted from
                entries['link'].  Default is URL_STRING.

        Returns:
            category (str):
                Category value looked up in BLOG_CATEGORIES using the first
                path segment of the link.  'article' is returned when the
                link is not a pybit.es URL of the expected shape or the
                segment is not a key in BLOG_CATEGORIES.
    """

    # Match the whole link and capture the first path segment.  The dot in
    # the hostname is escaped so that only a literal 'pybit.es' matches.
    category_match = re.fullmatch(
        pattern=r'https?://pybit\.es/([a-z]+)/.*',
        string=link
    )

    # A link that does not have the expected shape falls back to the default
    if category_match is None:
        return 'article'

    # Attempt to match the captured segment to a key in BLOG_CATEGORIES, default is 'article'
    category = BLOG_CATEGORIES.get(category_match.group(1), 'article')

    return category

category = get_category()
category

'article'