# Data Visualization with Plotly

## Day 1

In [30]:
# Import modules - Standard Library
from collections import Counter
from datetime import datetime, timedelta
from pprint import pprint as pp
from typing import List, Tuple, Union
import re

In [2]:
# Import modules - Third-Party
import feedparser
import plotly
import plotly.graph_objs as go

In [3]:
# Constants
# Maps the first path segment of a post URL to its category name
BLOG_CATEGORIES = dict(
    articles='article',
    codechallenge='challenge',
    guest='guest',
    special='special',
    twitter='twitter',
)
# Parsed RSS feed; fetched once, when this cell runs
BLOG_FEED = feedparser.parse('https://pybit.es/feed')
# Sample values used as defaults by the helper functions below
DATE_STRING = 'Thu, 30 Jun 2022 06:56:11 +0000'
URL_STRING = (
    'https://pybit.es/articles/'
    'the-importance-of-disconnecting-as-a-developer/'
)

In [4]:
# Keep a handle on the list of blog entries from the parsed feed
entries = BLOG_FEED.entries

In [5]:
# Display the number of entries
print(len(entries))

10


In [6]:
# Display the first blog entry
entries[0]

{'title': '5 tips for overcoming imposter syndrome …',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'https://pybit.es/feed/',
  'value': '5 tips for overcoming imposter syndrome …'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://pybit.es/articles/overcoming-imposter-syndrome/'}],
 'link': 'https://pybit.es/articles/overcoming-imposter-syndrome/',
 'authors': [{'name': 'Bob Belderbos'}],
 'author': 'Bob Belderbos',
 'author_detail': {'name': 'Bob Belderbos'},
 'published': 'Fri, 08 Jul 2022 07:27:14 +0000',
 'published_parsed': time.struct_time(tm_year=2022, tm_mon=7, tm_mday=8, tm_hour=7, tm_min=27, tm_sec=14, tm_wday=4, tm_yday=189, tm_isdst=0),
 'tags': [{'term': 'Mindset', 'scheme': None, 'label': None},
  {'term': 'brag doc', 'scheme': None, 'label': None},
  {'term': 'coaching', 'scheme': None, 'label': None},
  {'term': 'communication', 'scheme': None, 'label': None},
  {'term': 'comparing to others', 'scheme': None, 'label':

### Parsed Feed Notes

- The `published` key uses a string value for a date:
    `'Thu, 30 Jun 2022 06:56:11 +0000'`
- The `published_parsed` key uses a `time.struct_time` object to store a structured date:
    `time.struct_time(tm_year=2022, tm_mon=6, tm_mday=30, tm_hour=6, tm_min=56, tm_sec=11, tm_wday=3, tm_yday=181, tm_isdst=0)`

#### Working with date/time objects is easiest with `datetime.datetime` objects

In [7]:
# Create a helper function to convert strings to datetime objects
def convert_to_datetime(
    date_string: str = DATE_STRING
) -> str:
    """ Convert an RSS date string to a 'year-month' string.

        Despite its name (kept so existing callers keep working), this
        function returns a string, not a datetime object.

        Args:
            date_string (str):
                A string representing a date, e.g.
                'Thu, 30 Jun 2022 06:56:11 +0000', extracted from an
                entry's `published` key.  Default is DATE_STRING.

        Returns:
            year_month (str):
                The year and month of date_string, e.g. '2022-6'.
                The month is not zero-padded.

        Raises:
            ValueError:
                If date_string, once its UTC offset is removed, does not
                match the '%a, %d %b %Y %H:%M:%S' format.
    """

    # Drop the UTC offset and anything after it.  Splitting on '+' alone
    # left negative offsets such as '-0500' in place, which made strptime fail.
    # The offset is discarded, not applied: year and month are read as written.
    date = re.split(
        pattern=r'\+|-(?=\d{2}:?\d{2})',
        string=date_string,
        maxsplit=1
    )[0].strip()

    # Parse what is left into a datetime.datetime object
    parsed = datetime.strptime(date, '%a, %d %b %Y %H:%M:%S')

    # Reduce the datetime object to a year-month string
    return f'{parsed.year}-{parsed.month}'


# Convert the default DATE_STRING to a 'year-month' string
date_time = convert_to_datetime()

In [8]:
# Display the type of date_time.  This cell dates from when convert_to_datetime
# returned a datetime.datetime object; the function now returns a str.
type(date_time)

str

In [9]:
# Display the date_time string
date_time

'2022-6'

In [10]:
# Add 5 days to the date_time value - Used when convert_to_datetime returned a datetime.datetime object
# date_time + timedelta(
#     days=5
# )

In [11]:
# Extract an entry's category from its `link` key
def get_category(
    link: str = URL_STRING
) -> str:
    """ Extract an entry's category from its `link` key.

        Args:
            link (str):
                A link with a category embedded in the URL, extracted from
                an entry's `link` key.  Default is URL_STRING.

        Returns:
            category (str):
                Category value, looked up in BLOG_CATEGORIES from the first
                path segment of the link.  Falls back to 'article' when the
                link is not a pybit.es post URL or the segment is unknown.
    """

    # Match the whole link and capture its first path segment.  The dot in
    # the host name is escaped so it only matches a literal '.'.
    category_match = re.fullmatch(
        pattern=r'https?://pybit\.es/([a-z]+)/.*',
        string=link
    )

    # Links that do not look like a pybit.es post use the default category
    if category_match is None:
        return 'article'

    # Attempt to match the captured segment to a key in BLOG_CATEGORIES, default is 'article'
    return BLOG_CATEGORIES.get(category_match.group(1), 'article')

# Extract and display the category of the default link (URL_STRING)
category = get_category()
category

'article'

---

## Day 2

### Plotting RSS Feed Entries with `Plotly`

#### Task 1 - Create a breakdown of the number of entries per month

In [12]:
# Collect the 'year-month' string of each entry's publication date
published_dates = [convert_to_datetime(entry.published) for entry in entries]


In [13]:
# Display published_dates
published_dates

['2022-7',
 '2022-7',
 '2022-7',
 '2022-6',
 '2022-6',
 '2022-6',
 '2022-6',
 '2022-6',
 '2022-6',
 '2022-6']

In [14]:
# Get the number of posts by month with a Counter object
posts_by_month = Counter(published_dates)

In [15]:
# Display posts_by_month
posts_by_month

Counter({'2022-7': 3, '2022-6': 7})

#### Task 2 - Create a breakdown of entry categories

In [16]:
# Collect the category of each entry, derived from the entry's link
categories = [get_category(entry.link) for entry in entries]

In [17]:
# Display categories
categories

['article',
 'article',
 'article',
 'article',
 'article',
 'article',
 'article',
 'article',
 'article',
 'article']

In [18]:
# Get a count of entries in each category with a Counter object
category_counts = Counter(categories)

In [19]:
# Display category_counts sorted by most_common
category_counts.most_common()

[('article', 10)]

#### Task 3 - Create a breakdown of common tags

In [20]:
# Create a breakdown of common tags, and display the first 10 tags
# An entry without a `tags` key contributes nothing instead of raising AttributeError
tags = [
    tag.get('term')
    for entry in entries
    for tag in entry.get('tags', [])
]

tags[:10]

['Mindset',
 'brag doc',
 'coaching',
 'communication',
 'comparing to others',
 'confidence',
 'imposter syndrome',
 'pdm',
 'perfectionism',
 'tips']

In [21]:
# Get a count of each tag with a Counter object
tag_counts = Counter(tags)

In [22]:
# Display tag_counts sorted by most_common
tag_counts.most_common()

[('Podcast', 4),
 ('Developer', 4),
 ('pdm', 3),
 ('career', 3),
 ('Modules', 3),
 ('Mindset', 2),
 ('communication', 2),
 ('csv', 2),
 ('rich', 2),
 ('Productivity', 2),
 ('mindset', 2),
 ('productivity', 2),
 ('data', 2),
 ('JIT learning', 2),
 ('pandas', 2),
 ('brag doc', 1),
 ('coaching', 1),
 ('comparing to others', 1),
 ('confidence', 1),
 ('imposter syndrome', 1),
 ('perfectionism', 1),
 ('tips', 1),
 ('Career', 1),
 ('networking', 1),
 ('Tools', 1),
 ('alias', 1),
 ('build', 1),
 ('color searcher', 1),
 ('colors', 1),
 ('colorsys', 1),
 ('console', 1),
 ('hex', 1),
 ('HSL', 1),
 ('pathlib', 1),
 ('shell', 1),
 ('sorting', 1),
 ('Standard Library', 1),
 ('tools', 1),
 ('tuple unpacking', 1),
 ('typing', 1),
 ('unix', 1),
 ('Tips', 1),
 ('disconnect', 1),
 ('distraction', 1),
 ('nature', 1),
 ('take breaks', 1),
 ('technology', 1),
 ('Data', 1),
 ('adjacent skills', 1),
 ('comfort zone', 1),
 ('data science', 1),
 ('Jupyter', 1),
 ('Data Science', 1),
 ('Bites', 1),
 ('command li

---

## Day 3

### Transpose Data and Initialize `Plotly`

#### Task 1 - Convert `posts_by_month`, `category_counts`, and `tag_counts` to data that can form `x` and `y` axes

- At present, the data is in the form of `dict` key/value pairs or lists of `tuple` pairs, where the `[0]` index is a key and the `[1]` index is the corresponding value.
- The transposed data should be a list of tuples where the first tuple contains all of the "keys" (`x`-axis) and the second tuple contains all of the "values" (`y`-axis).

In [39]:
""" Transpose dictionaries or key/value lists of tuples to a list of tuples
    where the first tuple is all keys (x-axis) and the second tuple is all values (y-axis).
"""


def transpose_data_for_graphing(
    data: Union[dict, List[Tuple]]
) -> List[Tuple]:
    """ Transpose data for graphing.

        Transpose dictionaries or key/value lists of tuples to a list of tuples
        where the first tuple is all keys (x-axis) and the second tuple is all values (y-axis).

        Args:
            data (dict or List[Tuple]):
                Data to be transposed.  A dict (including a Counter) is read
                as its key/value pairs.

        Returns:
            transposed_data (List[Tuple]):
                Transposed data.  Empty input yields two empty tuples, so the
                result can always be unpacked into an x-axis and a y-axis.
    """

    # If the data is a dictionary, read it as key/value pairs; the argument itself is left untouched
    pairs = data.items() if isinstance(data, dict) else data

    # Transpose the pairs with the zip function: all keys first, then all values
    transposed_data = list(
        zip(*pairs)
    )

    # zip() over no pairs produces nothing, so supply the two empty axes explicitly
    if not transposed_data:
        return [(), ()]

    return transposed_data

In [40]:
# Split posts_by_month into an x-axis tuple (months) and a y-axis tuple (counts)
transposed_posts_by_month = transpose_data_for_graphing(posts_by_month)

transposed_posts_by_month

[('2022-7', '2022-6'), (3, 7)]

In [41]:
# Split category_counts into an x-axis tuple (categories) and a y-axis tuple (counts)
transposed_category_counts = transpose_data_for_graphing(category_counts)

transposed_category_counts

[('article',), (10,)]

In [42]:
# Split tag_counts into an x-axis tuple (tags) and a y-axis tuple (counts)
transposed_tag_counts = transpose_data_for_graphing(tag_counts)

transposed_tag_counts

[('Mindset',
  'brag doc',
  'coaching',
  'communication',
  'comparing to others',
  'confidence',
  'imposter syndrome',
  'pdm',
  'perfectionism',
  'tips',
  'Podcast',
  'Career',
  'career',
  'networking',
  'Tools',
  'Developer',
  'Modules',
  'alias',
  'build',
  'color searcher',
  'colors',
  'colorsys',
  'console',
  'csv',
  'hex',
  'HSL',
  'pathlib',
  'rich',
  'shell',
  'sorting',
  'Standard Library',
  'tools',
  'tuple unpacking',
  'typing',
  'unix',
  'Tips',
  'Productivity',
  'disconnect',
  'distraction',
  'mindset',
  'nature',
  'productivity',
  'take breaks',
  'technology',
  'Data',
  'adjacent skills',
  'comfort zone',
  'data',
  'data science',
  'JIT learning',
  'Jupyter',
  'pandas',
  'Data Science',
  'Bites',
  'command line',
  'JSON',
  'matplotlib',
  'plotext',
  'plotting',
  'PyBites Platform',
  'schools',
  'typer',
  'best practices',
  'content provider',
  'mentoring',
  'pitfalls',
  'portfolio',
  'Learning',
  'carbon',


#### Task 2 - Initialize `Plotly` in offline mode

In [43]:
# Set up Plotly for offline use inside the notebook
# Reference: https://plot.ly/python/getting-started
plotly.offline.init_notebook_mode(connected=True)