In [None]:
import ipywidgets as widgets
from ipywidgets import interact

import numpy as np
import pandas as pd
import datetime
import pathlib
from os import environ

from solution import Entry, get_week, str_to_date

In [None]:
# Directory that holds the exported JSON files.
# It is read from the TARGET_DIRECTORY environment variable; if that
# variable is not set, adjust the fallback path below.
# The value is always wrapped in pathlib.Path because environ yields a
# plain str, which has no .glob() method for the loading cell to call.
DATA_ROOT = pathlib.Path(environ.get('TARGET_DIRECTORY', './result'))
DATA_ROOT

In [None]:
# read data
# Every file matching <DATA_ROOT>/*/*/*.json is parsed into one Series and
# the Series are stacked into the rows of a single DataFrame.
# DATA_ROOT is coerced to a Path so the glob also works when it is a plain
# string; the file list is sorted because glob order is not guaranteed.
json_files = sorted(pathlib.Path(DATA_ROOT).glob('*/*/*.json'))
# pandas documents only 'frame' and 'series' as values for `typ`;
# 'series' is the one that parses a single JSON object into a Series.
records = [pd.read_json(json_file, typ='series') for json_file in json_files]
data = pd.DataFrame.from_records(records)

In [None]:
# prepare columns for data analysis
# Each derived column depends on a single source column, so Series.map is
# used instead of a row-wise DataFrame.apply(axis=1): it avoids building a
# Series per row and still returns a Series when the frame is empty.
data['date'] = data['date'].map(str_to_date)
data['week'] = data['date'].map(get_week)
data['year'] = data['date'].map(lambda date: date.year)
data['text_len'] = data['text'].str.len()

In [None]:
# create an interactive graph
# the graph might be too large or small, adjust its size
# with the fig_{x,y} sliders
# you can also choose which years to display, the orientation
# of the graph, whether to compare only whole years (instead of
# weeks), and which value to compare

# Years present in the data, sorted so the selector lists them in order.
year_options = np.sort(data['year'].unique())
year_labels = [str(year) for year in year_options]


@interact(years=widgets.SelectMultiple(options=year_labels,
                                       value=year_labels,
                                       description='Years', ))
def plot_graph(years,
               fig_x=(1,20, 0.5), fig_y=(1,20, 0.5),
               horizontal=True, years_only=False,
               display=['text len mean', 'msg count'],
              ):
    """Draw a bar chart of message statistics for the selected years.

    The tuple/bool/list defaults are ipywidgets ``interact`` abbreviations
    that define the controls; the function itself receives the value
    currently chosen in each control.

    Parameters
    ----------
    years : iterable of str
        Years to include, as the string labels offered by the selector.
    fig_x, fig_y : float
        Width and height handed to the plot's ``figsize``.
    horizontal : bool
        Draw horizontal bars when true, vertical bars otherwise.
    years_only : bool
        Group by year only instead of by year and week.
    display : str
        ``'text len mean'`` plots the mean of ``text_len`` per group,
        ``'msg count'`` plots the count of ``text`` per group.

    Raises
    ------
    ValueError
        If ``display`` is neither of the two supported options.
    """
    selected_years = [int(year) for year in years]
    filtered_data = data[data['year'].isin(selected_years)]
    if filtered_data.empty:
        # With every year deselected there is nothing to draw, and the
        # pandas bar plot raises on empty input; report it instead.
        print('No data for the selected years.')
        return

    group_by = ['year'] if years_only else ['year', 'week']
    # groupby sorts the group keys by default, so no explicit sort is needed.
    grouped_data = filtered_data.groupby(group_by)
    if display == 'text len mean':
        grouped = grouped_data['text_len'].mean()
    elif display == 'msg count':
        grouped = grouped_data['text'].count()
    else:
        raise ValueError(f'unknown display option: {display!r}')

    # Label the value axis so the figure states which statistic it shows.
    if horizontal:
        ax = grouped.plot.barh(figsize=(fig_x, fig_y), grid=True)
        ax.set_xlabel(display)
    else:
        ax = grouped.plot.bar(figsize=(fig_x, fig_y), grid=True)
        ax.set_ylabel(display)
