In [34]:
import ipywidgets as widgets
from ipywidgets import interact

import numpy as np
import pandas as pd
import datetime
import pathlib
from os import environ

from solution import Entry, get_week, str_to_date

In [35]:
# Directory that holds the JSON result files.
# Taken from the TARGET_DIRECTORY env var when it is set; otherwise the
# fallback below is used. Edit the fallback to point somewhere else.
# The value is always wrapped in pathlib.Path: environ values are plain
# strings, and the loading cell calls DATA_ROOT.glob(...).
DATA_ROOT = pathlib.Path(environ.get('TARGET_DIRECTORY', './result'))
DATA_ROOT

PosixPath('result')

In [73]:
# read data
# Every file matching <DATA_ROOT>/*/*/*.json is parsed into a pandas Series
# (typ='series' is the documented spelling; the previous 'records' value is
# not a documented option). The Series are then stacked into a single
# DataFrame.
records = [
    pd.read_json(json_file, typ='series')
    for json_file in DATA_ROOT.glob('*/*/*.json')
]
data = pd.DataFrame.from_records(records)

Unnamed: 0,count,date,status,text
0,2,2021-11-19,True,puppy runs merrily odd
1,1,2021-11-15,True,puppy runs crazily adorable
2,2,2021-11-17,True,cat hits merrily clueless
3,1,2021-11-19,True,cat runs crazily adorable
4,4,2021-11-19,True,puppy runs merrily adorable
...,...,...,...,...
3646,2,2023-01-18,True,dog hits dutifully adorable
3647,3,2023-01-17,True,cat jumps merrily clueless
3648,1,2023-01-22,True,monkey runs crazily clueless
3649,3,2023-01-20,True,monkey hits foolishly clueless


In [85]:
# prepare columns for data analysis
# Each derived column depends on exactly one source column, so the source
# column is mapped directly instead of materialising a row object per record
# with DataFrame.apply(axis=1).
# NOTE(review): `date` is overwritten in place, so re-running this cell passes
# already-converted values to str_to_date - confirm it tolerates that, or
# re-run the loading cell first.
data['date'] = data['date'].map(str_to_date)
data['week'] = data['date'].map(get_week)
data['year'] = data['date'].map(lambda date: date.year)
data['text_len'] = data['text'].str.len()

Unnamed: 0,count,date,status,text,week,year,text_len
0,2,2021-11-19,True,puppy runs merrily odd,47,2021,22
1,1,2021-11-15,True,puppy runs crazily adorable,47,2021,27
2,2,2021-11-17,True,cat hits merrily clueless,47,2021,25
3,1,2021-11-19,True,cat runs crazily adorable,47,2021,25
4,4,2021-11-19,True,puppy runs merrily adorable,47,2021,27
...,...,...,...,...,...,...,...
3646,2,2023-01-18,True,dog hits dutifully adorable,4,2023,27
3647,3,2023-01-17,True,cat jumps merrily clueless,4,2023,26
3648,1,2023-01-22,True,monkey runs crazily clueless,4,2023,28
3649,3,2023-01-20,True,monkey hits foolishly clueless,4,2023,30


In [128]:
# Keep only the rows whose `year` is 2021, then display the result.
filtered_data = data.loc[data['year'].isin([2021])]
filtered_data

Unnamed: 0,count,date,status,text,week,year,text_len
0,2,2021-11-19,True,puppy runs merrily odd,47,2021,22
1,1,2021-11-15,True,puppy runs crazily adorable,47,2021,27
2,2,2021-11-17,True,cat hits merrily clueless,47,2021,25
3,1,2021-11-19,True,cat runs crazily adorable,47,2021,25
4,4,2021-11-19,True,puppy runs merrily adorable,47,2021,27
...,...,...,...,...,...,...,...
1489,3,2021-01-22,True,dog jumps dutifully clueless,4,2021,28
1490,3,2021-01-18,True,rabbit runs occasionally adorable,4,2021,33
1491,2,2021-01-18,True,rabbit runs foolishly clueless,4,2021,30
1492,1,2021-01-22,True,cat hits foolishly adorable,4,2021,27


In [132]:
# Distinct years present in the data, sorted so the widget always lists them
# in the same order regardless of row order in `data`.
year_options = np.sort(data['year'].unique())
@interact(years=widgets.SelectMultiple(options=[str(year) for year in year_options],
                                       value=[str(year) for year in year_options],
                                       description='Years', ))
def plot_graph(years,
               fig_x=(1,20, 0.5), fig_y=(1,20, 0.5),
               horizontal=True, years_only=False,
               display=['text len mean', 'msg count'],
              ):
    """Draw a bar chart of a per-period statistic for the selected years.

    The tuple/list defaults are ipywidgets `interact` abbreviations; the
    function body receives the current widget values rather than the
    abbreviations themselves.

    Parameters
    ----------
    years : iterable of str
        Years to include, as strings (converted with int() before filtering).
    fig_x, fig_y : number
        Figure width and height passed to the plot as `figsize`.
    horizontal : bool
        True draws horizontal bars (`barh`), False draws vertical bars (`bar`).
    years_only : bool
        True groups by year only; False groups by (year, week).
    display : str
        'text len mean' plots the mean of `text_len` per group;
        'msg count' plots the number of `text` entries per group.

    Raises
    ------
    ValueError
        If `display` is not one of the two supported options.
    """
    # Map each display option to the (column, aggregation) it stands for.
    aggregations = {
        'text len mean': ('text_len', 'mean'),
        'msg count': ('text', 'count'),
    }
    if display not in aggregations:
        raise ValueError(
            f'unknown display option {display!r}; '
            f'expected one of {list(aggregations)}'
        )
    column, how = aggregations[display]

    selected = data[data['year'].isin([int(y) for y in years])]
    if selected.empty:
        # Nothing to draw (e.g. every year deselected): say so instead of
        # handing an empty series to the plotting backend.
        print('No data for the selected years.')
        return

    group_by = ['year'] if years_only else ['year', 'week']
    # groupby sorts the group keys by default, so no explicit pre-sort is needed.
    grouped = selected.groupby(group_by)[column].agg(how)

    draw = grouped.plot.barh if horizontal else grouped.plot.bar
    ax = draw(figsize=(fig_x, fig_y), grid=True)
    ax.set_title(display)
    # Label the value axis; the category axis is labelled by the plot itself.
    if horizontal:
        ax.set_xlabel(display)
    else:
        ax.set_ylabel(display)


interactive(children=(SelectMultiple(description='Years', index=(0, 1, 2), options=('2021', '2022', '2023'), v…