In [161]:
import pandas as pd
import altair as alt
# Load the adjusted series data.
# NOTE(review): unpickling can execute arbitrary code; only load this file from a trusted source.
df = pd.read_pickle('data_adjusted.pkl')

# Index by series ID, drop rows containing any missing value, and convert once
# to a {column: {series ID: value}} mapping that both lookups are taken from.
_series_lookup = df.set_index('series').dropna().to_dict()
nest = _series_lookup['Parent Series ID']  # Gives the series ID of the parent of a given series.
cat_names = _series_lookup['Category']  # Gives the category name of a given series ID.

In [162]:
# Parameters: the reporting window (in years) and the series whose children are charted.
start_year, end_year = 1988, 2021
parent = 'CUSR0000SA0' # Top-level
#parent = 'CUSR0000SAH3' # Testing a lower-level category

# Window bounds as M/D/YYYY strings: 1 January of the first year to 1 December of the last.
start_date = f'1/1/{start_year}'
end_date = f'12/1/{end_year}'

In [163]:
# Building list of nesting category names for use in UI.
# Walk up the parent chain from `parent` until a series has no entry in `nest`.
# The walk is not capped at a fixed depth, so deep hierarchies are not truncated.
nest_list_series_ids = []      # series IDs of each ancestor, nearest ancestor first
nest_list_category_names = []  # category name of each node that has a parent, starting at `parent`
current_id = parent
visited = {current_id}  # guards against looping forever if the parent mapping contains a cycle
while True:
    ancestor_id = nest.get(current_id)
    if ancestor_id is None or ancestor_id in visited:
        break
    nest_list_series_ids.append(ancestor_id)
    nest_list_category_names.append(cat_names.get(current_id, 0))
    visited.add(ancestor_id)
    current_id = ancestor_id
# The root of the hierarchy has no entry in `nest`, so its label is added explicitly.
nest_list_category_names.append('All items')
# Display the breadcrumb from the top level down to `parent`.
nest_list_category_names[::-1]

['All items']

In [164]:
# Normalize values to % change of the specific category from start of date window.
# .copy() makes the filtered frame independent of the loaded one, so the column
# assignments at the end of this cell do not raise SettingWithCopyWarning.
df = df[df['Parent Series ID'] == parent].copy()

# Earliest observation date of each category.
min_dt = df.groupby('Category').date.min().to_dict()
print(min_dt)

# Baseline date per category: the window start, or the category's first
# observation when that category only begins after the window start.
window_start = pd.to_datetime(start_date)
min_dt = {category: max(first_seen, window_start) for category, first_seen in min_dt.items()}

# Built from an empty dict so the cell also runs on a fresh kernel.
# .item() raises unless exactly one row matches the category/date pair.
baseline_dict = {}
for category, baseline_date in min_dt.items():
    on_baseline = (df['Category'] == category) & (df['date'] == baseline_date)
    baseline_dict[category] = df.loc[on_baseline, 'value'].item()

# Fractional change of every observation relative to its category's baseline value.
df['baseline'] = df['Category'].map(baseline_dict)
df['change'] = df['value']/df['baseline'] - 1

{'Apparel': Timestamp('1970-01-01 00:00:00'), 'Education and communication': Timestamp('1993-01-01 00:00:00'), 'Food and Beverages': Timestamp('1970-01-01 00:00:00'), 'Housing': Timestamp('1970-01-01 00:00:00'), 'Medical care': Timestamp('1970-01-01 00:00:00'), 'Other goods and services': Timestamp('1970-01-01 00:00:00'), 'Recreation': Timestamp('1993-01-01 00:00:00'), 'Transportation': Timestamp('1970-01-01 00:00:00')}


In [165]:
# Recompute each category's baseline date and value.
# This updates the existing `baseline_dict` in place, so it must already exist in the kernel.
earliest_by_category = df.groupby('Category')['date'].min()
min_dt = earliest_by_category.to_dict()
print(min_dt)

# Clamp every baseline date so it is never earlier than the window start.
window_open = pd.to_datetime(start_date)
for category in list(min_dt):
    min_dt[category] = max(min_dt[category], window_open)

# Look up the single observation at each category's baseline date.
for category, anchor_date in min_dt.items():
    anchor_rows = df[(df['Category'] == category) & (df['date'] == anchor_date)]
    baseline_dict[category] = anchor_rows.value.item()
baseline_dict

{'Apparel': Timestamp('1970-01-01 00:00:00'), 'Education and communication': Timestamp('1993-01-01 00:00:00'), 'Food and Beverages': Timestamp('1970-01-01 00:00:00'), 'Housing': Timestamp('1970-01-01 00:00:00'), 'Medical care': Timestamp('1970-01-01 00:00:00'), 'Other goods and services': Timestamp('1970-01-01 00:00:00'), 'Recreation': Timestamp('1993-01-01 00:00:00'), 'Transportation': Timestamp('1970-01-01 00:00:00')}


{'Food and Beverages': 115.6,
 'Housing': 116.4,
 'Apparel': 113.2,
 'Transportation': 124.6,
 'Medical care': 134.4,
 'Other goods and services': 132.8,
 'Education and communication': 84.1,
 'Recreation': 89.7}

In [166]:
# Refresh the baseline value of every category in `min_dt`, writing into the existing `baseline_dict`.
for category, baseline_date in min_dt.items():
    is_baseline_row = (df['Category'] == category) & (df['date'] == baseline_date)
    # .item() requires exactly one matching row.
    baseline_dict[category] = df[is_baseline_row].value.item()
baseline_dict

{'Food and Beverages': 115.6,
 'Housing': 116.4,
 'Apparel': 113.2,
 'Transportation': 124.6,
 'Medical care': 134.4,
 'Other goods and services': 132.8,
 'Education and communication': 84.1,
 'Recreation': 89.7}

In [167]:
# Line Chart: change since the baseline over time, one line per category.
line_rows = (df['date'] >= start_date) & (df['date'] <= end_date)
line_y_title = f'Price change since {start_year}'
alt.Chart(
    df.loc[line_rows, ['date', 'change', 'Category']],
    title=f'Price Change since {start_year} by Category',
).mark_line().encode(
    x=alt.X('date', title='Year'),
    y=alt.Y('change', title=line_y_title, axis=alt.Axis(format='%')),
    color='Category',
).properties(height=400, width=600).interactive()

In [168]:
# Area/Stream Chart: centre-stacked areas of change per category, with the y axis hidden.
stream_rows = (df['date'] >= start_date) & (df['date'] <= end_date)
stream_source = df.loc[stream_rows, ['date', 'change', 'Category']]
alt.Chart(
    stream_source,
    title=f'Price Change since {start_year} by Category',
).mark_area().encode(
    x=alt.X('date'),
    y=alt.Y('change', stack='center', axis=None),
    color='Category',
).properties(height=400, width=600).interactive()

In [169]:
# Bar Chart: change of each category at the end of the window, bars ordered by their y value.
final_rows = df['date'] == end_date
final_snapshot = df.loc[final_rows, ['date', 'change', 'Category']]
alt.Chart(
    final_snapshot,
    title=f'Price Change since {start_year} by Category',
).mark_bar().encode(
    x=alt.X('Category', sort='y'),
    y=alt.Y('change', title=f'Price change since {start_year}', axis=alt.Axis(format='%')),
    color='Category',
).properties(height=400, width=600).interactive()

In [170]:
# Scatter Chart: one circle per observation inside the date window, coloured by category.
scatter_rows = (df['date'] >= start_date) & (df['date'] <= end_date)
scatter_x = alt.X('date', title='Year')
scatter_y = alt.Y('change', title=f'Price change since {start_year}', axis=alt.Axis(format='%'))
alt.Chart(
    df.loc[scatter_rows, ['date', 'change', 'Category']],
    title=f'Price Change since {start_year} by Category',
).mark_circle().encode(
    x=scatter_x,
    y=scatter_y,
    color='Category',
).properties(height=400, width=600).interactive()

In [171]:
# Bump Chart: rank of each category by change (largest change = rank 1).
# NOTE(review): ranks are computed per distinct `date`, while x uses a year time unit.
# If the data holds several observations per year they share one x position - confirm this is intended.
bump_rows = (df['date'] >= start_date) & (df['date'] <= end_date)
bump_source = df.loc[bump_rows, ['date', 'change', 'Category']]
alt.Chart(bump_source).mark_line(point=True).encode(
    x=alt.X("date:O", timeUnit="year", title="Year"),
    y="rank:O",
    color=alt.Color("Category:N"),
).transform_window(
    rank="rank()",
    sort=[alt.SortField("change", order="descending")],
    groupby=["date"],
).properties(
    title=f'Ranked Price Change since {start_year} by Category',
    width=600,
    height=400,
)

In [172]:
# Slope Chart: only the observations at the two window endpoints, joined by a line per category.
endpoint_rows = (df['date'] == start_date) | (df['date'] == end_date)
endpoint_axis = alt.Axis(values=[start_date, end_date], format='%Y')
alt.Chart(
    df.loc[endpoint_rows, ['date', 'change', 'Category']],
    title=f'Price Change since {start_year} by Category',
).mark_line().encode(
    x=alt.X('date', title='Year', axis=endpoint_axis),
    y=alt.Y('change', title=f'Price change since {start_year}', axis=alt.Axis(format='%')),
    color='Category',
).properties(height=400, width=600).interactive()

In [173]:
# Radial Chart: stacked arcs of summed change per year, with a text layer showing the year.
def _radial_source():
    """Return the year/change/Category columns for rows inside the date window."""
    in_window = (df['date'] >= start_date) & (df['date'] <= end_date)
    return df.loc[in_window, ['year', 'change', 'Category']]

radial_title = f'Price Change since {start_year} by Category'

# Arc layer: theta and radius are both stacked, coloured by category.
c1 = alt.Chart(_radial_source(), title=radial_title).mark_arc(innerRadius=20, stroke="#fff").encode(
    theta=alt.Theta("year", stack=True),
    radius=alt.Radius("sum(change)", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20), stack=True),
    color="Category:N",
).properties(height=400, width=600)

# Text layer: same fields without stacking or colour, labelled with the year.
c2 = alt.Chart(_radial_source(), title=radial_title).mark_text(radiusOffset=15).encode(
    theta=alt.Theta("year"),
    radius=alt.Radius("sum(change)", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)),
    text="year:Q",
).properties(height=400, width=600)

# `base` ends up bound to the text layer, as it was before.
base = c2

(c1 + c2)