In [598]:
import pandas as pd
import altair as alt
alt.data_transformers.enable('default', max_rows=20000)
df = pd.read_pickle('combined_data.pkl')
nest = df.set_index('series').dropna().to_dict()['Parent Series ID'] # Gives the series ID of the parent of a given series.
cat_names = df.set_index('series').dropna().to_dict()['Category'] # Gives the category name of a given series ID.

In [599]:
# Parameters - CPI Data
start_year = 2006
end_year = 2021
parent = 'CUSR0000SA0' # Top-level
#parent = 'CUSR0000SAH3' # Testing a lower-level category
#parent = 'CUSR0000SAF11'

start_date = '1/1/' + str(start_year)
end_date = '12/1/' + str(end_year)

# Parameters - Other Data
earnings = 'Exclude' # Pick one of 'Exclude', 'Top', 'Gender', 'Race', 'Education', or 'Race and Gender'
unemployment = 'Exclude' # Pick one of 'Exclude', 'Top', 'Gender', 'Race', 'Education'
stocks = True # Pick True or False
interest = False # Pick True or False

In [600]:
# Building list of nesting category names for use in UI.
nest_list_series_ids = []
nest_list_category_names = []
t = parent
for x in range(0,4):
    if nest.get(t,0) != 0:
        nest_list_series_ids.append(nest.get(t,0))
        nest_list_category_names.append(cat_names.get(t,0))
        t = nest.get(t,0)
nest_list_category_names.append('All items')       
nest_list_category_names[::-1]

['All items']

In [601]:
# Pull data for conditions in parameters.
df_cpi = df[df['Parent Series ID'] == parent]
#df_cpi = df[df['Category'] == 'All items']
df_cpi['Category'] = 'CPI - ' + df_cpi['Category']
df_earnings = df[(df['Type'] == 'Earnings') & (df['Bucket'] == earnings)]
df_unemployment = df[(df['Type'] == 'Unemployment') & (df['Bucket'] == unemployment)]
if stocks == True:
    df_stocks = df[df['Type'] == 'Stocks']
else:
    df_stocks = pd.DataFrame()
if interest == True:   
    df_interest = df[df['Type'] == 'Interest']
else:
    df_interest = pd.DataFrame()

# Combine selected data.
df = pd.concat([df_cpi, df_earnings, df_unemployment, df_stocks, df_interest])

# Normalize values to % change the specific category from start of date window.
min_dt = df[['date','value','Category']].groupby('Category').date.min().to_dict()
print(min_dt)
for k,v in min_dt.items():
    min_dt[k] = max(v,pd.to_datetime(start_date))
for k,v in min_dt.items():
    baseline_dict[k] = df[(df['Category'] == k) & (df['date'] == v)].value.item()
baseline_dict
df['baseline'] = df['Category'].map(baseline_dict) 
df['change'] = df['value']/df['baseline'] - 1

# Set color scheme based on the number of categories.
if len(df['Category'].unique()) > 10:
    c_scheme = 'category20'
else:
    c_scheme = 'category10'

{'CPI - Apparel': Timestamp('1970-01-01 00:00:00'), 'CPI - Education and communication': Timestamp('1993-01-01 00:00:00'), 'CPI - Food and Beverages': Timestamp('1970-01-01 00:00:00'), 'CPI - Housing': Timestamp('1970-01-01 00:00:00'), 'CPI - Medical care': Timestamp('1970-01-01 00:00:00'), 'CPI - Other goods and services': Timestamp('1970-01-01 00:00:00'), 'CPI - Recreation': Timestamp('1993-01-01 00:00:00'), 'CPI - Transportation': Timestamp('1970-01-01 00:00:00'), 'Stocks - Dow': Timestamp('1970-01-01 00:00:00'), 'Stocks - NASDAQ': Timestamp('1971-02-01 00:00:00'), 'Stocks - S&P': Timestamp('1970-01-01 00:00:00')}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cpi['Category'] = 'CPI - ' + df_cpi['Category']


In [609]:
# Line Chart
alt.Chart(df[['date','change','Category']][(df['date'] >= start_date) & (df['date'] <= end_date)], title='Change Since ' + str(start_year) +' by Category').mark_line().encode(
    x = alt.X('date', title = 'Year'),
    y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
    color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
    tooltip = 'Category'
).properties(height=400, width=600)#.interactive()

In [610]:
# Area/Stream Chart
alt.Chart(df[['date','change','Category']][(df['date'] >= start_date) & (df['date'] <= end_date)],title='Change Since ' + str(start_year) + ' by Category').mark_area().encode(
    x = alt.X('date'),
    y = alt.Y('change', stack='center', axis=None),
    color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
    tooltip = 'Category'
).properties(height=400, width=600)#.interactive()

In [611]:
# Bar Chart
alt.Chart(df[['date','change','Category']][(df['date'] == end_date) | ((df['periodName'] == '4th Quarter') & (df['year'] == end_year))],title='Change Since ' + str(start_year) + ' by Category').mark_bar().encode(
    x = alt.X('Category', sort='y'),
    y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
    color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
    tooltip = 'Category'
).properties(height=400, width=600)#.interactive()

In [612]:
# Scatter Chart
alt.Chart(df[['date','change','Category']][(df['date'] >= start_date) & (df['date'] <= end_date)], title='Change Since ' + str(start_year) +' by Category').mark_circle().encode(
    x = alt.X('date', title = 'Year'),
    y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
    color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
    tooltip = 'Category'
).properties(height=400, width=600)#.interactive()

In [613]:
# Bump Chart
alt.Chart(df[['date','change','Category']][(df['date'] >= start_date) & (df['date'] <= end_date)]).mark_line(point = True).encode(
    x = alt.X("date:O", timeUnit="year", title="Year"),
    y="rank:O",
    color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
    tooltip = 'Category'
).transform_window(
    rank="rank()",
    sort=[alt.SortField("change", order="descending")],
    groupby=["date"]
).properties(
    title='Ranked Change Since ' + str(start_year) +' by Category',
    width=600,
    height=400,
)

In [614]:
# Slope Chart
alt.Chart(df[['date','change','Category']][(df['date'] == start_date) | (df['date'] == end_date)], title='Change Since ' + str(start_year) +' by Category').mark_line().encode(
    x = alt.X('date', title = 'Year', axis = alt.Axis(values = [start_date, end_date], format='%Y')),
    y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
    color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
    tooltip = 'Category'
).properties(height=400, width=600)#.interactive()

In [615]:
c1 = base = alt.Chart(df[['year','change','Category']][(df['date'] >= start_date) & (df['date'] <= end_date)], title='Change Since ' + str(start_year) +' by Category').encode(
    theta=alt.Theta("year:N", stack=True),
    radius=alt.Radius("sum(change)", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20), stack=True),
    color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
    tooltip = 'Category'
).properties(height=400, width=600).mark_arc(innerRadius=20, stroke="#fff")

c2 = base = alt.Chart(df[['year','change','Category']][(df['date'] >= start_date) & (df['date'] <= end_date)], title='Change Since ' + str(start_year) +' by Category').encode(
    theta=alt.Theta("year:N"),
    radius=alt.Radius("sum(change)", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20))
).properties(height=400, width=600).mark_text(radiusOffset=15).encode(text="year:Q")

(c1 + c2)