In [14]:
import pandas as pd
import altair as alt
import numpy as np
alt.data_transformers.enable('default', max_rows=20000)
df = pd.read_pickle('../Final_Data/combined_data.pkl')
nest = df.set_index('series').dropna().to_dict()['Parent Series ID'] # Gives the series ID of the parent of a given series.
cat_names = df.set_index('series').dropna().to_dict()['Category'] # Gives the category name of a given series ID.

In [15]:
def build_arg_text(**args):
    
    arg_text = '?chart_type=' + args.get('chart_type','Line+Chart')
    arg_text += '&start_year=' + args.get('start_year','2000')
    arg_text += '&inflation=' + args.get('inflation','By_Category')
    arg_text += '&earnings=' + args.get('earnings','Exclude')
    arg_text += '&unemployment=' + args.get('unemployment','Exclude')
    arg_text += '&stocks=' + args.get('stocks','Exclude')
    arg_text += '&interest=' + args.get('interest','Exclude')
    
    return arg_text

In [16]:
def data_parse(df, **args):
    
    website = 'https://apps-summer22.ischool.berkeley.edu/inflation_viz/chart'
    arg_text = website + build_arg_text(**args)
    
    start_date = '1/1/' + str(args.get('start_year','2000'))
    end_date = '12/1/' + str(args.get('end_year','2021'))
    
    # Start off with no hyperlinks and add where needed.
    df['href'] = arg_text
    
    # CPI
    if args.get('parent','') == '':
        if args.get('inflation','By+Category') == 'Exclude':
            df_cpi = pd.DataFrame
        elif args.get('inflation','By+Category') == 'By+Category':
            df_cpi = df[df['Parent Series ID'] == 'CUSR0000SA0']
        elif args.get('inflation','By+Category') == 'Total':
            df_cpi = df[df['series'] == 'CUSR0000SA0']
    else:
        df_cpi = df[df['Parent Series ID'] == args.get('parent','')]
        
    if args.get('inflation','By+Category') != 'Exclude':
        df_cpi['Category'] = 'CPI - ' + df_cpi['Category']
        df_cpi['href'] = np.where(df_cpi['Leaf'] == 0, arg_text + '&parent=' + df_cpi['series'], 'None')
        
    # Earnings
    if args.get('earnings','') in ['','Exclude']:
        df_earnings = pd.DataFrame()
    else:
        df_earnings = df[(df['Type'] == 'Earnings') & (df['Bucket'] == args.get('earnings','').replace('+',' '))]
        
    # Unemployment
    if args.get('unemployment','') in ['','Exclude']:
        df_unemployment = pd.DataFrame()
    else:
        df_unemployment = df[(df['Type'] == 'Unemployment') & (df['Bucket'] == args.get('unemployment','').replace('+',' '))]
        
    # Stocks
    if args.get('stocks','') in ['','Exclude']:
        df_stocks = pd.DataFrame()
    else:
        df_stocks = df[df['Type'] == 'Stocks'] 
        
    # Interest
    if args.get('interest','') in ['','Exclude']:
        df_interest = pd.DataFrame()
    else:
        df_interest = df[df['Type'] == 'Interest'] 

    # Combine selected data.
    df = pd.concat([df_cpi, df_earnings, df_unemployment, df_stocks, df_interest])

    # Normalize values to % change the specific category from start of date window.
    baseline_dict = {}
    min_dt = df[['date','value','Category']].groupby('Category').date.min().to_dict()
    for k,v in min_dt.items():
        min_dt[k] = max(v,pd.to_datetime(start_date))
    for k,v in min_dt.items():
        baseline_dict[k] = df[(df['Category'] == k) & (df['date'] == v)].value.item()
    df['baseline'] = df['Category'].map(baseline_dict) 
    df['change'] = df['value']/df['baseline'] - 1

    return df

In [17]:
def build_line(df, **args):
    
    # Set dates.
    start_year = args.get('start_year',2000)
    start_date = '1/1/' + str(start_year)
    end_year = args.get('end_year',2021)
    end_date = '12/1/' + str(end_year)
    
    # Set color scheme based on the number of categories.
    if len(df['Category'].unique()) > 10:
        c_scheme = 'category20'
    else:
        c_scheme = 'category10'
        
    # Check for hrefs.
    has_hrefs = len(df[df['href'] != 'None']) > 0
    has_non_hrefs = len(df[df['href'] == 'None']) > 0
    
    
    '''if has_hrefs:
        with_href = alt.Chart(df[['date','change','Category','href']][(df['date'] >= start_date) & (df['date'] <= end_date) & (df['href'] != '')], title='Change Since ' + str(start_year) +' by Category').mark_line().encode(
            x = alt.X('date', title = 'Year'),
            y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
            color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
            tooltip = 'Category',
            href = alt.Href('href')
        ).properties(height=400, width=600)
    if has_non_hrefs:
        no_href = alt.Chart(df[['date','change','Category','href']][(df['date'] >= start_date) & (df['date'] <= end_date) & (df['href'] != '')], title='Change Since ' + str(start_year) +' by Category').mark_line().encode(
            x = alt.X('date', title = 'Year'),
            y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
            color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
            tooltip = 'Category'
        ).properties(height=400, width=600)
    
    if has_hrefs and has_non_hrefs:
        t_chart = with_href + no_href
    elif has_hrefs:
        t_chart = with_href
    elif has_non_hrefs:
        t_chart = no_href
    else:
        return "No data to dispaly on chart."'''
    
    # Line Chart
    t_chart = alt.Chart(df[['date','change','Category','href']][(df['date'] >= start_date) & (df['date'] <= end_date) & (df['href'] != '')], title='Change Since ' + str(start_year) +' by Category').mark_line().encode(
            x = alt.X('date', title = 'Year'),
            y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
            color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
            tooltip = 'Category',
            href = alt.Href('href')
        ).properties(height=400, width=600)
        
    t_chart['usermeta'] = {"embedOptions": {'loader': {'target': '_chart'}}}
    
    return t_chart

In [18]:
def build_bar(df, **args):
    
    # Set dates.
    start_year = args.get('start_year',2000)
    start_date = '1/1/' + str(start_year)
    end_year = args.get('end_year',2021)
    end_date = '12/1/' + str(end_year)
    
    # Set color scheme based on the number of categories.
    if len(df['Category'].unique()) > 10:
        c_scheme = 'category20'
    else:
        c_scheme = 'category10'
        
    # Bar Chart
    '''with_href = alt.Chart(df[['date','change','Category','href']][(df['date'] == end_date) | ((df['periodName'] == '4th Quarter') & (df['year'] == end_year))],title='Change Since ' + str(start_year) + ' by Category').mark_bar().encode(
        x = alt.X('Category', sort='y'),
        y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
        color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
        tooltip = 'Category',
        href = alt.Href('href')
    ).properties(height=400, width=600)
    no_href = alt.Chart(df[['date','change','Category','href']][(df['date'] == end_date) | ((df['periodName'] == '4th Quarter') & (df['year'] == end_year))],title='Change Since ' + str(start_year) + ' by Category').mark_bar().encode(
        x = alt.X('Category', sort='y', axis=alt.Axis(labels=False)),
        y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
        color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
        tooltip = 'Category'
    ).properties(height=400, width=600)'''
    
    t_chart = alt.Chart(df[['date','change','Category','href']][(df['date'] == end_date) | ((df['periodName'] == '4th Quarter') & (df['year'] == end_year))],title='Change Since ' + str(start_year) + ' by Category').mark_bar().encode(
            x = alt.X('Category', sort='y'),
            y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
            color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
            tooltip = 'Category',
            href = alt.Href('href')
        ).properties(height=400, width=600)
    
    t_chart['usermeta'] = {"embedOptions": {'loader': {'target': '_chart'}}}
    
    return t_chart

In [19]:
# Simulate args.
arg_str = "chart_type=Bar+Chart&start_year=1970&end_year=2021&inflation=By+Category&earnings=Exclude&unemployment=Exclude&stocks=Exclude&interest=Include"
arg_list = arg_str.split('&')
arg_dict = {z.split('=')[0] : z.split('=')[1] for z in arg_list}
args = arg_dict

t_df = data_parse(df.copy(deep=True), **args)

if args['chart_type'] == 'Line+Chart':
    out_html = build_line(t_df, **args)
    pass
elif args['chart_type'] == 'Bar+Chart':
    out_html = build_bar(t_df, **args)
else:
    pass

out_html

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cpi['Category'] = 'CPI - ' + df_cpi['Category']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cpi['href'] = np.where(df_cpi['Leaf'] == 0, arg_text + '&parent=' + df_cpi['series'], 'None')
