In [134]:
import pandas as pd
import altair as alt
import numpy as np
alt.data_transformers.enable('default', max_rows=20000)
df = pd.read_pickle('../Final_Data/combined_data.pkl')
nest = df.set_index('series').dropna().to_dict()['Parent Series ID'] # Gives the series ID of the parent of a given series.
cat_names = df.set_index('series').dropna().to_dict()['Category'] # Gives the category name of a given series ID.

In [135]:
def build_arg_text(**args):
    
    arg_text = '?chart_type=' + args.get('chart_type','Line+Chart')
    arg_text += '&start_year=' + args.get('start_year','2000')
    arg_text += '&inflation=' + args.get('inflation','By_Category')
    arg_text += '&earnings=' + args.get('earnings','Exclude')
    arg_text += '&unemployment=' + args.get('unemployment','Exclude')
    arg_text += '&stocks=' + args.get('stocks','Exclude')
    arg_text += '&interest=' + args.get('interest','Exclude')
    
    return arg_text

In [136]:
def data_parse(df, **args):
    
    website = 'https://apps-summer22.ischool.berkeley.edu/inflation_viz/chart'
    arg_text = website + build_arg_text(**args)
    
    start_date = '1/1/' + str(args.get('start_year','2000'))
    end_date = '12/1/' + str(args.get('end_year','2021'))
    
    # Start off with no hyperlinks and add where needed.
    df['href'] = arg_text
    
    # CPI
    if args.get('parent','') == '':
        if args.get('inflation','By+Category') == 'Exclude':
            df_cpi = pd.DataFrame
        elif args.get('inflation','By+Category') == 'By+Category':
            df_cpi = df[df['Parent Series ID'] == 'CUSR0000SA0']
        elif args.get('inflation','By+Category') == 'Total':
            df_cpi = df[df['series'] == 'CUSR0000SA0']
    else:
        df_cpi = df[df['Parent Series ID'] == args.get('parent','')]
        
    if args.get('inflation','By+Category') != 'Exclude':
        df_cpi['Category'] = 'CPI - ' + df_cpi['Category']
        df_cpi['href'] = np.where(df_cpi['Leaf'] == 0, arg_text + '&parent=' + df_cpi['series'], 'None')
        
    # Earnings
    if args.get('earnings','') in ['','Exclude']:
        df_earnings = pd.DataFrame()
    else:
        df_earnings = df[(df['Type'] == 'Earnings') & (df['Bucket'] == args.get('earnings','').replace('+',' '))]
        
    # Unemployment
    if args.get('unemployment','') in ['','Exclude']:
        df_unemployment = pd.DataFrame()
    else:
        df_unemployment = df[(df['Type'] == 'Unemployment') & (df['Bucket'] == args.get('unemployment','').replace('+',' '))]
        
    # Stocks
    if args.get('stocks','') in ['','Exclude']:
        df_stocks = pd.DataFrame()
    else:
        df_stocks = df[df['Type'] == 'Stocks'] 
        
    # Interest
    if args.get('interest','') in ['','Exclude']:
        df_interest = pd.DataFrame()
    else:
        df_interest = df[df['Type'] == 'Interest'] 

    # Combine selected data.
    df = pd.concat([df_cpi, df_earnings, df_unemployment, df_stocks, df_interest])

    # Normalize values to % change the specific category from start of date window.
    baseline_dict = {}
    min_dt = df[['date','value','Category']].groupby('Category').date.min().to_dict()
    for k,v in min_dt.items():
        min_dt[k] = max(v,pd.to_datetime(start_date))
    for k,v in min_dt.items():
        baseline_dict[k] = df[(df['Category'] == k) & (df['date'] == v)].value.item()
    df['baseline'] = df['Category'].map(baseline_dict) 
    df['change'] = df['value']/df['baseline'] - 1

    return df

In [137]:
def build_line(df, **args):
    
    # Set dates.
    start_year = args.get('start_year',2000)
    start_date = '1/1/' + str(start_year)
    end_year = args.get('end_year',2021)
    end_date = '12/1/' + str(end_year)
    
    # Set color scheme based on the number of categories.
    if len(df['Category'].unique()) > 10:
        c_scheme = 'category20'
    else:
        c_scheme = 'category10'
        
    # Check for hrefs.
    has_hrefs = len(df[df['href'] != 'None']) > 0
    has_non_hrefs = len(df[df['href'] == 'None']) > 0
    
    
    '''if has_hrefs:
        with_href = alt.Chart(df[['date','change','Category','href']][(df['date'] >= start_date) & (df['date'] <= end_date) & (df['href'] != '')], title='Change Since ' + str(start_year) +' by Category').mark_line().encode(
            x = alt.X('date', title = 'Year'),
            y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
            color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
            tooltip = 'Category',
            href = alt.Href('href')
        ).properties(height=400, width=600)
    if has_non_hrefs:
        no_href = alt.Chart(df[['date','change','Category','href']][(df['date'] >= start_date) & (df['date'] <= end_date) & (df['href'] != '')], title='Change Since ' + str(start_year) +' by Category').mark_line().encode(
            x = alt.X('date', title = 'Year'),
            y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
            color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
            tooltip = 'Category'
        ).properties(height=400, width=600)
    
    if has_hrefs and has_non_hrefs:
        t_chart = with_href + no_href
    elif has_hrefs:
        t_chart = with_href
    elif has_non_hrefs:
        t_chart = no_href
    else:
        return "No data to dispaly on chart."'''
    
    # Line Chart
    t_chart = alt.Chart(df[['date','change','Category','href']][(df['date'] >= start_date) & (df['date'] <= end_date) & (df['href'] != '')], title='Change Since ' + str(start_year) +' by Category').mark_line().encode(
            x = alt.X('date', title = 'Year'),
            y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
            color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
            tooltip = 'Category',
            href = alt.Href('href')
        ).properties(height=400, width=600)
        
    t_chart['usermeta'] = {"embedOptions": {'loader': {'target': '_chart'}}}
    
    return t_chart

In [138]:
def build_bar(df, **args):
    
    # Set dates.
    start_year = args.get('start_year',2000)
    start_date = '1/1/' + str(start_year)
    end_year = args.get('end_year',2021)
    end_date = '12/1/' + str(end_year)
    
    # Set color scheme based on the number of categories.
    if len(df['Category'].unique()) > 10:
        c_scheme = 'category20'
    else:
        c_scheme = 'category10'
        
    # Bar Chart
    '''with_href = alt.Chart(df[['date','change','Category','href']][(df['date'] == end_date) | ((df['periodName'] == '4th Quarter') & (df['year'] == end_year))],title='Change Since ' + str(start_year) + ' by Category').mark_bar().encode(
        x = alt.X('Category', sort='y'),
        y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
        color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
        tooltip = 'Category',
        href = alt.Href('href')
    ).properties(height=400, width=600)
    no_href = alt.Chart(df[['date','change','Category','href']][(df['date'] == end_date) | ((df['periodName'] == '4th Quarter') & (df['year'] == end_year))],title='Change Since ' + str(start_year) + ' by Category').mark_bar().encode(
        x = alt.X('Category', sort='y', axis=alt.Axis(labels=False)),
        y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
        color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
        tooltip = 'Category'
    ).properties(height=400, width=600)'''
    
    t_chart = alt.Chart(df[['date','change','Category','href']][(df['date'] == end_date) | ((df['periodName'] == '4th Quarter') & (df['year'] == end_year))],title='Change Since ' + str(start_year) + ' by Category').mark_bar().encode(
            x = alt.X('Category', sort='y'),
            y = alt.Y('change', title='Change Since ' + str(start_year), axis=alt.Axis(format='%')),
            color = alt.Color('Category', scale=alt.Scale(scheme = c_scheme)),
            tooltip = 'Category',
            href = alt.Href('href')
        ).properties(height=400, width=600)
    
    t_chart['usermeta'] = {"embedOptions": {'loader': {'target': '_chart'}}}
    
    return t_chart

In [139]:
# Simulate args.
arg_str = "chart_type=Bar+Chart&start_year=1970&end_year=2021&inflation=By+Category&earnings=Exclude&unemployment=Exclude&stocks=Exclude&interest=Include"
arg_str = "chart_type=Bar+Chart&start_year=1970&end_year=2021&inflation=Total&earnings=Total&unemployment=Exclude&stocks=Exclude&interest=Include"

arg_list = arg_str.split('&')
arg_dict = {z.split('=')[0] : z.split('=')[1] for z in arg_list}
args = arg_dict

t_df = data_parse(df.copy(deep=True), **args)

if args['chart_type'] == 'Line+Chart':
    out_html = build_line(t_df, **args)
    pass
elif args['chart_type'] == 'Bar+Chart':
    out_html = build_bar(t_df, **args)
else:
    pass

out_html

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cpi['Category'] = 'CPI - ' + df_cpi['Category']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cpi['href'] = np.where(df_cpi['Leaf'] == 0, arg_text + '&parent=' + df_cpi['series'], 'None')


In [140]:
df[df['Type'] == 'Earnings']

Unnamed: 0,year,period,periodName,value,footnotes,series,latest,Category,Parent Series ID,Level,Leaf,date,Type,Bucket
0,1989,M10,4th Quarter,408.0,[{}],LEU0252881500,,Earnings - All People,,,,1989-10-01,Earnings,Total
1,1989,M07,3rd Quarter,395.0,[{}],LEU0252881500,,Earnings - All People,,,,1989-07-01,Earnings,Total
2,1989,M04,2nd Quarter,398.0,[{}],LEU0252881500,,Earnings - All People,,,,1989-04-01,Earnings,Total
3,1989,M01,1st Quarter,397.0,[{}],LEU0252881500,,Earnings - All People,,,,1989-01-01,Earnings,Total
4,1988,M10,4th Quarter,392.0,[{}],LEU0252881500,,Earnings - All People,,,,1988-10-01,Earnings,Total
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44,2011,M01,1st Quarter,1336.0,[{}],LEU0252919700,,Earnings - Advanced Degree,,,,2011-01-01,Earnings,By Education
45,2010,M10,4th Quarter,1334.0,[{}],LEU0252919700,,Earnings - Advanced Degree,,,,2010-10-01,Earnings,By Education
46,2010,M07,3rd Quarter,1368.0,[{}],LEU0252919700,,Earnings - Advanced Degree,,,,2010-07-01,Earnings,By Education
47,2010,M04,2nd Quarter,1341.0,[{}],LEU0252919700,,Earnings - Advanced Degree,,,,2010-04-01,Earnings,By Education


In [141]:
df[df['Type'] == 'Earnings']
df[(df['date'] == None) | ((df['periodName'] == '4th Quarter') & (df['year'] == 2021))]

Unnamed: 0,year,period,periodName,value,footnotes,series,latest,Category,Parent Series ID,Level,Leaf,date,Type,Bucket
1,2021,M10,4th Quarter,1010.0,[{}],LEU0252881500,,Earnings - All People,,,,2021-10-01,Earnings,Total
1,2021,M10,4th Quarter,1103.0,[{}],LEU0252881800,,Earnings - Men,,,,2021-10-01,Earnings,By Gender
1,2021,M10,4th Quarter,930.0,[{}],LEU0252882700,,Earnings - Women,,,,2021-10-01,Earnings,By Gender
1,2021,M10,4th Quarter,1030.0,[{}],LEU0252883600,,Earnings - White People,,,,2021-10-01,Earnings,By Race
1,2021,M10,4th Quarter,1129.0,[{}],LEU0252883900,,Earnings - White Men,,,,2021-10-01,Earnings,By Race and Gender
1,2021,M10,4th Quarter,939.0,[{}],LEU0252884200,,Earnings - White Women,,,,2021-10-01,Earnings,By Race and Gender
1,2021,M10,4th Quarter,805.0,[{}],LEU0252884500,,Earnings - Black People,,,,2021-10-01,Earnings,By Race
1,2021,M10,4th Quarter,807.0,[{}],LEU0252884800,,Earnings - Black Men,,,,2021-10-01,Earnings,By Race and Gender
1,2021,M10,4th Quarter,802.0,[{}],LEU0252885100,,Earnings - Black Women,,,,2021-10-01,Earnings,By Race and Gender
1,2021,M10,4th Quarter,1384.0,[{}],LEU0254468400,,Earnings - Asian People,,,,2021-10-01,Earnings,By Race


In [142]:
df[(df['date'] == '12/1/2021') | ((df['periodName'] == '4th Quarter') & (df['year'] == 2021))]

Unnamed: 0,year,period,periodName,value,footnotes,series,latest,Category,Parent Series ID,Level,Leaf,date,Type,Bucket
6,2021,M12,December,280.126000,[{}],CUSR0000SA0,,CPI - All items,,0.0,0.0,2021-12-01,CPI,
6,2021,M12,December,286.018000,[{}],CUSR0000SAF,,CPI - Food and Beverages,CUSR0000SA0,1.0,0.0,2021-12-01,CPI,
6,2021,M12,December,268.356000,[{}],CUSR0000SAF11,,CPI - Food at home,CUSR0000SAF,2.0,0.0,2021-12-01,CPI,
6,2021,M12,December,297.693000,[{}],CUSR0000SAF111,,CPI - Cereals and bakery products,CUSR0000SAF11,3.0,1.0,2021-12-01,CPI,
6,2021,M12,December,299.689000,[{}],CUSR0000SAF112,,"CPI - Meats, poultry, fish, and eggs",CUSR0000SAF11,3.0,1.0,2021-12-01,CPI,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,2021,M12,December,3.600000,[{}],LNS14027689,,Unemployment Rate - Some College,,,,2021-12-01,Unemployment,By Education
6,2021,M12,December,2.100000,[{}],LNS14027662,,Unemployment Rate - Bachelor's and Higher,,,,2021-12-01,Unemployment,By Education
623,2021,,,35641.334091,,,,Stocks - Dow,,,,2021-12-01,Stocks,
1240,2021,,,15474.432273,,,,Stocks - NASDAQ,,,,2021-12-01,Stocks,


In [143]:
df[((df['periodName'] == '4th Quarter') & (df['year'] == 2021))]

Unnamed: 0,year,period,periodName,value,footnotes,series,latest,Category,Parent Series ID,Level,Leaf,date,Type,Bucket
1,2021,M10,4th Quarter,1010.0,[{}],LEU0252881500,,Earnings - All People,,,,2021-10-01,Earnings,Total
1,2021,M10,4th Quarter,1103.0,[{}],LEU0252881800,,Earnings - Men,,,,2021-10-01,Earnings,By Gender
1,2021,M10,4th Quarter,930.0,[{}],LEU0252882700,,Earnings - Women,,,,2021-10-01,Earnings,By Gender
1,2021,M10,4th Quarter,1030.0,[{}],LEU0252883600,,Earnings - White People,,,,2021-10-01,Earnings,By Race
1,2021,M10,4th Quarter,1129.0,[{}],LEU0252883900,,Earnings - White Men,,,,2021-10-01,Earnings,By Race and Gender
1,2021,M10,4th Quarter,939.0,[{}],LEU0252884200,,Earnings - White Women,,,,2021-10-01,Earnings,By Race and Gender
1,2021,M10,4th Quarter,805.0,[{}],LEU0252884500,,Earnings - Black People,,,,2021-10-01,Earnings,By Race
1,2021,M10,4th Quarter,807.0,[{}],LEU0252884800,,Earnings - Black Men,,,,2021-10-01,Earnings,By Race and Gender
1,2021,M10,4th Quarter,802.0,[{}],LEU0252885100,,Earnings - Black Women,,,,2021-10-01,Earnings,By Race and Gender
1,2021,M10,4th Quarter,1384.0,[{}],LEU0254468400,,Earnings - Asian People,,,,2021-10-01,Earnings,By Race


In [144]:
xdf = data_parse(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cpi['Category'] = 'CPI - ' + df_cpi['Category']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cpi['href'] = np.where(df_cpi['Leaf'] == 0, arg_text + '&parent=' + df_cpi['series'], 'None')


In [145]:
xdf[xdf['Type'] == 'Earnings']

Unnamed: 0,year,period,periodName,value,footnotes,series,latest,Category,Parent Series ID,Level,Leaf,date,Type,Bucket,href,baseline,change


In [146]:
baseline_dict = {}
min_dt = df[['date','value','Category']].groupby('Category').date.min().to_dict()
for k,v in min_dt.items():
    min_dt[k] = max(v,pd.to_datetime('1/1/2000'))
for k,v in min_dt.items():
    baseline_dict[k] = df[(df['Category'] == k) & (df['date'] == v)].value.item()
df['baseline'] = df['Category'].map(baseline_dict) 
df['change'] = df['value']/df['baseline'] - 1

In [147]:
df[df['Type'] == 'Earnings']

Unnamed: 0,year,period,periodName,value,footnotes,series,latest,Category,Parent Series ID,Level,Leaf,date,Type,Bucket,href,baseline,change
0,1989,M10,4th Quarter,408.0,[{}],LEU0252881500,,Earnings - All People,,,,1989-10-01,Earnings,Total,https://apps-summer22.ischool.berkeley.edu/inf...,573.0,-0.287958
1,1989,M07,3rd Quarter,395.0,[{}],LEU0252881500,,Earnings - All People,,,,1989-07-01,Earnings,Total,https://apps-summer22.ischool.berkeley.edu/inf...,573.0,-0.310646
2,1989,M04,2nd Quarter,398.0,[{}],LEU0252881500,,Earnings - All People,,,,1989-04-01,Earnings,Total,https://apps-summer22.ischool.berkeley.edu/inf...,573.0,-0.305410
3,1989,M01,1st Quarter,397.0,[{}],LEU0252881500,,Earnings - All People,,,,1989-01-01,Earnings,Total,https://apps-summer22.ischool.berkeley.edu/inf...,573.0,-0.307155
4,1988,M10,4th Quarter,392.0,[{}],LEU0252881500,,Earnings - All People,,,,1988-10-01,Earnings,Total,https://apps-summer22.ischool.berkeley.edu/inf...,573.0,-0.315881
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44,2011,M01,1st Quarter,1336.0,[{}],LEU0252919700,,Earnings - Advanced Degree,,,,2011-01-01,Earnings,By Education,https://apps-summer22.ischool.berkeley.edu/inf...,997.0,0.340020
45,2010,M10,4th Quarter,1334.0,[{}],LEU0252919700,,Earnings - Advanced Degree,,,,2010-10-01,Earnings,By Education,https://apps-summer22.ischool.berkeley.edu/inf...,997.0,0.338014
46,2010,M07,3rd Quarter,1368.0,[{}],LEU0252919700,,Earnings - Advanced Degree,,,,2010-07-01,Earnings,By Education,https://apps-summer22.ischool.berkeley.edu/inf...,997.0,0.372116
47,2010,M04,2nd Quarter,1341.0,[{}],LEU0252919700,,Earnings - Advanced Degree,,,,2010-04-01,Earnings,By Education,https://apps-summer22.ischool.berkeley.edu/inf...,997.0,0.345035


In [148]:
df[((df['periodName'] == '4th Quarter') & (df['year'] == 2021))]

Unnamed: 0,year,period,periodName,value,footnotes,series,latest,Category,Parent Series ID,Level,Leaf,date,Type,Bucket,href,baseline,change
1,2021,M10,4th Quarter,1010.0,[{}],LEU0252881500,,Earnings - All People,,,,2021-10-01,Earnings,Total,https://apps-summer22.ischool.berkeley.edu/inf...,573.0,0.762653
1,2021,M10,4th Quarter,1103.0,[{}],LEU0252881800,,Earnings - Men,,,,2021-10-01,Earnings,By Gender,https://apps-summer22.ischool.berkeley.edu/inf...,641.0,0.720749
1,2021,M10,4th Quarter,930.0,[{}],LEU0252882700,,Earnings - Women,,,,2021-10-01,Earnings,By Gender,https://apps-summer22.ischool.berkeley.edu/inf...,489.0,0.90184
1,2021,M10,4th Quarter,1030.0,[{}],LEU0252883600,,Earnings - White People,,,,2021-10-01,Earnings,By Race,https://apps-summer22.ischool.berkeley.edu/inf...,588.0,0.751701
1,2021,M10,4th Quarter,1129.0,[{}],LEU0252883900,,Earnings - White Men,,,,2021-10-01,Earnings,By Race and Gender,https://apps-summer22.ischool.berkeley.edu/inf...,662.0,0.705438
1,2021,M10,4th Quarter,939.0,[{}],LEU0252884200,,Earnings - White Women,,,,2021-10-01,Earnings,By Race and Gender,https://apps-summer22.ischool.berkeley.edu/inf...,498.0,0.885542
1,2021,M10,4th Quarter,805.0,[{}],LEU0252884500,,Earnings - Black People,,,,2021-10-01,Earnings,By Race,https://apps-summer22.ischool.berkeley.edu/inf...,469.0,0.716418
1,2021,M10,4th Quarter,807.0,[{}],LEU0252884800,,Earnings - Black Men,,,,2021-10-01,Earnings,By Race and Gender,https://apps-summer22.ischool.berkeley.edu/inf...,507.0,0.591716
1,2021,M10,4th Quarter,802.0,[{}],LEU0252885100,,Earnings - Black Women,,,,2021-10-01,Earnings,By Race and Gender,https://apps-summer22.ischool.berkeley.edu/inf...,424.0,0.891509
1,2021,M10,4th Quarter,1384.0,[{}],LEU0254468400,,Earnings - Asian People,,,,2021-10-01,Earnings,By Race,https://apps-summer22.ischool.berkeley.edu/inf...,602.0,1.299003


In [164]:
#rdf = df[(df['periodName'] == '4th Quarter') & (df['year'] == 2021)]
#rdf[rdf['Category'] == 'Earnings']
rdf = df[(df['periodName'] == '4th Quarter') & (df['year'] == 2021)]
rdf = df[(df['date'] == '12/1/2021') | ((df['periodName'] == '4th Quarter') & (df['year'] == 2021))]
rdf = rdf[rdf['Type'] == 'Earnings']

asdf = alt.Chart(rdf[['date','Category','value']][(rdf['date'] == '12/1/2021') | ((rdf['periodName'] == '4th Quarter') & (rdf['year'] == 2021))],title='Change Since ' + str(2000) + ' by Category').mark_bar().encode(
            x = alt.X('Category', sort='y'),
            y = alt.Y('value', title='Change Since ' + str(2000), axis=alt.Axis(format='%')),
            color = alt.Color('Category'),
            tooltip = 'Category'
        ).properties(height=400, width=600)

In [150]:
df[df['Type'] == 'Earnings'].iloc[4].periodName == '4th Quarter'

True

In [165]:
alt.Chart(rdf).mark_bar().encode(
            x = alt.X('Category', sort='y'),
            y = alt.Y('value', title='Change Since ' + str(2000), axis=alt.Axis(format='%')),
            color = alt.Color('Category'),
            tooltip = 'Category'
)

In [180]:
zdf = df[['date','change','Category','href']][(df['date'] == '12/1/2021') | ((df['periodName'] == '4th Quarter') & (df['year'] == '2021'))]

In [181]:
zdf[zdf['Category'] == 'Earnings - Black Men']
qdf = zdf[zdf['Category'] == 'Earnings - White Men']

In [187]:
df.iloc[7].year == '1989'

False

In [182]:
alt.Chart(qdf).mark_bar().encode(
            x = alt.X('Category', sort='y'),
            y = alt.Y('change', title='Change Since ' + str(2000), axis=alt.Axis(format='%')),
            color = alt.Color('Category'),
            tooltip = 'Category'
)

