In [1]:
#Imports
import altair as alt
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# V2 of State-level combined data for Drought and Energy

Based on usability study feedback, this version reworks the state drought and energy consumption visualization: state energy consumption is shown as horizontal bar charts, alongside an interactive weekly drought line chart with one line per state.

In [2]:
#Read in Energy and State Data
df = pd.read_csv("2016countyenergyprofiles_clean.csv")

#State lookup table, keyed by the two-letter abbreviation so it can be joined onto the county rows
state_info = (
    pd.read_csv('state_info.csv')
    .rename(columns={"STUSAB": "state_abbr", "STATE": "state_map_id"})
    .set_index('state_abbr')
    .drop(['STATENS'], axis=1)
)

#Left join keeps every county row and adds the state_info columns (state_map_id, STATE_NAME, ...)
consumption_st = df.join(state_info, on='state_abbr', how='left')

consumption_st.head(5)

Unnamed: 0,state_id,state_abbr,county_state_name,county_id,county_name,latitude,longitude,consolidated_city-county,population,population_cohort,...,GHG emissions mtons CO2e_Industry_Natural Gas,GHG emissions mtons CO2e_On-road Transportation_Gasoline,GHG emissions mtons CO2e_On-road Transportation_Diesel,egrid_primary_subregion_Emissions factors_Electricity,CO2 equivalent total output emission rate (mton/MWh)_Emissions factors_Electricity,CO2 equivalent total output emission rate (mton/TcF)_Emissions factors_Natural Gas,CO2 equivalent total output emission rate (mton/gallon)_Emissions factors_Gasoline,CO2 equivalent total output emission rate (mton/gallon)_Emissions factors_Diesel,state_map_id,STATE_NAME
0,1,AL,"Autauga County, AL",1001,Autauga County,32.532237,-86.64644,,55049,7,...,179434.771263,273472.6,145622.149678,SRSO,0.496737,0.054981,0.010795,0.012655,1,Alabama
1,1,AL,"Baldwin County, AL",1003,Baldwin County,30.659218,-87.746067,,199510,11,...,91485.936972,1252990.0,543861.890408,SRSO,0.496737,0.054981,0.010795,0.012655,1,Alabama
2,1,AL,"Barbour County, AL",1005,Barbour County,31.870253,-85.405104,,26614,6,...,83909.287325,160952.1,50827.577511,SRSO,0.496737,0.054981,0.010795,0.012655,1,Alabama
3,1,AL,"Bibb County, AL",1007,Bibb County,33.015893,-87.127148,,22572,5,...,205.749803,127454.2,47793.274145,SRSO,0.496737,0.054981,0.010795,0.012655,1,Alabama
4,1,AL,"Blount County, AL",1009,Blount County,33.977358,-86.56644,,57704,8,...,7522.084463,253915.7,126512.101584,SRTV,0.540971,0.054981,0.010795,0.012655,1,Alabama


In [3]:
#Read in Drought Data
drought_df = pd.read_csv('drought_monitor_export_20160101_20161231.csv')

#Since visualization is focusing on states; leave out Washington DC and Puerto Rico,
#and change 'Name' to 'STATE_NAME'
non_state_names = ['District of Columbia', 'Puerto Rico']
is_non_state = drought_df['Name'].isin(non_state_names)
drought_df = drought_df[~is_non_state].rename(columns = {'Name': 'STATE_NAME'})

#Add in rows for the US average by week as a point of comparison:
#one mean DSCI per MapDate, labelled as the pseudo-state 'US Average'
weekly_mean_DSCI = drought_df.groupby('MapDate')['DSCI'].mean()
avg_DSCI_by_week = weekly_mean_DSCI.reset_index().assign(STATE_NAME = 'US Average')

#Stack the averages underneath the per-state rows
drought_df = pd.concat([drought_df, avg_DSCI_by_week])


## Create Line Chart for State-level drought by week 

In [4]:
#Average DSCI per state (rounded to 2 decimals); used below to build the custom color palette
state_mean_DSCI = drought_df.groupby('STATE_NAME')['DSCI'].mean().round(decimals = 2)
mean_state_drought = state_mean_DSCI.reset_index(name = 'mean_DSCI')

#Display the state names that made it into the aggregate
mean_state_drought.STATE_NAME.astype(str)
#mean_state_drought.to_csv('mean_state_drought_data.csv')

0            Alabama
1             Alaska
2            Arizona
3           Arkansas
4         California
5           Colorado
6        Connecticut
7           Delaware
8            Florida
9            Georgia
10            Hawaii
11             Idaho
12          Illinois
13           Indiana
14              Iowa
15            Kansas
16          Kentucky
17         Louisiana
18             Maine
19          Maryland
20     Massachusetts
21          Michigan
22         Minnesota
23       Mississippi
24          Missouri
25           Montana
26          Nebraska
27            Nevada
28     New Hampshire
29        New Jersey
30        New Mexico
31          New York
32    North Carolina
33      North Dakota
34              Ohio
35          Oklahoma
36            Oregon
37      Pennsylvania
38      Rhode Island
39    South Carolina
40      South Dakota
41         Tennessee
42             Texas
43        US Average
44              Utah
45           Vermont
46          Virginia
47        Was

In [5]:
#Create custom color palette for Line Chart where color maps to the mean DSCI score
#for that state

#Number of equal-width mean_DSCI bins, i.e. the number of distinct colors on the ramp
num_bins = 50

#Create color map from green to red, resampled to num_bins discrete colors.
#plt.get_cmap is used because cm.get_cmap is deprecated and no longer available in recent matplotlib.
cmap = plt.get_cmap('RdYlGn_r', num_bins)

# bin mean_DSCI values into num_bins bins (labels = False returns the integer bin index per row)
bins = pd.cut(mean_state_drought['mean_DSCI'], bins = num_bins, labels = False)

# look up one RGBA color per row from its bin index, then convert to hex format
colors = [cmap(bin_index) for bin_index in bins]
hex_colors = [mcolors.rgb2hex(rgba) for rgba in colors]

# add new column to data frame with hex color codes for double checking
mean_state_drought['color'] = hex_colors

In [6]:
#Parse the MapDate values (YYYYMMDD) into a date/time column
map_dates = pd.to_datetime(drought_df['MapDate'], format = '%Y%m%d')
drought_df['date'] = map_dates

#Month/day label used for the x axis of the line chart
drought_df['month_date'] = map_dates.dt.strftime('%m/%d')

#Attach each state's mean DSCI and color to every weekly row
drought_df = pd.merge(drought_df, mean_state_drought, how = 'left', on = 'STATE_NAME')

In [7]:
#Dark mode for all plots, easier to see the line chart
alt.themes.enable('dark')

#Selection tool to be used across all plots:
#NOTE(review): toggle="true" is passed to Vega-Lite as a string - presumably it makes every click
#toggle a state in or out of the selection; confirm against the selection documentation
click = alt.selection_multi(fields=['STATE_NAME'], toggle="true")

#Pin every state to the color computed from its mean DSCI. Giving the scale an explicit domain
#keeps the state -> color pairing independent of how the renderer orders the state names.
state_color_scale = alt.Scale(domain = mean_state_drought['STATE_NAME'].tolist(),
                              range = mean_state_drought['color'].tolist())

#Line chart for drought DSCI score per state per week
#(a line mark has no text channel, so only x / y / color / tooltip / opacity are encoded)
drought_line_chart = alt.Chart(drought_df, title = 'State Drought Severity by Week')\
.mark_line().encode(
    x=alt.X('month_date:O', axis=alt.Axis(title='Week')),
    y=alt.Y('DSCI:Q', axis=alt.Axis(title='Drought Severity Coverage Index (DSCI)')),
    color=alt.Color('STATE_NAME:N', legend=None, scale=state_color_scale),
    tooltip=alt.Tooltip('STATE_NAME:N', title='State'),
    #Selected states stay fully opaque, everything else fades into the background
    opacity = alt.condition(click, alt.value(1), alt.value(0.05))
).add_selection(click)

drought_line_chart

## Create a stacked bar chart for each energy type: Electricity, Natural Gas, Fuel (Gasoline + Diesel)



In [8]:
#Attach each state's mean DSCI (and its color) to every county-level consumption row
drought_consumption_st = pd.merge(
    consumption_st,
    mean_state_drought,
    how = 'left',
    on = 'STATE_NAME',
)

In [9]:
#Sanity check: (rows, columns) of the merged consumption + drought frame
drought_consumption_st.shape

(3142, 173)

In [10]:
#Creating the Electricity Consumption dataset

#Raw per-capita column -> short sector label used in the stacked bar chart
elec_sector_columns = {
    'consumption (MWh/capita)_Residential_Electricity': 'Residential Consumption',
    'consumption (MWh/capita)_Local calibration_Commercial_Electricity': 'Commercial Consumption',
    'consumption (MWh/capita)_Industry_Electricity': 'Industry Consumption',
}

#Electricity consumption dataframe aggregation: one row per state.
#Population is summed over the county rows; the per-capita columns are plain (unweighted) means of them.
consumption_elec_pop = drought_consumption_st.groupby(['STATE_NAME','state_map_id']).agg({
    'population': 'sum',
    **{raw_column: 'mean' for raw_column in elec_sector_columns},
    'mean_DSCI': 'first',
    'state_abbr': 'first',
}).reset_index()
consumption_elec_pop['total MWh/capita'] = (
    consumption_elec_pop['consumption (MWh/capita)_Residential_Electricity']
    + consumption_elec_pop['consumption (MWh/capita)_Local calibration_Commercial_Electricity']
    + consumption_elec_pop['consumption (MWh/capita)_Industry_Electricity']
)

#Drop District of Columbia / Puerto Rico and rename the consumption columns to their sector labels
is_non_state = consumption_elec_pop['STATE_NAME'].isin(['District of Columbia', 'Puerto Rico'])
consumption_elec_pop = consumption_elec_pop[~is_non_state].rename(columns = elec_sector_columns)

#Duplicate consumption columns to later use as tooltip in chart
#(melt below folds the originals into a single value column)
for sector_label in elec_sector_columns.values():
    consumption_elec_pop[sector_label + ' (MWh/capita)'] = consumption_elec_pop[sector_label]

#Transform df into a long format for stacked bar chart:
consumption_elec_pop_long = pd.melt(
    consumption_elec_pop,
    id_vars=['STATE_NAME', 'state_map_id', 'population',
             'mean_DSCI', 'state_abbr', 'total MWh/capita',
             'Residential Consumption (MWh/capita)',
             'Commercial Consumption (MWh/capita)',
             'Industry Consumption (MWh/capita)'],
    value_vars=['Residential Consumption',
                'Commercial Consumption',
                'Industry Consumption'],
    var_name='Electricity Type', value_name='Consumption (MWh/capita)')

#Add in 'US Average' rows (one per electricity type) as a point of comparison
average_elec_consumption = consumption_elec_pop_long.groupby('Electricity Type').agg({
    'Residential Consumption (MWh/capita)': 'mean',
    'Commercial Consumption (MWh/capita)': 'mean',
    'Industry Consumption (MWh/capita)': 'mean',
    'total MWh/capita': 'mean',
    'Consumption (MWh/capita)': 'mean'
}).reset_index()

#Fill in the identifying columns for the pseudo-state; the US population is hardcoded
us_average_rows = average_elec_consumption.assign(
    STATE_NAME = 'US Average',
    state_map_id = None,
    population = 334591598,
    mean_DSCI = None,
    state_abbr = None,
)

#Append the new rows to the long format df in one step
#(pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x)
consumption_elec_pop_long = pd.concat([consumption_elec_pop_long, us_average_rows],
                                      ignore_index = True)

#Round all numbers in DF to 2 decimals
consumption_elec_pop_long = consumption_elec_pop_long.round(2)

In [11]:
#Create stacked bar chart for electricity

#One fixed color per consumption category (sectors plus the two fuel types)
color_scale = alt.Scale(
    domain=['Commercial Consumption', 'Industry Consumption', 'Residential Consumption',
            'Diesel Consumption', 'Gasoline Consumption'],
    range=['#2D806E', '#B4A817', '#AA5540','#D5966D','#4E79A7'],
)

#Hover details: state, population and the per-sector breakdown
electricity_tooltip = [
    alt.Tooltip('STATE_NAME:N', title='State'),
    alt.Tooltip('population:Q', title='Population', format=",.0f"),
    alt.Tooltip('total MWh/capita:Q'),
    alt.Tooltip('Residential Consumption (MWh/capita):Q', title='Residential MWh/Capita'),
    alt.Tooltip('Commercial Consumption (MWh/capita):Q', title='Commercial MWh/Capita'),
    alt.Tooltip('Industry Consumption (MWh/capita):Q', title='Industry MWh/Capita'),
]

electricity_chart = (
    alt.Chart(consumption_elec_pop_long, title="Electricity Consumption per Capita")
    .mark_bar()
    .encode(
        x=alt.X('Consumption (MWh/capita):Q', axis=alt.Axis(title='Consumption (Megawatt-hour/capita)')),
        y=alt.Y('STATE_NAME:N', axis=alt.Axis(title='State')),
        color=alt.Color('Electricity Type:N', scale = color_scale),
        tooltip=electricity_tooltip,
        #Bars of unselected states are dimmed
        opacity = alt.condition(click, alt.value(1), alt.value(0.2)),
    )
    .add_selection(click)
)

electricity_chart

In [12]:
#Creating the Natural Gas Consumption dataset

#Raw per-capita column -> short sector label used in the stacked bar chart
natgas_sector_columns = {
    'consumption (TcF/capita)_Residential_Natural Gas': 'Residential Consumption',
    'consumption (TcF/capita)_Local calibration_Commercial_Natural Gas': 'Commercial Consumption',
    'consumption (TcF/capita)_Industry_Natural Gas': 'Industry Consumption',
}

#Natural Gas consumption dataframe aggregation: one row per state.
#Population is summed over the county rows; the per-capita columns are plain (unweighted) means of them.
consumption_natgas_pop = drought_consumption_st.groupby(['STATE_NAME','state_map_id']).agg({
    'population': 'sum',
    **{raw_column: 'mean' for raw_column in natgas_sector_columns},
    'mean_DSCI': 'first',
    'state_abbr': 'first',
}).reset_index()
consumption_natgas_pop['total TcF/capita'] = (
    consumption_natgas_pop['consumption (TcF/capita)_Residential_Natural Gas']
    + consumption_natgas_pop['consumption (TcF/capita)_Local calibration_Commercial_Natural Gas']
    + consumption_natgas_pop['consumption (TcF/capita)_Industry_Natural Gas']
)

#Drop District of Columbia / Puerto Rico and rename the consumption columns to their sector labels
is_non_state = consumption_natgas_pop['STATE_NAME'].isin(['District of Columbia', 'Puerto Rico'])
consumption_natgas_pop = consumption_natgas_pop[~is_non_state].rename(columns = natgas_sector_columns)

#Duplicate consumption columns to later use as tooltip in chart
#(melt below folds the originals into a single value column)
for sector_label in natgas_sector_columns.values():
    consumption_natgas_pop[sector_label + ' (TcF/capita)'] = consumption_natgas_pop[sector_label]

#Transform df into a long format for stacked bar chart:
consumption_natgas_pop_long = pd.melt(
    consumption_natgas_pop,
    id_vars=['STATE_NAME', 'state_map_id', 'population',
             'mean_DSCI', 'state_abbr', 'total TcF/capita',
             'Residential Consumption (TcF/capita)',
             'Commercial Consumption (TcF/capita)',
             'Industry Consumption (TcF/capita)'],
    value_vars=['Residential Consumption',
                'Commercial Consumption',
                'Industry Consumption'],
    var_name='Natural Gas Type', value_name='Consumption (TcF/capita)')

#Add in 'US Average' rows (one per natural gas type) as a point of comparison
average_natgas_consumption = consumption_natgas_pop_long.groupby('Natural Gas Type').agg({
    'Residential Consumption (TcF/capita)': 'mean',
    'Commercial Consumption (TcF/capita)': 'mean',
    'Industry Consumption (TcF/capita)': 'mean',
    'total TcF/capita': 'mean',
    'Consumption (TcF/capita)': 'mean'
}).reset_index()

#Fill in the identifying columns for the pseudo-state
#hardcoding in the US population
us_average_rows = average_natgas_consumption.assign(
    STATE_NAME = 'US Average',
    state_map_id = None,
    population = 334591598,
    mean_DSCI = None,
    state_abbr = None,
)

#Append the new rows to the long format df in one step
#(pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x)
consumption_natgas_pop_long = pd.concat([consumption_natgas_pop_long, us_average_rows],
                                        ignore_index = True)

#Round all numbers in DF to 2 decimals
consumption_natgas_pop_long = consumption_natgas_pop_long.round(2)

In [13]:
#Create stacked bar chart for Natural Gas

#Hover details: state, population and the per-sector breakdown
naturalgas_tooltip = [
    alt.Tooltip('STATE_NAME:N', title='State'),
    alt.Tooltip('population:Q', title='Population', format=",.0f"),
    alt.Tooltip('total TcF/capita:Q'),
    alt.Tooltip('Residential Consumption (TcF/capita):Q', title='Residential TcF/Capita'),
    alt.Tooltip('Commercial Consumption (TcF/capita):Q', title='Commercial TcF/Capita'),
    alt.Tooltip('Industry Consumption (TcF/capita):Q', title='Industry TcF/Capita'),
]

naturalgas_chart = (
    alt.Chart(consumption_natgas_pop_long, title="Natural Gas Consumption per Capita")
    .mark_bar()
    .encode(
        x=alt.X('Consumption (TcF/capita):Q', axis=alt.Axis(title='Consumption (Trillion Cubic Feet/capita)')),
        #No y-axis title on this chart
        y=alt.Y('STATE_NAME:N', axis=alt.Axis(title= None)),
        color=alt.Color('Natural Gas Type:N', scale = color_scale),
        tooltip=naturalgas_tooltip,
        #Bars of unselected states are dimmed
        opacity = alt.condition(click, alt.value(1), alt.value(0.2)),
    )
    .add_selection(click)
)

naturalgas_chart

In [14]:
#Creating the Fuel Consumption dataset

#Raw per-capita column -> short fuel label used in the stacked bar chart
fuel_type_columns = {
    'consumption (gallons/capita)_On Road Transportation_Gasoline': 'Gasoline Consumption',
    'consumption (gallons/capita)_On Road Transportation_Diesel': 'Diesel Consumption',
}

#Fuel consumption dataframe aggregation: one row per state.
#Population is summed over the county rows; the per-capita columns are plain (unweighted) means of them.
consumption_fuel_pop = drought_consumption_st.groupby(['STATE_NAME','state_map_id']).agg({
    'population': 'sum',
    **{raw_column: 'mean' for raw_column in fuel_type_columns},
    'mean_DSCI': 'first',
    'state_abbr': 'first',
}).reset_index()
consumption_fuel_pop['total gallons/capita'] = (
    consumption_fuel_pop['consumption (gallons/capita)_On Road Transportation_Gasoline']
    + consumption_fuel_pop['consumption (gallons/capita)_On Road Transportation_Diesel']
)

#Drop District of Columbia / Puerto Rico and rename the consumption columns to their fuel labels
is_non_state = consumption_fuel_pop['STATE_NAME'].isin(['District of Columbia', 'Puerto Rico'])
consumption_fuel_pop = consumption_fuel_pop[~is_non_state].rename(columns = fuel_type_columns)

#Duplicate consumption columns to later use as tooltip in chart
#(melt below folds the originals into a single value column)
for fuel_label in fuel_type_columns.values():
    consumption_fuel_pop[fuel_label + ' (gallons/capita)'] = consumption_fuel_pop[fuel_label]

#Transform df into a long format for stacked bar chart:
consumption_fuel_pop_long = pd.melt(
    consumption_fuel_pop,
    id_vars=['STATE_NAME', 'state_map_id', 'population',
             'mean_DSCI', 'state_abbr', 'total gallons/capita',
             'Gasoline Consumption (gallons/capita)',
             'Diesel Consumption (gallons/capita)'],
    value_vars=['Gasoline Consumption',
                'Diesel Consumption'],
    var_name='Fuel Type', value_name='Consumption (gallons/capita)')

#Add in 'US Average' rows (one per fuel type) as a point of comparison
average_fuel_consumption = consumption_fuel_pop_long.groupby('Fuel Type').agg({
    'Gasoline Consumption (gallons/capita)': 'mean',
    'Diesel Consumption (gallons/capita)': 'mean',
    'total gallons/capita': 'mean',
    'Consumption (gallons/capita)': 'mean'
}).reset_index()

#Fill in the identifying columns for the pseudo-state; the US population is hardcoded
us_average_rows = average_fuel_consumption.assign(
    STATE_NAME = 'US Average',
    state_map_id = None,
    population = 334591598,
    mean_DSCI = None,
    state_abbr = None,
)

#Append the new rows to the long format df in one step
#(pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x)
consumption_fuel_pop_long = pd.concat([consumption_fuel_pop_long, us_average_rows],
                                      ignore_index = True)

#Round all numbers in DF to 2 decimals
consumption_fuel_pop_long = consumption_fuel_pop_long.round(2)

In [15]:
#Create stacked bar chart for Fuel
#Use different color scale because we only have 2 groups:
#with a two-entry domain the legend lists only the fuel types
color_scale_fuel = alt.Scale(domain=['Diesel Consumption', 'Gasoline Consumption'],
                        range=['#D5966D', '#4E79A7'])

fuel_chart = alt.Chart(consumption_fuel_pop_long,\
                              title="Fuel Consumption per Capita").\
mark_bar().encode(
    x=alt.X('Consumption (gallons/capita):Q', axis=alt.Axis(title='Consumption (gallons/capita)')),
    y=alt.Y('STATE_NAME:N', axis=alt.Axis(title= None)),
    #Use the fuel-specific scale defined above
    color=alt.Color('Fuel Type:N', scale = color_scale_fuel),
    tooltip=[alt.Tooltip('STATE_NAME:N', title='State'),
             alt.Tooltip('population:Q', title='Population', format=",.0f"),
             alt.Tooltip('total gallons/capita:Q'),
             alt.Tooltip('Gasoline Consumption (gallons/capita):Q', title='Gasoline Gallons/Capita'),
             alt.Tooltip('Diesel Consumption (gallons/capita):Q', title='Diesel Gallons/Capita')],
    #Bars of unselected states are dimmed
    opacity = alt.condition(click, alt.value(1), alt.value(0.2))
).add_selection(click)

fuel_chart

In [16]:
#Combining the 4 charts: the three consumption bar charts side by side, the drought line chart underneath
#Note that you can select multiple states at a time with shift+click

barcharts = alt.hconcat(electricity_chart.properties(height = 800, width = 200),
                        naturalgas_chart.properties(height = 800, width = 200),
                        fuel_chart.properties(height = 800, width = 200))

# Combine charts 1-3 and chart 4 vertically.
# Color scales are resolved independently so every chart keeps its own color scale
# instead of all four being merged into one shared scale.
combined_charts = alt.vconcat(
    barcharts,
    drought_line_chart.properties(height = 400, width = 900),
).resolve_scale(color = 'independent')
combined_charts

In [17]:
#Write the combined chart's Vega-Lite spec to disk as JSON
spec_json = combined_charts.to_json()
with open('spec_v2.json', 'w') as spec_file:
    spec_file.write(spec_json)

#Also export the chart as an HTML file
combined_charts.save('drought_consumption_file_v2.html')

In [18]:
#Switch Altair to its 'json' renderer, then display the combined chart's spec as a JSON string
alt.renderers.enable('json')
combined_charts.to_json()

'{\n  "$schema": "https://vega.github.io/schema/vega-lite/v4.17.0.json",\n  "config": {\n    "view": {\n      "continuousHeight": 300,\n      "continuousWidth": 400\n    }\n  },\n  "datasets": {\n    "data-59f9c215fc29cc06d6f9ba2bcadf7ca6": [\n      {\n        "Commercial Consumption (MWh/capita)": 3.43,\n        "Consumption (MWh/capita)": 5.76,\n        "Electricity Type": "Residential Consumption",\n        "Industry Consumption (MWh/capita)": 8.63,\n        "Residential Consumption (MWh/capita)": 5.76,\n        "STATE_NAME": "Alabama",\n        "mean_DSCI": 124.7,\n        "population": 4841164,\n        "state_abbr": "AL",\n        "state_map_id": 1,\n        "total MWh/capita": 17.82\n      },\n      {\n        "Commercial Consumption (MWh/capita)": 2.09,\n        "Consumption (MWh/capita)": 2.24,\n        "Electricity Type": "Residential Consumption",\n        "Industry Consumption (MWh/capita)": 4.41,\n        "Residential Consumption (MWh/capita)": 2.24,\n        "STATE_NAME":