# Exploring DSIRE Database of State Incentives for Renewables & Efficiency
***

In [2]:
import pandas as pd
# Render floats with a thousands separator and two decimal places (e.g. 1,234.50)
pd.set_option('display.float_format', '{:,.2f}'.format)
# Show the full text of every cell instead of truncating long strings
pd.options.display.max_colwidth = None

from IPython.display import display, HTML
# Custom CSS that right-aligns both header (th) and data (td) cells of any
# HTML table carrying the `dataframe` class (presumably the class pandas puts
# on rendered DataFrames -- confirm against the notebook's HTML output).
# `!important` is used so this rule wins over competing text-align rules.
css_rules = """
<style>
    table.dataframe th, table.dataframe td {
        text-align: right !important;
    }
</style>
"""
# Emit the <style> element as this cell's HTML output so the notebook
# front end picks up the rule.
display(HTML(css_rules))

In [3]:
def _read_id_name_lookup(csv_path):
    """Build an ``{id: name}`` dict from the `id` and `name` columns of a lookup CSV.

    Only those two columns are read; `id` is requested as `str` and used as
    the index, so the resulting dict is keyed by the id values.
    """
    lookup = pd.read_csv(
        csv_path,
        usecols=['id', 'name'],
        index_col='id',
        dtype={"id": str},
    )
    return lookup['name'].to_dict()


# Main programs table
dsire = pd.read_csv('Data/DSIRE/clean_dsire_data_off.csv')

# Lookup tables: the main dataset refers to technologies and sectors by id,
# so these dicts let us display readable names instead.
tech_dict = _read_id_name_lookup('Data/DSIRE/technology_category.csv')
sect_dict = _read_id_name_lookup('Data/DSIRE/sector.csv')


## Counts of programs by various attributes
<a class="anchor" id="counts-of-programs-by-various-attributes"></a>
***
Attributes of interest:
1. Policy/Incentive Category
2. Policy/Incentive Type 
3. Implementing Sector (What type of entity enacted the program?)
4. Eligible Technologies (Which technologies does this program support?)
5. Eligible Sectors (Which sectors does this program support?)

In [10]:
def _count_flag_columns(frame, prefix, id_to_name):
    """Count programs per category for indicator columns named ``<prefix><id>``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table whose indicator columns are summed.
    prefix : str
        Column-name prefix identifying the attribute, e.g. ``'Sector_'``.
        Only columns that *start with* this prefix are used, so a column that
        merely contains the text elsewhere in its name is not picked up.
    id_to_name : dict
        Maps the id part of the column name (everything after `prefix`) to a
        display name.

    Returns
    -------
    dict
        ``{display name: column sum}``. The columns are treated as binary
        0/1 flags (per the dataset's convention), so the sum is the count.

    Raises
    ------
    KeyError
        If a matching column's id is missing from `id_to_name`.
    """
    return {
        id_to_name[col[len(prefix):]]: frame[col].sum()
        for col in frame.columns
        if col.startswith(prefix)
    }


cols_interest = ['CategoryName', 'TypeName', 'ImplementingSectorName']

# Collected counts, keyed by attribute name
attr_counts = {}

# Eligible sectors are stored one column per sector, named `Sector_<id>`
sect_counts = _count_flag_columns(dsire, 'Sector_', sect_dict)
sect_series = pd.Series(sect_counts)
attr_counts['Sector'] = sect_series

# Eligible technologies use the same layout, named `Tech_<id>`
tech_counts = _count_flag_columns(dsire, 'Tech_', tech_dict)
tech_series = pd.Series(tech_counts)
attr_counts['Tech'] = tech_series

# The remaining attributes are ordinary categorical columns
for col in cols_interest:
    attr_counts[col] = dsire[col].value_counts()

# Summary of how programs are distributed across implementing sectors
attr_counts['ImplementingSectorName'].describe()

count       6.00
mean      410.00
std       537.96
min         1.00
25%        14.25
50%       130.50
75%       843.00
max     1,142.00
Name: count, dtype: float64

## Counts of programs by state and attributes
<a class="anchor" id="counts-of-programs-by-state-and-attributes"></a>
***
Attributes of interest are the same as in [Counts of programs by various attributes](#counts-of-programs-by-various-attributes)