# PV System Categories

In [1]:
# pyathena
from pyathena.connection import Connection
from pyathena.pandas_cursor import PandasCursor

In [2]:
# S3 location handed to the pyathena Connection as its `s3_staging_dir`.
s3_staging_dir = "s3://nrel-tests/tracking_the_sun/"
# Table interpolated into the FROM clause of the price query further down.
table_name = "oedi.oedi_dev_tracking_the_sun"

In [3]:
# Open the Athena connection first, then request a cursor from it using the
# PandasCursor class.
athena_connection = Connection(
    region_name="us-west-2",
    s3_staging_dir=s3_staging_dir,
)
cursor = athena_connection.cursor(PandasCursor)

## Unit Price Trend

In [4]:
import numpy as np
import pandas as pd
from ipywidgets import interact
from bokeh.core.properties import value
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap
# Route Bokeh output to the notebook so that show() renders figures inline.
output_notebook()

In [5]:
# One row per installation: state, installation year (as a 4-character string),
# system size, total installed price, their ratio rounded to two decimals, and
# the customer segment. Rows whose price or size equals -9999, or whose size is
# zero, are filtered out so the division is always defined.
pv_price_query = f"""
    SELECT 
        state,
        CAST(YEAR(date_parse(installation_date, '%c/%e/%y')) AS VARCHAR(4)) AS year,
        system_size,
        total_installed_price,
        ROUND((total_installed_price / system_size), 2) AS unit_installed_price,
        customer_segment
    FROM {table_name}
    WHERE total_installed_price != -9999
    AND system_size != -9999 
    AND system_size != 0
    """
pv_price = cursor.execute(pv_price_query).as_pandas()

In [6]:
# Options for the state selector. Null states are dropped before sorting:
# sorted() cannot order NaN/None against strings (TypeError), and a null
# option could never match a row in an equality filter anyway.
states = sorted(pv_price["state"].dropna().unique())
# Distinct customer segments present in the query result.
customers = pv_price["customer_segment"].unique()

In [8]:
@interact
def show_pv_price_trend(state=states):
    """Plot the yearly mean PV unit installed price for one state.

    Draws grouped bars per installation year (residential next to
    non-residential) and overlays one line per customer segment.

    Args:
        state: Value matched against the ``state`` column of ``pv_price``.
            The default is the list of states, which ``interact`` renders
            as a dropdown.
    """
    segments = ["RES", "NON-RES"]
    # Fixed axis so every state is drawn over the same years, even when it
    # has no installations in some of them.
    year_index = pd.Index([str(year) for year in range(1998, 2018)], name="year")

    # filter
    state_pv_price = pv_price[pv_price["state"] == state]

    # wrapping
    # Select the price column *before* aggregating: mean() over the whole
    # frame would also try to average the string columns, which raises
    # TypeError on pandas >= 2.0.
    customer_pv_price = (
        state_pv_price.groupby(["customer_segment", "year"])["unit_installed_price"]
        .mean()
        .round(2)
        .unstack(0)
        # Missing years and missing segments both become NaN columns/rows;
        # this also fixes the column order and drops any other segment.
        .reindex(index=year_index, columns=segments)
    )

    # plots
    years = customer_pv_price.index.values
    categories = ["R", "N"]
    res_prices = customer_pv_price["RES"]
    non_res_prices = customer_pv_price["NON-RES"]

    # Nested factors (year, category); prices are interleaved R, N, R, N, ...
    # so that they line up one-to-one with the factors in `x`.
    x = [(year, category) for year in years for category in categories]
    prices = [
        price
        for pair in zip(res_prices, non_res_prices)
        for price in pair
    ]

    # NOTE(review): `plot_height` was replaced by `height` in Bokeh 3.x --
    # confirm the Bokeh version this notebook targets before upgrading.
    fig = figure(
        title="PV Unit Price Trend",
        x_range=FactorRange(*x),
        sizing_mode="stretch_width",
        plot_height=400,
        tools="hover",
        tooltips="@prices",
        toolbar_location="below"
    )

    source = ColumnDataSource(data=dict(x=x, prices=prices))
    colors = ["#718dbf", "#e84d60"]
    # start=1, end=2 makes the mapper look only at the second level of the
    # (year, category) factor, i.e. colour by customer category.
    category_colors = factor_cmap(
        'x', palette=colors, factors=categories, start=1, end=2
    )
    fig.vbar(
        source=source,
        x="x",
        top='prices',
        width=0.9,
        fill_color=category_colors,
        line_color=category_colors,
        alpha=0.7
    )
    # Trend lines are positioned on the top-level (year) factors.
    fig.line(x=years, y=res_prices, line_width=2, line_color="#718dbf")
    fig.line(x=years, y=non_res_prices, line_width=2, line_color="#e84d60")

    fig.xgrid.grid_line_color = None
    show(fig)

interactive(children=(Dropdown(description='state', options=('AR', 'AZ', 'CA', 'CO', 'CT', 'DE', 'FL', 'IL', '…