# Tracking the Sun 2019

OEDI Berkeley Lab’s *Tracking the Sun* report series is dedicated to summarizing installed prices and other trends among grid-connected, distributed solar photovoltaic (PV) systems in the United States. For more information, please refer to https://emp.lbl.gov/tracking-the-sun/

We assume that your OEDI data lake is deployed. In this example, the database deployed in the data lake is `oedi_database_test`, and the Tracking the Sun data table is `lbnl_tracking_the_sun_2019`.

## 1. Metadata in Table

In this section, we'll inspect the schema of the `Tracking the Sun` table, such as its `Columns` and `Partition` information, via the `OEDIGlue` class. This will be useful for the SQL queries later on.

In [1]:
from oedi.AWS.utils.glue import OEDIGlue

In [2]:
# Database and table names of the Tracking the Sun data in the deployed data lake.
database_name = "oedi_database_test"
table_name = "lbnl_tracking_the_sun_2019"

In [3]:
# Helper object used in the following cells to read table metadata.
glue = OEDIGlue()

In [4]:
# Show the table's column names and types.
glue.get_table_columns(database_name, table_name)

Unnamed: 0,Name,Type
0,data_provider,string
1,system_id_from_first_data_provider,string
2,system_id_from_second_data_provider_if_applicable,string
3,system_id_tracking_the_sun,string
4,installation_date,date
5,system_size,double
6,total_installed_price,double
7,appraised_value_flag,boolean
8,sales_tax_cost,double
9,rebate_or_grant,double


In [5]:
# Show the partition key(s) of the table.
glue.get_partition_keys(database_name, table_name)

Unnamed: 0,Name,Type
0,state,string


In [6]:
# Show the values available for the partition key.
glue.get_partition_values(database_name, table_name)

['OR',
 'ME',
 'NM',
 'KS',
 'AR',
 'UT',
 'OH',
 'CO',
 'MN',
 'IL',
 'MD',
 'WI',
 'NH',
 'VT',
 'CT',
 'NY',
 'CA',
 'WA',
 'MA',
 'DE',
 'TX',
 'FL',
 'DC',
 'RI',
 'AZ',
 'MO',
 'NJ',
 'PA']

## 2. PV System Installation Trend
This section visualizes the number of PV system installations per state and year using a grid (heatmap) plot.

In [7]:
# bokeh
from bokeh.io import output_notebook
from bokeh.models import LinearColorMapper, PrintfTickFormatter, ColorBar, LogColorMapper, LogTicker
from bokeh.plotting import figure, show

from oedi.AWS.utils.athena import OEDIAthena
# Configure Bokeh to display its output in the notebook.
output_notebook()

In [8]:
# Athena query helper used for all SQL queries below.
# NOTE(review): `staging_location` is presumably the S3 path where Athena
# writes query results - confirm against the OEDIAthena documentation.
athena = OEDIAthena(staging_location="s3://nrel-tests/tracking-the-sun", region_name="us-west-2")

In [9]:
# Count the records per state and installation year. The year is cast to a
# string; it is used as a categorical axis value in the plot below.
query_string = f"""
    SELECT state, CAST(YEAR(installation_date) AS VARCHAR(4)) AS year, COUNT(*) as count
    FROM {database_name}.{table_name}
    GROUP BY CAST(YEAR(installation_date) AS VARCHAR(4)), state;
"""
pv_state_year = athena.run_query(query_string)

In [10]:
# Categorical axis values: years ascending, states in reverse alphabetical order.
years = sorted(pv_state_year["year"].unique())
states = sorted(pv_state_year["state"].unique(), reverse=True)

# One cell per (year, state); hovering a cell shows its state, year and count.
ifig = figure(
    title="Heatmap of State PV Systems",
    x_range=years,
    y_range=states,
    plot_height=350,
    sizing_mode="scale_width",
    x_axis_location="above",
    toolbar_location="below",
    tooltips=[(field, "@" + field) for field in ("state", "year", "count")],
)

# Log-scaled colour mapping spanning the observed range of counts.
colors = [
    "#75968f", "#a5bab7", "#c9d9d3",
    "#e2e2e2", "#dfccce", "#ddb7b1",
    "#cc7878", "#933b41", "#550b1d",
]
mapper = LogColorMapper(
    palette=colors,
    low=pv_state_year["count"].min(),
    high=pv_state_year["count"].max(),
)

# Unit-sized rectangles coloured by count, without outlines.
ifig.rect(
    x="year",
    y="state",
    width=1,
    height=1,
    source=pv_state_year,
    fill_color=dict(field="count", transform=mapper),
    line_color=None,
)

# Strip grid lines, axis lines and ticks; keep only small tick labels.
ifig.grid.grid_line_color = None
ifig.axis.major_label_standoff = 0
ifig.axis.major_label_text_font_size = "6pt"
ifig.axis.major_tick_line_color = None
ifig.axis.axis_line_color = None

# Colour legend on the right, with log-spaced integer ticks.
color_bar = ColorBar(
    color_mapper=mapper,
    ticker=LogTicker(),
    formatter=PrintfTickFormatter(format="%d"),
    major_label_text_font_size="6pt",
    label_standoff=6,
    border_line_color=None,
    location=(0, 0),
)
ifig.add_layout(color_bar, "right")

show(ifig)

## 3. PV System Unit Price Trend
This section visualizes the average installed price of PV systems over the past years, state by state.

In [11]:
import numpy as np
import pandas as pd
from ipywidgets import interact
from bokeh.core.properties import value
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.plotting import figure, show
from bokeh.transform import factor_cmap

In [12]:
# Fetch per-record size and price and derive the unit installed price
# (total_installed_price / system_size, rounded to 2 decimals).
# Rows where price or size equals -9999 are excluded.
# NOTE(review): -9999 looks like a missing-value marker - confirm.
# Rows with system_size = 0 are excluded as well, which keeps the division defined.
query_string = f"""
    SELECT 
        state,
        CAST(YEAR(installation_date) AS VARCHAR(4)) AS year,
        system_size,
        total_installed_price,
        ROUND((total_installed_price / system_size), 2) AS unit_installed_price,
        customer_segment
    FROM {database_name}.{table_name}
    WHERE total_installed_price != -9999
    AND system_size != -9999 
    AND system_size != 0
"""
pv_price = athena.run_query(query_string)

In [13]:
# Preview the first rows of the query result.
pv_price.head()

Unnamed: 0,state,year,system_size,total_installed_price,unit_installed_price,customer_segment
0,ME,2012,4.41,15109.0,3426.08,-9999
1,ME,2012,5.76,18155.0,3151.91,-9999
2,ME,2012,4.32,15997.0,3703.01,-9999
3,ME,2012,4.62,13838.0,2995.24,-9999
4,ME,2012,10.0,21368.0,2136.8,-9999


In [14]:
# Sorted state codes; used as the default (dropdown options) of the
# interactive function below.
states = sorted(pv_price["state"].unique())
# Distinct customer segment values present in the query result.
customers = pv_price["customer_segment"].unique()

In [15]:
@interact
def show_pv_price_trend(state=states):
    """Plot the yearly mean unit installed price of PV systems for one state.

    The "RES" and "NON-RES" customer segments are shown as grouped bars
    (labelled "R" and "N") for each year from 1998 through 2018, with one
    line per segment overlaid. Years or segments without data are left as
    NaN and therefore draw nothing.

    Args:
        state: State code to plot. ``interact`` builds the selection widget
            from the module-level ``states`` list.
    """
    segments = ["RES", "NON-RES"]
    categories = ["R", "N"]
    colors = ["#718dbf", "#e84d60"]
    year_index = pd.Index([str(year) for year in range(1998, 2019)], name="year")

    # filter
    state_pv_price = pv_price[pv_price["state"] == state]

    # wrapping
    # Select the price column *before* aggregating: on pandas >= 2.0,
    # DataFrameGroupBy.mean() raises TypeError for string columns such as
    # "state" instead of silently dropping them.
    customer_pv_price = (
        state_pv_price
        .groupby(["customer_segment", "year"])["unit_installed_price"]
        .mean()
        .round(2)
        .unstack(0)
        # reindex fixes the year range and the segment columns in one step,
        # filling any missing year or segment with NaN.
        .reindex(index=year_index, columns=segments)
    )

    # plots
    years = customer_pv_price.index.values
    res_prices = customer_pv_price["RES"]
    non_res_prices = customer_pv_price["NON-RES"]

    # Nested (year, segment) factors; prices are interleaved R, N, R, N, ...
    # so that they line up with the factor order.
    x = [(year, category) for year in years for category in categories]
    prices = [
        price
        for pair in zip(res_prices, non_res_prices)
        for price in pair
    ]

    fig = figure(
        title="PV Unit Price Trend",
        x_range=FactorRange(*x),
        plot_height=300,
        sizing_mode="scale_width",
        tools="hover",
        tooltips="@prices",
        toolbar_location="below"
    )

    source = ColumnDataSource(data=dict(x=x, prices=prices))
    # Colour by the second level of the nested factor (the segment label).
    segment_cmap = factor_cmap('x', palette=colors, factors=categories, start=1, end=2)
    fig.vbar(
        source=source,
        x="x",
        top='prices',
        width=0.9,
        fill_color=segment_cmap,
        line_color=segment_cmap,
        alpha=0.7
    )
    fig.line(x=years, y=res_prices, line_width=2, line_color=colors[0])
    fig.line(x=years, y=non_res_prices, line_width=2, line_color=colors[1])

    fig.xgrid.grid_line_color = None
    show(fig)

interactive(children=(Dropdown(description='state', options=('AR', 'AZ', 'CA', 'CO', 'CT', 'DE', 'FL', 'IL', '…

R: Residential <br>
N: Non-residential