In [None]:
import h5pyd
import requests
import pandas as pd

### DATA PROCESSING STEPS
##### 1. acquiring API KEY
##### 2. specifying parameters of interest
##### 3. wrangling the API response: reading it as text and using string manipulation to load it into a DataFrame
##### 4. save pandas df

In [None]:
# Read the NREL developer key from the environment instead of committing it to
# the notebook. Export NREL_API_KEY before starting the kernel; a key that was
# previously committed in plain text should be treated as compromised.
import os

API_KEY = os.environ["NREL_API_KEY"]

base_url = "https://developer.nrel.gov/api/wind-toolkit/v2/wind/offshore-great-lakes-download.csv?"
params = f"wkt=POINT(-87.669888 42.052294)&attributes=wind_speed,wind_direction,pressure,temperature&names=2012&utc=false&leap_day=true&full_name=Mark%20Roth&email=rothmark%40oregonstate.edu&api_key={API_KEY}"
z = requests.get(base_url+params)
# Stop here on an HTTP error rather than parsing an error body as CSV below.
z.raise_for_status()

In [None]:
# splitlines() handles both "\n" and "\r\n" endings and does not produce an
# empty final element when the body ends with a newline (split("\n") did,
# which turned into a bogus all-empty row in the DataFrame).
lines = z.text.splitlines()
# First two lines are kept aside as metadata; the third is the column header.
meta_data = lines[0:2]
header_names = lines[2:3]

In [None]:
# Everything after the first three lines is data: break each line into its
# comma-separated fields and load the result (still all strings) into a frame.
rows = list(map(lambda record: record.split(","), lines[3:]))
df = pd.DataFrame(rows)
print('done')

In [None]:
# Use the header row captured from the response (its third line) as the
# column names, then preview the first rows.
df.columns = header_names[0].split(",")
df.head()

In [None]:
# Write to the same location the exploration and Dash cells read back from
# (~/Documents/NREL/lakefill_df.csv); the previous ~/Desktop target did not
# match either reader.
df.to_csv("~/Documents/NREL/lakefill_df.csv", index=False)

### Data Exploration

#### Assumptions:
- "diurnal" is taken to mean occurring in the daytime (i.e., looking at 6am-6pm of each day), as opposed to daily
- I made this assumption because energy consumption varies drastically between the day and the night
- we can change the interpretation of diurnal to mean daily if we switch the "IS_DAILY" flag to True

In [None]:
import plotly.express as xp
import matplotlib
import plotly.figure_factory as ff
import pandas as pd
import plotly.graph_objects as go
import numpy as np
# Flag referenced only by the commented-out filtering code in the visible
# cells; the active functions choose the daytime window via their scale argument.
IS_DAILY = False

In [None]:
# Reload the saved dataset from disk. The commented-out lines below appear to
# be an earlier inline draft of the aggregation that filter_df_by_time performs.
df = pd.read_csv("~/Documents/NREL/lakefill_df.csv")
# if not IS_DAILY:
#     df = df[df['Hour'] < 18]
#     df = df[df['Hour'] > 5]
# diurnal_df = df.groupby(['Month', 'Day'], as_index=False).mean()
# df_std = df[['Month', 'Day', 'wind speed at 100m (m/s)']].groupby(['Month', 'Day'], as_index=False).agg(np.std)
# diurnal_df['wind speed std'] = df_std['wind speed at 100m (m/s)']
# diurnal_df = df.groupby(['Month', 'Day'], as_index=False).agg(['mean', np.std])

In [None]:
# Copy of the dataset without the Hour column (note: this rebinds `z`, which
# earlier held the HTTP response).
z = df.drop(columns="Hour")


In [63]:
# can be made to take a continuous variable (i.e., number of days to aggregate)
# rather than a categorical variable
def filter_df_by_time(df, scale):
    """
    Aggregate a dataframe to the requested temporal scale.

    :param df:      dataframe to aggregate; the code reads the columns
                    Hour, Day, Month, Year and wind speed at 100m (m/s).
                    The caller's frame is never modified.

    :param scale:   desired temporal scale; compared against the
                    notebook-level constants DIURNAL and MONTHLY.
                    DIURNAL keeps hours 6 through 18 and averages per
                    (Month, Day); MONTHLY averages per Month. Any other
                    value skips the aggregation step.

    :return:        the (possibly aggregated) frame with these columns
                    added: 'wind speed std' (sample standard deviation
                    of the wind speed per group, aggregated scales
                    only), 'date' (assembled from Month/Day/Year) and
                    '7day avg' (rolling mean of the wind speed over 7
                    consecutive rows, i.e. 7 days at the diurnal scale)
    """
    speed_col = 'wind speed at 100m (m/s)'

    if scale == DIURNAL:
        group_keys = ['Month', 'Day']
        # daytime window: hours 6..18 inclusive
        df = df[(df['Hour'] > 5) & (df['Hour'] < 19)]
    elif scale == MONTHLY:
        group_keys = ['Month']
    else:
        group_keys = None

    if group_keys is None:
        # No aggregation: work on a copy so the columns added below do not
        # leak into the caller's dataframe.
        df = df.copy()
    else:
        grouped = df.groupby(group_keys, as_index=False)
        # "std" is pandas' sample standard deviation (ddof=1). Passing np.std
        # to agg() currently resolves to the same method but is deprecated.
        df_std = grouped[speed_col].std()
        df = grouped.mean()
        # Both frames come from the same grouping, so their rows line up.
        df['wind speed std'] = df_std[speed_col]

    # NOTE(review): at the monthly scale Day (and Year) are group means, so
    # Day can be fractional for some months -- confirm that the installed
    # pandas assembles such values into dates as intended.
    # infer_datetime_format is deprecated and has no effect when assembling
    # dates from columns, so it is no longer passed.
    df['date'] = pd.to_datetime(df[['Month', 'Day', 'Year']])

    df['7day avg'] = df[speed_col].rolling(7).mean()

    return df


def _std_bar(frame, var_of_interest, name, **bar_kwargs):
    """Bar trace of var_of_interest over 'date' with 'wind speed std' error bars."""
    return go.Bar(
        x=frame['date'],
        y=frame[var_of_interest],
        error_y=dict(
                type='data',
                array=frame["wind speed std"],
                visible=True),
        name=name,
        **bar_kwargs
    )


def base_graph(df, scale, highlight_type, var_of_interest="wind speed at 100m (m/s)"):
    """
    Build the main bar chart of var_of_interest over time.

    :param df:              frame with the columns 'date', 'wind speed std',
                            '7day avg' and var_of_interest (as produced by
                            filter_df_by_time)
    :param scale:           temporal scale label; used in the title, and the
                            7 day average line is drawn only when it equals
                            DIURNAL
    :param highlight_type:  None for a plain chart, or one of "top10",
                            "above avg", "7day" to draw a subset of the bars
                            in red
    :param var_of_interest: column plotted as bars; the error bars always
                            come from 'wind speed std'
    :return:                plotly Figure
    :raises ValueError:     if highlight_type is not None and not recognised
    """
    # TODO: mix btwn hard code and variable var of interest
    lines = [go.Scatter(x=df['date'], y=[df[var_of_interest].mean()]*len(df), name="avg wind speed")]
    if scale == DIURNAL:
        # The rolling line is shown for the diurnal scale only, with or
        # without a highlight (previously a highlight forced it on always).
        lines.append(go.Scatter(x=df['date'], y=df["7day avg"], name="7 day average"))

    if highlight_type is None:
        traces = [_std_bar(df, var_of_interest, var_of_interest)] + lines
    else:
        # this categorical variable could easily be turned into a slider (continuous)
        # "above avg" splits at the 0.5 quantile, i.e. the median.
        quantile_by_type = {"top10": .9, "above avg": .5}

        highlighted = pd.Series(False, index=df.index)
        if highlight_type in quantile_by_type:
            q = quantile_by_type[highlight_type]
            q_val = df[var_of_interest].quantile(q)
            highlighted = df[var_of_interest] >= q_val
            tr_name = f"TOP {round((1-q)*100)}% of wind speeds"
        elif highlight_type == "7day":
            # Locate the row where the 7-row rolling mean peaks and highlight
            # that row plus the 6 before it. The first 6 rolling values are
            # NaN, so the peak position is always >= 6 and the slice start
            # cannot go negative.
            rolling = df[var_of_interest].rolling(7).mean()
            if rolling.notna().any():
                end = int(rolling.reset_index(drop=True).idxmax())
                highlighted.iloc[end - 6:end + 1] = True
            tr_name = f"Greatest Cumulative 7 Days of Wind Speed"
        else:
            raise ValueError(f"unknown highlight_type: {highlight_type!r}")

        reg_bars = _std_bar(df[~highlighted], var_of_interest, var_of_interest)
        hi_bars = _std_bar(df[highlighted], var_of_interest, tr_name, marker_color="red")
        traces = [reg_bars] + lines + [hi_bars]

    fig = go.Figure(data=traces)
    fig.update_layout(title=f"{scale} Wind Speed Variability")
    return fig


def agg_by(df, temporal, scale="Hour", var_of_interest="wind speed at 100m (m/s)"):
    """
    Average df within each value of `scale` and return the two columns
    `scale` and `var_of_interest`.

    When `temporal` equals DIURNAL, only rows with Hour between 6 and 18
    (inclusive) take part in the average.
    """
    # TODO: currently hard coded for diurnal analysis
    if temporal == DIURNAL:
        in_daytime = (df['Hour'] > 5) & (df['Hour'] < 19)
        df = df[in_daytime]
    averaged = df.groupby([scale], as_index=False).mean()
    # TODO: inconsistent variables vs hardcoding
    return averaged[[scale, var_of_interest]]

def heat_map_corr_mat(df, var_of_interest='wind speed at 100m (m/s)'):
    """
    Build a two-column table listing the correlation of var_of_interest
    with every column of df that is not a time/bookkeeping column.

    :param df:              frame containing var_of_interest and the columns
                            to correlate it with
    :param var_of_interest: column every other kept column is correlated with
    :return:                plotly Figure holding a Table trace; rows follow
                            the column order of df
    """
    EXCLUDE = {"Hour", "Day", "date", "Year", "Month", "Minute", "7day avg", "wind speed std"}
    # Keep df's own column order; a set difference here made the row order
    # change from run to run.
    to_keep = [col for col in df.columns if col not in EXCLUDE]
    df_corr = df[to_keep]
    cors = [
        str(round(df_corr[var_of_interest].corr(df_corr[col]), 4))
        for col in df_corr.columns
    ]

    fig = go.Figure(data=[go.Table(
        header=dict(values=['Variable', 'Correlation'],
                    line_color='darkslategray',
                    fill_color='lightgreen',
                    align='center'),
        cells=dict(values=[df_corr.columns, # 1st column
                           cors], # 2nd column
                   line_color='darkslategray',
                   fill_color='lightcyan',
                   align='center'))
    ])

    return fig


## DASH components setup

In [64]:
import dash
from dash import dcc
from dash import html
from dash import Input, Output

In [None]:
# Scale identifiers shared by the helper functions, the dropdown values and
# the callbacks below.
DIURNAL = "Diurnal"
MONTHLY = "Monthly"
df = pd.read_csv("~/Documents/NREL/lakefill_df.csv")

# Aggregate once up front so the callbacks only pick a frame.
di_df = filter_df_by_time(df, DIURNAL)
mn_df = filter_df_by_time(df, MONTHLY)
app = dash.Dash()
# Layout: scale and highlight selectors feeding the main graph container,
# followed by a selector and container for the secondary figure.
app.layout = html.Div([
    dcc.Dropdown(
        id='base-graph-dropdown',
        options=[
            {'label': 'Diurnal', 'value': DIURNAL},
            {'label': 'Monthly', 'value': MONTHLY}
            # {'label': 'Weekly', 'value': 'wk'}
        ],
        value=DIURNAL
    ),
    dcc.Dropdown(
        id='highlight-graph-dropdown',
        options=[
            {'label': 'Top 10%', 'value': 'top10'},
            {'label': 'Above Average', 'value': 'above avg'},
            {'label': 'Largest 7 Day Period', 'value': '7day'}
            # {'label': 'Weekly', 'value': 'wk'}
        ],
        value=None
    ),
    html.Div(id='base-graph-output-container'),

    dcc.Dropdown(
        id='bar-x-axis',
        options=[
            {'label': 'Avg Wind by Hour', 'value': 'aWbH'},
            {'label': 'Correlation Matrix', 'value': 'CorMat'},
            # {'label': 'Month', 'value': 'Month'}
        ],
        value="aWbH"
    ),
    html.Div(id='bar-output-container')
])


@app.callback(
    Output('base-graph-output-container', 'children'),
    Input('base-graph-dropdown', 'value'),
    Input('highlight-graph-dropdown', 'value')
)
def update_output(temp_scale, highlight_type):
    """
    Redraw the main graph for the selected temporal scale and highlight.

    :param temp_scale:      value of the scale dropdown (DIURNAL or MONTHLY)
    :param highlight_type:  value of the highlight dropdown, or None
    :return:                a dcc.Graph, or dash.no_update when no known
                            scale is selected
    """
    frames_by_scale = {DIURNAL: di_df, MONTHLY: mn_df}
    if temp_scale not in frames_by_scale:
        # A cleared dropdown reports None; keep the current graph instead of
        # failing on an unbound frame.
        return dash.no_update
    fig = base_graph(frames_by_scale[temp_scale], temp_scale, highlight_type)
    return dcc.Graph(figure=fig)


@app.callback(
    Output('bar-output-container', 'children'),
    Input('bar-x-axis', 'value'),
    Input('base-graph-dropdown', 'value')
)
def update_secondary(bar_type, temp_input):
    """
    Redraw the secondary figure.

    :param bar_type:    "aWbH" for average wind speed by hour, "CorMat" for
                        the correlation table
    :param temp_input:  value of the scale dropdown (DIURNAL or MONTHLY)
    :return:            a dcc.Graph, or dash.no_update when the selection
                        cannot be drawn (cleared dropdown, unknown value)
    """
    if bar_type == "aWbH":
        # it would be nice to have this change based on the
        # dynamic x-range that plotly offers
        hist_df = agg_by(df, temporal=temp_input)
        hourly_bars = go.Bar(
            x=hist_df["Hour"],
            y=hist_df["wind speed at 100m (m/s)"],
            name="wind speed at 100m (m/s)"
        )
        fig = go.Figure(data=[hourly_bars])
        fig.update_layout(title=f"Wind Speed Variability X Hour")

    elif bar_type == "CorMat":
        frames_by_scale = {DIURNAL: di_df, MONTHLY: mn_df}
        if temp_input not in frames_by_scale:
            # Previously this called exit(), which raises SystemExit inside
            # the server; leave the current output in place instead.
            return dash.no_update
        fig = heat_map_corr_mat(frames_by_scale[temp_input])

    else:
        # bar_type is None (cleared dropdown) or unrecognised: nothing to draw.
        return dash.no_update

    return dcc.Graph(figure=fig)

# Start the Dash development server in debug mode with the auto-reloader
# disabled (presumably because it is launched from a notebook kernel).
# NOTE(review): newer Dash releases favour app.run(...) over run_server --
# confirm against the installed Dash version.
app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


In [58]:
# Scratch check: index label of the row with the highest 100m wind speed.
# var_of_interest
# print(2)
idx = df["wind speed at 100m (m/s)"].idxmax()
print(idx)
# df.iloc[idx]


8519
