In [1]:
import os

import h5pyd
import pandas as pd
import requests

### DATA PROCESSING STEPS
##### 1. acquiring API KEY
##### 2. specifying parameters of interest
##### 3. wrangling the API response: reading it as text and using string manipulation to load it into a DataFrame
##### 4. save pandas df

In [46]:
# SECURITY: the key is read from the environment instead of being hard-coded,
# so it is never stored in the saved notebook. The key that used to be embedded
# in this cell should be treated as leaked and revoked.
API_KEY = os.environ["NREL_API_KEY"]

base_url = "https://developer.nrel.gov/api/wind-toolkit/v2/wind/offshore-great-lakes-download.csv"
# Passing the query as a dict lets requests URL-encode every value
# (spaces, commas, "@") instead of relying on hand-escaped text.
params = {
    "wkt": "POINT(-87.669888 42.052294)",
    "attributes": "wind_speed,wind_direction,pressure,temperature",
    "names": "2012",
    "utc": "false",
    "leap_day": "true",
    "full_name": "Mark Roth",
    "email": "rothmark@oregonstate.edu",
    "api_key": API_KEY,
}
# (connect timeout, read timeout) in seconds, so a stalled connection cannot
# hang the kernel forever.
z = requests.get(base_url, params=params, timeout=(10, 300))
# Fail loudly on an HTTP error instead of letting the error body be parsed as
# CSV by the cells below.
z.raise_for_status()

In [71]:
# Break the raw response text into lines. The first two lines are kept as
# metadata and the third is kept (as a one-element list) as the header row.
lines = z.text.split("\n")
meta_data, header_names = lines[:2], lines[2:3]

In [83]:
# Split every data line (everything after the two metadata lines and the header
# line) into its comma-separated fields.
# Blank lines are skipped: splitting the text on "\n" leaves an empty string
# after a trailing newline, which would otherwise become a spurious empty row.
rows = [x.split(",") for x in lines[3:] if x.strip()]
df = pd.DataFrame(rows)
print('done')

done


In [84]:
# Name the columns after the comma-separated header line, then preview the
# first rows of the table.
column_names = header_names[0].split(",")
df.columns = column_names
df.head()

Unnamed: 0,Year,Month,Day,Hour,Minute,wind speed at 100m (m/s),wind direction at 100m (deg),air pressure at 100m (Pa),air temperature at 100m (C)
0,2012,1,1,0,30,14.86,179.16,97110,4.57
1,2012,1,1,1,30,14.76,186.39,96950,4.94
2,2012,1,1,2,30,15.26,192.45,96820,5.11
3,2012,1,1,3,30,14.56,203.3,96680,5.2700000000000005
4,2012,1,1,4,30,13.45,217.14,96530,4.94


In [85]:
# Persist the parsed table; later cells reload it from this same path.
lakefill_csv = "~/Desktop/NREL/lakefill_df.csv"
df.to_csv(lakefill_csv, index=False)

### Data Exploration

#### Assumptions:
- "diurnal" is taken to mean occurring in the daytime (e.g., looking at 6am-6pm of each day), as opposed to daily
- I made this assumption because energy consumption varies drastically between day and night
- the interpretation of diurnal can be changed to mean daily by switching the "IS_DAILY" flag to True

In [2]:
import plotly.express as xp
import matplotlib
import pandas as pd
import plotly.graph_objects as go
import numpy as np
# Toggle described in the assumptions above: False = "diurnal" means daytime
# only, True = "diurnal" means daily.
# NOTE(review): filter_df_by_time further down does not consult this flag and
# always applies the daytime filter for the diurnal scale - confirm intent.
IS_DAILY = False

In [5]:
# Reload the table that the data-processing section saved to disk.
lakefill_csv = "~/Desktop/NREL/lakefill_df.csv"
df = pd.read_csv(lakefill_csv)
# Earlier, disabled version of the diurnal aggregation, kept for reference.
# These lines are the only place in this cell that would assign `diurnal_df`.
# if not IS_DAILY:
#     df = df[df['Hour'] < 18]
#     df = df[df['Hour'] > 5]
# diurnal_df = df.groupby(['Month', 'Day'], as_index=False).mean()
# df_std = df[['Month', 'Day', 'wind speed at 100m (m/s)']].groupby(['Month', 'Day'], as_index=False).agg(np.std)
# diurnal_df['wind speed std'] = df_std['wind speed at 100m (m/s)']
# diurnal_df = df.groupby(['Month', 'Day'], as_index=False).agg(['mean', np.std])

In [4]:
# Build the per-day table in this cell so it runs on a fresh kernel; no other
# active code assigns `diurnal_df`.
# Unless IS_DAILY is set, only the daytime hours 6..17 contribute to each mean.
daily_source = df if IS_DAILY else df[(df['Hour'] > 5) & (df['Hour'] < 18)]
diurnal_df = daily_source.groupby(['Month', 'Day'], as_index=False).mean()

# Assemble one timestamp per row from the Year/Month/Day columns.
# `infer_datetime_format` is not passed: it is deprecated in pandas 2.x and is
# not needed when the date is assembled from component columns.
diurnal_df['date'] = pd.to_datetime(diurnal_df[['Month', 'Day', 'Year']])

# Rolling mean over 7 consecutive rows, i.e. 7 days at this granularity.
diurnal_df['7day avg'] = diurnal_df['wind speed at 100m (m/s)'].rolling(7).mean()

In [42]:
# can be made to take a continuous variable (i.e., number of days to aggregate) 
# rather than a categorical variable
def filter_df_by_time(df, scale):
    """
    Aggregate hourly wind data to the requested temporal scale.

    :param df:      dataframe to aggregate; must contain the columns
                    Hour, Day, Month, Year and
                    "wind speed at 100m (m/s)". Every column is averaged
                    with groupby().mean(). The caller's frame is never
                    modified.

    :param scale:   one of the module-level constants DIURNAL or MONTHLY.
                    DIURNAL keeps hours 6..17 and averages per
                    (Month, Day); MONTHLY averages per Month. Any other
                    value leaves the rows unaggregated.

    :return:        new dataframe holding the group means plus
                    (1) "wind speed std": standard deviation of the wind
                        speed within each group (aggregated scales only),
                    (2) "date": timestamp assembled from Year/Month/Day,
                    (3) "7day avg": rolling mean of the wind speed over 7
                        consecutive rows (7 days only at the diurnal
                        scale).
    """
    speed_col = 'wind speed at 100m (m/s)'

    if scale == DIURNAL:
        # Daytime window: hours 6 through 17 inclusive.
        daytime = df[(df['Hour'] > 5) & (df['Hour'] < 18)]
        df = _mean_with_speed_std(daytime, ['Month', 'Day'], speed_col)

    elif scale == MONTHLY:
        # NOTE(review): Day becomes the mean day-of-month here and may be
        # fractional; the assembled date then relies on pandas' coercion of
        # that value - confirm this is the intended x position.
        df = _mean_with_speed_std(df, ['Month'], speed_col)

    else:
        # Unrecognised scale: rows stay unaggregated. Work on a copy so the
        # columns added below do not leak into the caller's dataframe.
        df = df.copy()

    # `infer_datetime_format` is not passed: it is deprecated in pandas 2.x
    # and is not needed when the date is assembled from component columns.
    df['date'] = pd.to_datetime(df[['Month', 'Day', 'Year']])

    df['7day avg'] = df[speed_col].rolling(7).mean()

    return df


def _mean_with_speed_std(df, keys, speed_col):
    """Return per-group means of every column plus the per-group std of speed_col."""
    means = df.groupby(keys, as_index=False).mean()
    # Both groupbys sort by the same keys, so the rows line up positionally.
    # The string-free `.std()` call replaces `agg(np.std)`, which newer pandas
    # flags with a FutureWarning.
    means['wind speed std'] = df.groupby(keys)[speed_col].std().to_numpy()
    return means


def base_graph(df, scale):
    """
    Build the wind-speed overview figure for one temporal scale.

    :param df:      dataframe with the columns date,
                    "wind speed at 100m (m/s)", "wind speed std" and
                    "7day avg"
    :param scale:   scale label; used in the title, and the 7 day average
                    line is only drawn when it equals DIURNAL
    :return:        plotly Figure with a bar trace (std as error bars), a
                    flat line at the overall mean and, for DIURNAL, the
                    7 day average line
    """
    speed_col = "wind speed at 100m (m/s)"
    dates = df['date']
    speeds = df[speed_col]

    bars = go.Bar(
        x=dates,
        y=speeds,
        error_y={'type': 'data', 'array': df["wind speed std"], 'visible': True},
        name="wind speed at 100m (m/s)",
    )
    # Constant series: the overall mean repeated once per row.
    overall_mean = go.Scatter(x=dates, y=[speeds.mean()] * len(df), name="avg wind speed")
    rolling_mean = go.Scatter(x=dates, y=df["7day avg"], name="7 day average")

    traces = [bars, overall_mean] + ([rolling_mean] if scale == DIURNAL else [])

    fig = go.Figure(data=traces)
    fig.update_layout(title=f"{scale} Wind Speed Variability")
    return fig


def agg_by(df, scale="Hour", var_of_interest="wind speed at 100m (m/s)"):
    """
    Average one variable within each value of a time column.

    :param df:               dataframe containing the columns `scale` and
                             `var_of_interest`
    :param scale:            name of the column to group by
                             (e.g. "Hour", "Day", "Month")
    :param var_of_interest:  name of the numeric column to average
    :return:                 dataframe with exactly two columns: `scale`
                             and the per-group mean of `var_of_interest`,
                             ordered by `scale`
    """
    # Select the two needed columns before grouping so unrelated columns are
    # neither averaged nor able to break the mean when they are non-numeric.
    # TODO: a per-group spread (std) for error bars is not returned yet.
    return df[[scale, var_of_interest]].groupby([scale], as_index=False).mean()


## DASH components setup

In [12]:
import dash
from dash import dcc
from dash import html
from dash import Input, Output

In [43]:
# Scale identifiers; filter_df_by_time and base_graph compare against these.
DIURNAL = "Diurnal"
MONTHLY = "Monthly"
df = pd.read_csv("~/Desktop/NREL/lakefill_df.csv")

# Aggregate once up front so the callbacks only have to pick a frame.
di_df = filter_df_by_time(df, scale=DIURNAL)
mn_df = filter_df_by_time(df, scale=MONTHLY)

# Selector for the overview graph.
# TODO: a 'Weekly' option (value 'wk') was sketched here but is not implemented.
scale_dropdown = dcc.Dropdown(
    id='base-graph-dropdown',
    options=[
        dict(label='Diurnal', value=DIURNAL),
        dict(label='Monthly', value=MONTHLY),
    ],
    value=DIURNAL,
)

# Selector for the x axis of the bar chart.
# TODO: a second dropdown with id 'agg-y-axis' (same Hour/Day/Month options)
# was sketched here but is not implemented.
x_axis_dropdown = dcc.Dropdown(
    id='bar-x-axis',
    options=[{'label': unit, 'value': unit} for unit in ('Hour', 'Day', 'Month')],
    value="Hour",
)

app = dash.Dash()
# Each dropdown is followed by the container that its callback fills.
app.layout = html.Div(children=[
    scale_dropdown,
    html.Div(id='base-graph-output-container'),
    x_axis_dropdown,
    html.Div(id='bar-output-container'),
])


@app.callback(
    Output('base-graph-output-container', 'children'),
    Input('base-graph-dropdown', 'value')
)
def update_output(value):
    """
    Render the overview graph for the scale chosen in 'base-graph-dropdown'.

    :param value:   current dropdown value; expected to be DIURNAL or MONTHLY
    :return:        a dcc.Graph for the selected scale, or dash.no_update
                    (container left unchanged) when the value is not a
                    known scale
    """
    frames_by_scale = {DIURNAL: di_df, MONTHLY: mn_df}
    if value not in frames_by_scale:
        # The value can be None when the selection is cleared; previously
        # this path hit an unbound local variable and raised.
        return dash.no_update
    fig = base_graph(frames_by_scale[value], value)
    return dcc.Graph(figure=fig)


@app.callback(
    Output('bar-output-container', 'children'),
    Input('bar-x-axis', 'value')
)
def update_bar(value):
    """
    Render a bar chart of mean wind speed grouped by the chosen time column.

    :param value:   current value of 'bar-x-axis'; the name of the column
                    to group by ('Hour', 'Day' or 'Month')
    :return:        a dcc.Graph with one bar per group, or dash.no_update
                    (container left unchanged) when nothing is selected
    """
    if value is None:
        # The value can be None when the selection is cleared; grouping by
        # None would raise inside agg_by.
        return dash.no_update

    hist_df = agg_by(df, scale=value)
    # TODO: error bars are not drawn because agg_by does not return a spread.
    bars = go.Bar(
        x=hist_df[value],
        y=hist_df["wind speed at 100m (m/s)"],
        name="wind speed at 100m (m/s)"
    )
    fig = go.Figure(data=[bars])
    fig.update_layout(title=f"Wind Speed Variability X {value}")

    return dcc.Graph(figure=fig)

# Start the Dash development server with debug mode on and the auto-reloader off.
# NOTE(review): newer Dash releases expose this entry point as `app.run(...)` -
# confirm which name the installed Dash version expects.
app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
