## Plotly and Dashboards

* Plotly
* Creating Dashboards

In [1]:
import numpy as np
import pandas as pd
import polars as pl

In [2]:
df = pd.read_csv('data/snow-alta-1990-2017.csv')

In [3]:
df_pl = pl.read_csv("data/snow-alta-1990-2017.csv")

## Clean Data

In [4]:
df.head()

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,DAPR,DAPR_ATTRIBUTES,DASF,DASF_ATTRIBUTES,...,WT03,WT03_ATTRIBUTES,WT04,WT04_ATTRIBUTES,WT05,WT05_ATTRIBUTES,WT06,WT06_ATTRIBUTES,WT11,WT11_ATTRIBUTES
0,USC00420072,"ALTA, UT US",40.5905,-111.6369,2660.9,1990-01-01,,,,,...,,,,,,,,,,
1,USC00420072,"ALTA, UT US",40.5905,-111.6369,2660.9,1990-01-02,,,,,...,,,,,,,,,,
2,USC00420072,"ALTA, UT US",40.5905,-111.6369,2660.9,1990-01-03,,,,,...,,,,,,,,,,
3,USC00420072,"ALTA, UT US",40.5905,-111.6369,2660.9,1990-01-04,,,,,...,,,,,,,,,,
4,USC00420072,"ALTA, UT US",40.5905,-111.6369,2660.9,1990-01-05,,,,,...,,,,,,,,,,


In [5]:
df.dtypes

STATION             object
NAME                object
LATITUDE           float64
LONGITUDE          float64
ELEVATION          float64
DATE                object
DAPR               float64
DAPR_ATTRIBUTES     object
DASF               float64
DASF_ATTRIBUTES     object
MDPR               float64
MDPR_ATTRIBUTES     object
MDSF               float64
MDSF_ATTRIBUTES     object
PRCP               float64
PRCP_ATTRIBUTES     object
SNOW               float64
SNOW_ATTRIBUTES     object
SNWD               float64
SNWD_ATTRIBUTES     object
TMAX               float64
TMAX_ATTRIBUTES     object
TMIN               float64
TMIN_ATTRIBUTES     object
TOBS               float64
TOBS_ATTRIBUTES     object
WT01               float64
WT01_ATTRIBUTES     object
WT03               float64
WT03_ATTRIBUTES     object
WT04               float64
WT04_ATTRIBUTES     object
WT05               float64
WT05_ATTRIBUTES     object
WT06               float64
WT06_ATTRIBUTES     object
WT11               float64
W

In [6]:
def tweak_alta(df: pd.DataFrame) -> pd.DataFrame:
    """Return a trimmed copy of the raw Alta frame with calendar and season columns.

    DATE is parsed and localized to America/Denver, only the station,
    precipitation, snow and temperature columns are kept, and MONTH, YEAR
    and SEASON are derived from DATE.

    SEASON labelling:
      * months 1-4   -> '<year-1>-<year> Season'
      * months 11-12 -> '<year>-<year+1> Season'
      * months 5-10  -> 'Off Season'
    """
    keep = ['DATE', 'STATION', 'NAME', 'LATITUDE', 'LONGITUDE',
            'PRCP', 'SNOW', 'SNWD', 'TMIN', 'TMAX', 'TOBS']

    def _season(df_):
        # Build both candidate labels, then pick per row by month.
        year = df_.YEAR
        ends_this_year = (year - 1).astype(str) + '-' + year.astype(str) + ' Season'
        starts_this_year = year.astype(str) + '-' + (year + 1).astype(str) + ' Season'
        return np.select(
            [df_.MONTH < 5, df_.MONTH > 10],
            [ends_this_year, starts_this_year],
            default='Off Season',
        )

    localized = pd.to_datetime(df.DATE).dt.tz_localize('America/Denver')
    return (
        df
        .assign(DATE=localized)
        .loc[:, keep]
        .assign(
            MONTH=lambda df_: df_.DATE.dt.month,
            YEAR=lambda df_: df_.DATE.dt.year,
            SEASON=_season,
        )
    )

In [7]:
def tweak_alta_pl(df: pl.DataFrame) -> pl.DataFrame:
    """Polars counterpart of ``tweak_alta``.

    Parses DATE as a datetime in the America/Denver time zone, keeps the
    station / precipitation / snow / temperature columns, and adds MONTH,
    YEAR and SEASON columns.

    SEASON labelling (same rules as the pandas version):
      * months 1-4   -> '<year-1>-<year> Season'
      * months 11-12 -> '<year>-<year+1> Season'
      * months 5-10  -> 'Off Season'

    Parameters
    ----------
    df : pl.DataFrame
        Raw frame with a string DATE column plus the columns selected below.

    Returns
    -------
    pl.DataFrame
        New frame with the selected columns and the three derived columns.
    """
    # Operate on the argument, not on the notebook-level `df_pl` global.
    return (df
        .with_columns(
        DATE = pl.col("DATE").str.to_datetime().dt.replace_time_zone("America/Denver")
        )
        .select(['DATE', 'STATION', 'NAME', 'LATITUDE', 'LONGITUDE',
                        'PRCP', 'SNOW', 'SNWD', 'TMIN', 'TMAX', 'TOBS'
                        ])
        .with_columns(
        MONTH = pl.col("DATE").dt.month(),
        YEAR = pl.col("DATE").dt.year(),
        )
        .with_columns(
        SEASON = pl.when(pl.col("MONTH") < 5)
                        # Jan-Apr: the season started in the previous calendar year.
                        .then((pl.col("YEAR") - 1).cast(pl.String) + '-' +
                        pl.col("YEAR").cast(pl.String) + ' Season')
                        .when(pl.col("MONTH") > 10)
                        # Nov-Dec: the season ends next year; the start year comes
                        # first so the label reads e.g. '1990-1991 Season'.
                        .then(pl.col("YEAR").cast(pl.String) + '-' +
                        (pl.col("YEAR") + 1).cast(pl.String) + ' Season')
                        .otherwise(pl.lit("Off Season"))
                        )
        )

In [8]:
# Build the cleaned frames once; the plotting cells below read `alta`.
alta = tweak_alta(df)
alta_pl = tweak_alta_pl(df_pl)

In [9]:
alta

Unnamed: 0,DATE,STATION,NAME,LATITUDE,LONGITUDE,PRCP,SNOW,SNWD,TMIN,TMAX,TOBS,MONTH,YEAR,SEASON
0,1990-01-01 00:00:00-07:00,USC00420072,"ALTA, UT US",40.5905,-111.6369,0.00,0.0,38.0,11.0,38.0,30.0,1,1990,1989-1990 Season
1,1990-01-02 00:00:00-07:00,USC00420072,"ALTA, UT US",40.5905,-111.6369,0.53,17.0,55.0,11.0,30.0,11.0,1,1990,1989-1990 Season
2,1990-01-03 00:00:00-07:00,USC00420072,"ALTA, UT US",40.5905,-111.6369,0.08,4.0,51.0,-5.0,13.0,11.0,1,1990,1989-1990 Season
3,1990-01-04 00:00:00-07:00,USC00420072,"ALTA, UT US",40.5905,-111.6369,0.00,0.0,51.0,0.0,17.0,12.0,1,1990,1989-1990 Season
4,1990-01-05 00:00:00-07:00,USC00420072,"ALTA, UT US",40.5905,-111.6369,0.52,13.0,56.0,12.0,23.0,20.0,1,1990,1989-1990 Season
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10020,2018-01-22 00:00:00-07:00,USC00420072,"ALTA, UT US",40.5905,-111.6369,0.05,1.3,45.0,2.0,22.0,18.0,1,2018,2017-2018 Season
10021,2018-01-23 00:00:00-07:00,USC00420072,"ALTA, UT US",40.5905,-111.6369,0.10,1.8,44.0,14.0,26.0,17.0,1,2018,2017-2018 Season
10022,2018-01-24 00:00:00-07:00,USC00420072,"ALTA, UT US",40.5905,-111.6369,0.00,0.0,41.0,13.0,41.0,34.0,1,2018,2017-2018 Season
10023,2018-01-25 00:00:00-07:00,USC00420072,"ALTA, UT US",40.5905,-111.6369,0.20,3.9,44.0,18.0,35.0,18.0,1,2018,2017-2018 Season


In [10]:
alta_pl

DATE,STATION,NAME,LATITUDE,LONGITUDE,PRCP,SNOW,SNWD,TMIN,TMAX,TOBS,MONTH,YEAR,SEASON
"datetime[μs, America/Denver]",str,str,f64,f64,f64,f64,i64,i64,i64,i64,i8,i32,str
1990-01-01 00:00:00 MST,"""USC00420072""","""ALTA, UT US""",40.5905,-111.6369,0.0,0.0,38,11,38,30,1,1990,"""1989-1990 Seas…"
1990-01-02 00:00:00 MST,"""USC00420072""","""ALTA, UT US""",40.5905,-111.6369,0.53,17.0,55,11,30,11,1,1990,"""1989-1990 Seas…"
1990-01-03 00:00:00 MST,"""USC00420072""","""ALTA, UT US""",40.5905,-111.6369,0.08,4.0,51,-5,13,11,1,1990,"""1989-1990 Seas…"
1990-01-04 00:00:00 MST,"""USC00420072""","""ALTA, UT US""",40.5905,-111.6369,0.0,0.0,51,0,17,12,1,1990,"""1989-1990 Seas…"
1990-01-05 00:00:00 MST,"""USC00420072""","""ALTA, UT US""",40.5905,-111.6369,0.52,13.0,56,12,23,20,1,1990,"""1989-1990 Seas…"
1990-01-06 00:00:00 MST,"""USC00420072""","""ALTA, UT US""",40.5905,-111.6369,0.0,0.0,53,10,30,26,1,1990,"""1989-1990 Seas…"
1990-01-07 00:00:00 MST,"""USC00420072""","""ALTA, UT US""",40.5905,-111.6369,0.0,0.0,51,10,30,27,1,1990,"""1989-1990 Seas…"
1990-01-08 00:00:00 MST,"""USC00420072""","""ALTA, UT US""",40.5905,-111.6369,1.5,9.0,61,,,,1,1990,"""1989-1990 Seas…"
1990-01-09 00:00:00 MST,"""USC00420072""","""ALTA, UT US""",40.5905,-111.6369,0.21,2.0,57,30,47,39,1,1990,"""1989-1990 Seas…"
1990-01-10 00:00:00 MST,"""USC00420072""","""ALTA, UT US""",40.5905,-111.6369,0.0,0.0,55,29,42,41,1,1990,"""1989-1990 Seas…"


## Plotting with Plotly

In [11]:
import plotly
import plotly.express as px
# Important!
pd.options.plotting.backend = 'plotly'

In [12]:
# Snow depth (SNWD) against DATE for every row in `alta`.
# `.plot` renders through plotly because the pandas plotting backend was switched above.
(alta
.plot(x="DATE", y='SNWD'))

In [13]:
# Zoom in on a single season: SEASON labels look like '2010-2011 Season',
# so a substring match on "2010-2011" keeps only that season's rows.
(alta
 .query('SEASON.str.contains("2010-2011")')
 .plot(x='DATE', y='SNWD', title='2011 Season Snow Depth')
)

## Bar Plot

In [16]:
# Expect an error (for demonstration purposes)
# TypeError: putmask: first argument must be an array

# (alta
#  .SNOW
#  .pipe(pd.cut, bins=10)
#  .value_counts()
#  #.plot.bar()
#  .index
# )

In [18]:
# Expect an error (for demonstration purposes)
# TypeError: putmask: first argument must be an array

# (alta
#  .SNOW
#  .pipe(pd.cut, bins=10)
#  .value_counts()
#  .reset_index()
#  .rename(columns={'SNOW': 'Bin (in)', 'count': 'Count'})
#  .astype({'Bin (in)': str})
#  .plot.bar(x='Bin (in)', y='Count', title='SNOW FALL COUNTS')
# )

In [19]:
# Bucket daily SNOW values into hand-picked bins and chart how many rows fall in each.
(alta
 .SNOW
# .pipe(pd.cut, bins=10)  # equal-width variant, superseded by the explicit edges below
 .pipe(pd.cut, bins=[-1, 0, 1, 5, 10, 15, 20, 25, 40]) 
 .value_counts()
 .reset_index()
 # NOTE(review): relies on value_counts() naming its result column 'count'
 # (pandas >= 2.0 behaviour) - confirm against the installed pandas version.
 .rename(columns={'SNOW': 'Bin (in)', 'count': 'Count'})
 # Turn the Interval bins into plain strings so they can serve as bar labels.
 .astype({'Bin (in)': str})
 .plot.bar(x='Bin (in)', y='Count', title='SNOW FALL COUNTS')
)

In [20]:
# Same binned bar chart as before, restricted to rows where SNOW is positive.
(alta
 .query('SNOW > 0') 
 .SNOW
 # NOTE(review): after the SNOW > 0 filter the first bin, (-1, 0], can never
 # receive a value, so it contributes an empty bar - confirm this is intended.
 .pipe(pd.cut, bins=[-1, 0, 1, 5, 10, 15, 20, 25, 40]) 
 .value_counts()
 .reset_index()
 # NOTE(review): relies on value_counts() naming its result column 'count'
 # (pandas >= 2.0 behaviour) - confirm against the installed pandas version.
 .rename(columns={'SNOW': 'Bin (in)', 'count': 'Count'})
 # Turn the Interval bins into plain strings so they can serve as bar labels.
 .astype({'Bin (in)': str})
 .plot.bar(x='Bin (in)', y='Count', title='SNOW FALL COUNTS')
)

## Scatter Plot

In [21]:
# Snow depth by month, coloured by observed temperature (TOBS).
# The matplotlib-style `c=` keyword is dropped by plotly's pandas backend for
# scatter plots, so the colouring must be requested with plotly's `color=`.
(alta
 .plot.scatter(x='MONTH', y='SNWD', color='TOBS', opacity=.2)
)

In [22]:
# Snow depth against observed temperature, with MONTH mapped to marker colour
# through plotly's `color=` keyword.
(alta
 .plot.scatter(x='TOBS', y='SNWD', color='MONTH')
)

In [23]:
import plotly.graph_objects as go

# Same TOBS-vs-SNWD scatter built directly with graph_objects, which gives
# explicit control over marker size, colour scale and colour-bar title.
fig = go.Figure(
    data=go.Scatter(
        x=alta.TOBS,
        y=alta.SNWD,
        mode='markers',
        marker={
            'size': 10,
            'color': alta.MONTH,                # one colour value per point
            'colorscale': 'Twilight',
            'colorbar': {'title': 'Month'},
        },
    )
)

fig

## Dash

In [24]:
%%writefile dashdemo.py
import dash
from dash import dcc, html, Dash, Input, Output
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
pd.options.plotting.backend = 'plotly'

def tweak_alta(df):
    """Clean the raw Alta weather frame and add MONTH, YEAR and SEASON columns.

    DATE is parsed and localized to America/Denver, and only the station,
    precipitation, snow and temperature columns are kept. Months 1-4 are
    labelled '<year-1>-<year> Season', months 11-12 '<year>-<year+1> Season',
    and every other month 'Off Season'.
    """
    wanted = ['DATE', 'STATION', 'NAME', 'LATITUDE', 'LONGITUDE',
              'PRCP', 'SNOW', 'SNWD', 'TMIN', 'TMAX', 'TOBS']

    def season_label(df_):
        # Both candidate labels are computed for every row; np.select picks by month.
        yr = df_.YEAR
        winter_tail = (yr - 1).astype(str) + '-' + yr.astype(str) + ' Season'
        winter_head = yr.astype(str) + '-' + (yr + 1).astype(str) + ' Season'
        conditions = [df_.MONTH < 5, df_.MONTH > 10]
        return np.select(conditions, [winter_tail, winter_head],
                         default='Off Season')

    dates = pd.to_datetime(df.DATE).dt.tz_localize('America/Denver')
    trimmed = df.assign(DATE=dates).loc[:, wanted]
    return trimmed.assign(
        MONTH=lambda df_: df_.DATE.dt.month,
        YEAR=lambda df_: df_.DATE.dt.year,
        SEASON=season_label,
    )
        
# NOTE(review): this path is resolved against the directory the script is
# launched from; the notebook itself reads 'data/...' - confirm the intended
# working directory.
df = pd.read_csv('notebooks/data/snow-alta-1990-2017.csv')
alta = tweak_alta(df)
app = Dash(__name__)

# Static figure: snow depth for the rows whose SEASON label contains "2010-2011".
fig = (alta
 .query('SEASON.str.contains("2010-2011")')
 .plot(x='DATE', y='SNWD', title='2011 Season Snow Depth')
)

# Page layout: heading, a Markdown blurb, then the graph.
app.layout = dash.html.Div(children=[
    dash.html.H1('Alta 2011 Season'),
    dash.dcc.Markdown('''## Line Plot of Snow Depth

* This is Markdown text.
* Plot of Snow Depth

    '''),
    dash.dcc.Graph(id='line-graph', figure=fig)
])

if __name__ == '__main__':
    # `app.run` is the supported entry point; the legacy `app.run_server`
    # alias is no longer available in current Dash releases.
    app.run(debug=True)


Writing dashdemo.py


## Widget

In [25]:
%%writefile dashdemo2.py
import dash
from dash import dcc, html, Dash, Input, Output
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
pd.options.plotting.backend = 'plotly'

def tweak_alta(df):
    """Return the cleaned Alta frame used by the dashboard.

    Steps: parse DATE and localize it to America/Denver, keep the station /
    precipitation / snow / temperature columns, then derive MONTH, YEAR and
    a SEASON label ('<start>-<end> Season' for months 1-4 and 11-12,
    'Off Season' otherwise).
    """
    columns_to_keep = ['DATE', 'STATION', 'NAME', 'LATITUDE', 'LONGITUDE',
                       'PRCP', 'SNOW', 'SNWD', 'TMIN', 'TMAX', 'TOBS']

    def _label_season(df_):
        # Jan-Apr rows belong to the season that started the previous year;
        # Nov-Dec rows to the season that finishes the following year.
        this_year = df_.YEAR
        spring_side = ((this_year - 1).astype(str) + '-' +
                       this_year.astype(str) + ' Season')
        autumn_side = (this_year.astype(str) + '-' +
                       (this_year + 1).astype(str) + ' Season')
        return np.select([df_.MONTH < 5, df_.MONTH > 10],
                         [spring_side, autumn_side],
                         default='Off Season')

    with_dates = df.assign(
        DATE=pd.to_datetime(df.DATE).dt.tz_localize('America/Denver'))
    return (
        with_dates
        .loc[:, columns_to_keep]
        .assign(MONTH=lambda df_: df_.DATE.dt.month,
                YEAR=lambda df_: df_.DATE.dt.year,
                SEASON=_label_season)
    )
        
# NOTE(review): this path is resolved against the directory the script is
# launched from; the notebook itself reads 'data/...' - confirm the intended
# working directory.
df = pd.read_csv('notebooks/data/snow-alta-1990-2017.csv')
alta = tweak_alta(df)
app = Dash(__name__)

# Default figure, shown whenever no season is selected in the dropdown.
fig = (alta
 .query('SEASON.str.contains("2010-2011")')
 .plot(x='DATE', y='SNWD', title='2011 Season Snow Depth')
)


# Page layout: heading, a Markdown blurb, a season picker, then the graph.
app.layout = dash.html.Div(children=[
    dash.html.H1('Alta Season Snow Depths'),
    dash.dcc.Markdown('''## Line Plot of Snow Depth
* This is Markdown text.
* Plot of Snow Depth
    '''),
    dash.dcc.Dropdown(
                    options=alta.SEASON.unique(),
                    id='years'),
    dash.dcc.Graph(id='line-graph', figure=fig)
])
    
@app.callback(
    dash.Output(component_id='line-graph', component_property='figure'),
    dash.Input(component_id='years', component_property='value'),
)
def update_line_figure(max_year):
    """Redraw the snow-depth line plot for the season chosen in the dropdown.

    ``max_year`` is the dropdown's current value: one of the SEASON labels,
    or ``None`` when nothing is selected. Dash invokes this callback on page
    load and again whenever the dropdown is cleared, so ``None`` must be
    handled; in that case the default figure is returned instead of an
    empty chart titled 'None Snow Depth'.
    """
    if max_year is None:
        return fig
    return (alta
     .query('SEASON == @max_year')
     .plot(x='DATE', y='SNWD',
          title=f'{max_year} Snow Depth')
    )   

if __name__ == '__main__':
    # `app.run` is the supported entry point; the legacy `app.run_server`
    # alias is no longer available in current Dash releases.
    app.run(debug=True)


Writing dashdemo2.py
