## All Offers Daily Submissions Heatmap

In [1]:
import datetime as dt
import psycopg2
import pandas as pd
import plotly
import chart_studio.plotly as py
import plotly.graph_objs as go

from nbstyler import DATA_STYLE as DS
from nbstyler import JS_SNIPPETS as JS

### Objectives



### Data Preparation

Get the data and prepare the `pandas.DataFrame` object.

In [2]:
# psycopg2's `with connection:` block only scopes a transaction (commit or
# rollback on exit); it does not close the connection. Close it explicitly so
# that re-running this cell does not leave idle sessions open on the server.
conn = psycopg2.connect('dbname=jobsbg')
try:
    daily_df = pd.read_sql_query(
        'SELECT * FROM all_offers.ao_count_daily', conn, index_col='subm_date')
finally:
    conn.close()

# Convert the index to a DatetimeIndex so it can be aligned with a daily date range.
daily_df.index = pd.to_datetime(daily_df.index)
daily_df.tail(5)

Unnamed: 0_level_0,subm_count
subm_date,Unnamed: 1_level_1
2021-01-08,1322
2021-01-09,6
2021-01-10,3
2021-01-11,1919
2021-01-12,1434


#### Shaping the dataframe

Since I wanted to shape the heatmap so that the most active day of the week (Monday) is at the bottom, with the other days of the week stacked above it, I have to provide `Plotly` with a list of lists, each inner list holding the values of one weekday (e.g. every Monday) across the whole time period. To do that, I first filled in the missing dates to create a complete rectangular table, which could then be reshaped. See also: https://stackoverflow.com/a/45850005

In [3]:
# Pad the observed range out to whole weeks: back to the Monday on or before
# the first date, and forward to the Sunday on or after the last date
# (`weekday()` is 0 for Monday and 6 for Sunday).
min_ts, max_ts = min(daily_df.index), max(daily_df.index)
week_start = min_ts - dt.timedelta(days=min_ts.weekday())
week_end = max_ts + dt.timedelta(days=6 - max_ts.weekday())
idx = pd.date_range(week_start, week_end)

In [4]:
# Align the data with the padded daily range; dates that have no row of their
# own show up as NaN.
daily_df = daily_df.reindex(index=idx)
daily_df.head(5)

Unnamed: 0,subm_count
2017-09-25,
2017-09-26,
2017-09-27,1840.0
2017-09-28,1658.0
2017-09-29,1741.0


Now the dataframe can be reshaped into columns representing the days of the week, and then transposed to completely match our heatmap structure. That will make composing the chart itself very easy.

In [5]:
# One row per week, one column per weekday; the column labels are the day
# names of the first seven dates in the index.
n_weeks = len(daily_df) // 7
weekday_names = daily_df.index[:7].strftime('%A')
weekly_values = daily_df.values.reshape(n_weeks, 7)

# Transpose so that each row is a weekday and each column is a week.
matrix_df_values = pd.DataFrame(weekly_values, columns=weekday_names).T
matrix_df_values.head(7)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,163,164,165,166,167,168,169,170,171,172
Monday,,2567.0,2630.0,2537.0,2475.0,2457.0,2364.0,2323.0,2303.0,2123.0,...,1477.0,1372.0,1302.0,1034.0,1206.0,1175.0,934.0,5.0,2623.0,1919.0
Tuesday,,2435.0,1857.0,2002.0,1943.0,1675.0,1781.0,1711.0,1713.0,1618.0,...,1107.0,1072.0,855.0,786.0,874.0,818.0,631.0,1227.0,1914.0,1434.0
Wednesday,1840.0,1740.0,1704.0,1837.0,1688.0,1654.0,1723.0,1666.0,1617.0,1355.0,...,1085.0,997.0,847.0,884.0,928.0,847.0,641.0,504.0,1497.0,
Thursday,1658.0,1757.0,1688.0,1650.0,1532.0,1533.0,1550.0,1572.0,1444.0,1236.0,...,889.0,1000.0,772.0,868.0,881.0,705.0,1.0,245.0,1390.0,
Friday,1741.0,1751.0,1570.0,1621.0,1618.0,1592.0,1549.0,1564.0,1340.0,1359.0,...,1041.0,861.0,851.0,937.0,872.0,765.0,0.0,0.0,1322.0,
Saturday,12.0,24.0,41.0,19.0,37.0,27.0,6.0,0.0,28.0,15.0,...,8.0,2.0,4.0,3.0,6.0,4.0,4.0,4.0,6.0,
Sunday,36.0,8.0,11.0,6.0,10.0,1.0,9.0,16.0,6.0,7.0,...,2.0,4.0,1.0,5.0,5.0,1.0,3.0,2.0,3.0,


In [6]:
# Keep only the dates: drop the counts and turn the index into a regular column.
date_frame = daily_df.drop(columns=['subm_count']).reset_index()

# Same week-by-weekday reshape as for the values, transposed so that each row
# is a weekday and each column is a week.
matrix_df_dates = pd.DataFrame(
    date_frame.values.reshape(len(date_frame) // 7, 7)
).T
matrix_df_dates

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,163,164,165,166,167,168,169,170,171,172
0,2017-09-25,2017-10-02,2017-10-09,2017-10-16,2017-10-23,2017-10-30,2017-11-06,2017-11-13,2017-11-20,2017-11-27,...,2020-11-09,2020-11-16,2020-11-23,2020-11-30,2020-12-07,2020-12-14,2020-12-21,2020-12-28,2021-01-04,2021-01-11
1,2017-09-26,2017-10-03,2017-10-10,2017-10-17,2017-10-24,2017-10-31,2017-11-07,2017-11-14,2017-11-21,2017-11-28,...,2020-11-10,2020-11-17,2020-11-24,2020-12-01,2020-12-08,2020-12-15,2020-12-22,2020-12-29,2021-01-05,2021-01-12
2,2017-09-27,2017-10-04,2017-10-11,2017-10-18,2017-10-25,2017-11-01,2017-11-08,2017-11-15,2017-11-22,2017-11-29,...,2020-11-11,2020-11-18,2020-11-25,2020-12-02,2020-12-09,2020-12-16,2020-12-23,2020-12-30,2021-01-06,2021-01-13
3,2017-09-28,2017-10-05,2017-10-12,2017-10-19,2017-10-26,2017-11-02,2017-11-09,2017-11-16,2017-11-23,2017-11-30,...,2020-11-12,2020-11-19,2020-11-26,2020-12-03,2020-12-10,2020-12-17,2020-12-24,2020-12-31,2021-01-07,2021-01-14
4,2017-09-29,2017-10-06,2017-10-13,2017-10-20,2017-10-27,2017-11-03,2017-11-10,2017-11-17,2017-11-24,2017-12-01,...,2020-11-13,2020-11-20,2020-11-27,2020-12-04,2020-12-11,2020-12-18,2020-12-25,2021-01-01,2021-01-08,2021-01-15
5,2017-09-30,2017-10-07,2017-10-14,2017-10-21,2017-10-28,2017-11-04,2017-11-11,2017-11-18,2017-11-25,2017-12-02,...,2020-11-14,2020-11-21,2020-11-28,2020-12-05,2020-12-12,2020-12-19,2020-12-26,2021-01-02,2021-01-09,2021-01-16
6,2017-10-01,2017-10-08,2017-10-15,2017-10-22,2017-10-29,2017-11-05,2017-11-12,2017-11-19,2017-11-26,2017-12-03,...,2020-11-15,2020-11-22,2020-11-29,2020-12-06,2020-12-13,2020-12-20,2020-12-27,2021-01-03,2021-01-10,2021-01-17


#### Building the heatmap

The heatmap is composed of three dimensions: x, y, and z. The z dimension will hold our values matrix. Let's prepare that first:

In [7]:
# Row order of the z matrix, listed explicitly by weekday name.
weekday_order = (
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
)

# One list of weekly counts per weekday.
cell_values = [
    list(matrix_df_values.loc[day_name].values)
    for day_name in weekday_order
]

The approach to prepare the labels is the same:

In [8]:
# One list of date labels per weekday row (rows 0 through 6); the first ten
# characters of each value's string form are kept as the label.
cell_labels = [
    [str(date_value)[:10] for date_value in matrix_df_dates.iloc[row_pos].values]
    for row_pos in range(7)
]

A manually defined color scale to match the presentation style:

In [9]:
# Ten evenly spaced stops from 0.0 to 1.0 (steps of 1/9), each paired with the
# colour at the same position in the 'acc1' ramp.
acc1_ramp = DS['colorramp']['acc1']
colorscale = [[step / 9, acc1_ramp[step]] for step in range(10)]

Finally, the heatmap trace definition:

In [10]:
# x: every seventh date of the index (one per week); y: the weekday row labels.
week_starts = list(daily_df.index[::7])
weekday_rows = list(matrix_df_values.index)

hm_trace = go.Heatmap(
    z=cell_values,
    x=week_starts,
    y=weekday_rows,
    text=cell_labels,          # per-cell date strings used in the hover box
    hoverinfo='text+z+y',
    colorscale=colorscale,
    showscale=False,
    xgap=0,
    ygap=0,
)

data = [hm_trace]

Layout preparation is fairly standard. Tick marks and grid lines on both axes are drawn in the background colour, so they blend into the chart background.

In [11]:
# Figure layout. Colours and fonts come from the shared DS style dictionary.
layout = go.Layout(
    paper_bgcolor=DS['colors']['bg1'],
    plot_bgcolor=DS['colors']['bg1'],
    # `titlefont` is a deprecated layout attribute; the title text and its
    # font are given together through the nested `title` object instead.
    title=dict(
        text='Offer Submissions Daily Heatmap',
        font=DS['chart_fonts']['title'],
    ),
    font=DS['chart_fonts']['text'],
    autosize=True,
    showlegend=False,
    hidesources=True,
    xaxis=dict(
        title='Week',
        ticks='outside',
        ticklen=2,
        # Ticks and grid lines use the background colour so they blend in.
        tickcolor=DS['colors']['bg1'],
        gridcolor=DS['colors']['bg1'],
    ),
    yaxis=dict(
        title='Day',
        type='category',
        ticks='outside',
        ticklen=5,
        tickcolor=DS['colors']['bg1'],
        gridcolor=DS['colors']['bg1'],
    ),
)

In [13]:
# Combine the heatmap trace and the layout into one figure; leaving `fig` as
# the last expression renders it inline.
fig = go.Figure(
    layout=layout,
    data=data,
)
fig

In [14]:
# Export the chart as an HTML file with the font snippet attached as a
# post-render script.
html_export_options = dict(
    include_plotlyjs='directory',
    post_script=JS['chart_fonts'],
)
fig.write_html(
    'all_offers_daily_submissions_heatmap.html',
    **html_export_options,
)

In [15]:
# Apply the project stylesheet to the rendered notebook.
# NOTE(review): assumes datum.css wraps its rules in a <style> element, since
# the file content is passed to HTML() unchanged — confirm.
# `IPython.display` is the public import location for HTML.
from IPython.display import HTML

# Read with an explicit encoding so the result does not depend on the
# platform's default locale encoding.
with open('../resources/styles/datum.css', 'r', encoding='utf-8') as f:
    style = f.read()
HTML(style)