## All Jobs Monthly Salary Statistics

In [1]:
import datetime
import psycopg2
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import ipywidgets as widgets
import plotly
import chart_studio.plotly as py
import plotly.graph_objs as go

from nbstyler import DATA_STYLE as DS

plotly.offline.init_notebook_mode(connected=True)

%matplotlib notebook
%matplotlib inline

### Objectives



### Data Preparation

For the box plot chart, Plotly requires all values for the respective aggregation level and calculates the statistics on the fly. Because concatenating the individual values per aggregation level takes some time, a materialized view is used in PostgreSQL.

A `pandas.DataFrame` is constructed for each dataset, and the first and last time periods of each dataset are dropped to remove incomplete time periods at the start and finish. This step also converts the `DataFrame` index to a `DatetimeIndex` to utilize the temporal methods that it exposes.

### Loading the Data

In [2]:
%%time

# Open a connection to the `jobsbg` database and make sure it is closed again
# even if the query raises, so no connection is leaked across re-runs.
conn = psycopg2.connect("dbname=jobsbg")
try:
    sstats_monthly = pd.read_sql_query(
        'SELECT * FROM all_offers.ao_salary_lists_monthly',
        conn,
        index_col='month_ts',
    )
finally:
    conn.close()

# Convert the index to datetimes so it can be ordered chronologically and
# expose the temporal helpers of a DatetimeIndex.
sstats_monthly.index = pd.to_datetime(sstats_monthly.index)

# The query has no ORDER BY, so sort by month before trimming: the first and
# last rows are dropped because those periods may be incomplete.
sstats_monthly = sstats_monthly.sort_index().iloc[1:-1]

CPU times: user 6.88 ms, sys: 0 ns, total: 6.88 ms
Wall time: 16.7 s


In [3]:
# Peek at the first three months to confirm the frame's shape and columns.
sstats_monthly.head(n=3)

Unnamed: 0_level_0,sample_size,sample_list
month_ts,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-10-01,10143,"775.00,1250.00,622.50,675.00,675.00,637.50,110..."
2017-11-01,9061,"1000.00,825.00,675.00,1050.00,731.25,900.00,10..."
2017-12-01,5713,"825.00,1200.00,945.00,750.00,1150.00,550.00,78..."


### Implementing the Chart in Plotly

In [4]:
# One x position per row, taken straight from the frame's index.
x_vals = list(sstats_monthly.index)

# Each `sample_list` cell is a comma-separated string of numbers; parse every
# row into its own list of floats.
y_vals = [
    [float(sample) for sample in samples.split(',')]
    for samples in sstats_monthly.sample_list
]

In [5]:
# Build one box trace per month. Each trace is named after its index label and
# receives that month's parsed sample values; individual points are hidden and
# the traces are kept out of the legend.
data = [
    go.Box(
        name=str(month),
        y=samples,
        marker=dict(opacity=0.8),
        line=dict(width=1, color=DS['colors']['acc1']),
        whiskerwidth=0.5,
        boxpoints=False,
        showlegend=False,
    )
    for month, samples in zip(sstats_monthly.index, y_vals)
]

The layout below defines the chart's styling, axis configuration, and interaction settings.

In [6]:
# Chart layout: colours and fonts come from the shared notebook style (DS).
layout = go.Layout(
    paper_bgcolor=DS['colors']['bg1'],
    plot_bgcolor=DS['colors']['bg1'],
    # `titlefont` is deprecated; the title text and font are set together.
    title=dict(
        text='Monthly Salary Statistics Box Plot',
        font=DS['chart_fonts']['title'],
    ),
    font=DS['chart_fonts']['text'],
    autosize=True,
    showlegend=False,
    hidesources=True,
    xaxis=dict(
        type='date',
        fixedrange=True,  # disable zooming/panning along the time axis
        hoverformat='',
        ticks='outside',
        tickmode='auto',
        zerolinecolor=DS['colors']['bg3'],
    ),
    yaxis=dict(
        title='BGN',
        type='linear',
        range=[0, 1400],  # fixed view window; values above 1400 are cut off
        rangemode='tozero',
        gridcolor=DS['colors']['bg3'],
        # Plotly expects d3-format specifiers here, not Python format strings:
        # ',' enables thousands separators (the former '{:,}' is not valid d3).
        hoverformat=',',
        tickformat=',',
        ticks='outside',
        tickwidth=1,
    ),
)

In [7]:
# Combine the box traces with the layout and render the figure in the notebook.
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(
    fig,
    filename='all_offers_monthly_salary_stats_box.html',
)

In [8]:
# Export a standalone HTML version of the chart to the given file name.
# Comment out the line below to skip the export.
plotly.offline.plot(fig, filename = 'all_offers_monthly_salary_stats_box.html', show_link=False)

'all_offers_monthly_salary_stats_box.html'

In [9]:
# Load the shared stylesheet and inject it into the notebook as the cell's
# HTML output. `HTML` is imported from the public `IPython.display` module
# rather than the internal `IPython.core.display`.
from IPython.display import HTML

# Read with an explicit encoding so the result does not depend on the
# platform's default locale.
with open('../resources/styles/datum.css', 'r', encoding='utf-8') as f:
    style = f.read()
HTML(style)