## Data Offers Pie Chart and Bar Chart

In [1]:
import itertools
import datetime as dt
import psycopg2
import pandas as pd
import plotly
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go

from nbstyler import DATA_STYLE as s

plotly.offline.init_notebook_mode(connected=True) # run at the start of every ipython notebook to use plotly.offline

%matplotlib notebook
%matplotlib inline

In [2]:
# Load the two inputs of the notebook from the `jobsbg` database:
#   * all_jobs_count - row count of v_full_offers_history
#   * datajobs_df    - every row of v_full_data_offers_history, indexed by subm_date
# The cursor and the connection are released in `finally` blocks so that a
# failing query does not leave an open connection behind in the kernel.
conn = psycopg2.connect("dbname=jobsbg")
try:
    cur = conn.cursor()
    try:
        cur.execute('SELECT count(*) FROM v_full_offers_history')
        all_jobs_count = cur.fetchone()[0]
    finally:
        cur.close()

    datajobs_df = pd.read_sql_query(
        'SELECT * FROM v_full_data_offers_history',
        conn,
        index_col='subm_date',
    )
finally:
    conn.close()

In [3]:
# Number of rows loaded from the data-offers view, shown next to the
# overall offer count for a quick sanity check.
datajobs_count = datajobs_df.shape[0]
(datajobs_count, all_jobs_count)

(1607, 625856)

In [4]:
# Pie trace comparing data-job offers with the rest of the job market.
#
# The slice labelled 'All Other' must exclude the data jobs, otherwise they are
# counted in both slices and the share is understated.
# NOTE(review): assumes every row of v_full_data_offers_history is also
# present in v_full_offers_history - confirm against the view definitions.
other_jobs_count = all_jobs_count - datajobs_count

pie_trace = go.Pie(
    values=[other_jobs_count, datajobs_count],
    labels=['All Other', 'Data Jobs'],
    marker=dict(
        colors=[
            s['colorramp']['acc1'][5],
            s['colors']['acc1'],
        ],
        line=dict(
            color=s['colors']['acc1'],
            width=1,
        )
    ),
    showlegend=False,
    # Left part of the figure; the bar chart uses the right half.
    domain={'x':[0, 0.32], 'y':[0,1]},
    name='Data Jobs Share of the Job Market',
    hoverinfo='none',
    # Print the absolute counts on the slices.
    textinfo='value',
    textfont=s['chart_fonts']['anno'],
    insidetextfont=s['chart_fonts']['anno_in'],
    hole=0.25,
    rotation=45,
    pull=0.05,
)

In [5]:
# Union of all key phrases used for the per-phrase counts. Every group is
# non-capturing: `str.contains` only needs a yes/no answer, and capturing
# groups make pandas emit "This pattern has match groups" on each call.
# The set of matching titles is the same as with the capturing version.
ptrn = '|'.join([
    r'data analy(?:st|tics|sis)',
    r'анализ.*данни',
    r'(?:^|[/( ])bi(?:/| |$)',
    r'(?:data|business) intelligence',
    r'reporting (?:analyst|specialist)',
    r'etl(?: |$)',
    r'data (?:engineer|warehouse|scientist)',
])

# Looking at unmatched offers for count stats. `na=False` treats a missing
# title as "no match" so the boolean negation below cannot fail on NaN.
matched_mask = datajobs_df['job_title'].str.lower().str.contains(ptrn, na=False)
datajobs_df[~matched_mask].head(1)


This pattern has match groups. To actually get the groups, use str.extract.



Unnamed: 0_level_0,subm_type,job_id,company_id,norm_salary,job_title,company_name,text_salary,job_contents
subm_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-12-20,submission,4102927,229915,,Marketing Analyst with Tableau experience,key2market EOOD,,"<!DOCTYPE html PUBLIC ""-//W3C//DTD XHTML 1.0 ..."


In [6]:
# Lower-case the titles once instead of once per key phrase.
job_titles_lower = datajobs_df.job_title.str.lower()


def count_hits(titles, pattern):
    """Return the total number of regex matches of ``pattern`` over ``titles``.

    ``titles`` is a pandas Series of strings. Matches are counted per title
    with ``Series.str.count`` and then summed; missing titles are skipped by
    ``Series.sum`` and therefore contribute nothing to the total.
    """
    return int(titles.str.count(pattern).sum())


bi_count = count_hits(job_titles_lower, '((^|[/( ])bi(/| |$))|((data|business) intelligence)')
da_count = count_hits(job_titles_lower, '(data analy(st|tics|sis))|(анализ.*данни)')
rep_count = count_hits(job_titles_lower, 'reporting (analyst|specialist)')
etl_count = count_hits(job_titles_lower, 'etl( |$)')
de_count = count_hits(job_titles_lower, 'data (engineer|warehouse)')
ds_count = count_hits(job_titles_lower, 'data scientist')

# Displayed in the same order as the bars of the chart.
bi_count, da_count, rep_count, de_count, etl_count, ds_count

(548, 507, 239, 180, 108, 79)

In [7]:
# Horizontal bar trace: one bar per key phrase, bar length = number of hits.
bar_labels = [
    'Business Intelligence',
    'Data Analysis',
    'Reporting',
    'Data Engineering',
    'ETL',
    'Data Science',
]
bar_hits = [bi_count, da_count, rep_count, de_count, etl_count, ds_count]

# Fill in the accent colour with a thin outline taken from the end of the ramp.
bar_marker = dict(
    color=s['colors']['acc1'],
    opacity=0.8,
    line=dict(
        color=s['colorramp']['acc1'][-1],
        width=1,
    ),
)

bars_trace = go.Bar(
    x=bar_hits,
    y=bar_labels,
    orientation='h',
    marker=bar_marker,
    # Drawn against the second x axis, which the layout defines as `xaxis2`.
    xaxis='x2',
    showlegend=False,
)

In [8]:
# Both traces share one figure: the pie on the left, the bars on the right.
data = [pie_trace, bars_trace]

# Paper and plot area use the same background colour from the style sheet.
background = s['colors']['bg1']

layout = go.Layout(
    title="Data Jobs Market Share",
    titlefont=s['chart_fonts']['title'],
    font=s['chart_fonts']['text'],
    paper_bgcolor=background,
    plot_bgcolor=background,
    xaxis={'title': 'x1'},
    # Second x axis, limited to the right half of the figure.
    xaxis2={
        'title': 'Hits per key phrase',
        'domain': [0.50, 1],
    },
)

In [9]:
# Combine the traces and the layout into a figure and render it in the notebook.
fig = go.Figure(layout=layout, data=data)
plotly.offline.iplot(
    fig,
    filename='data_offers_pie_and_bar.html',
)

In [12]:
# Uncomment the line below to export an HTML version of the chart.
# plotly.offline.plot(fig, filename = 'data_offers_pie_and_bar.html')

'file:///data/WORKSPACE/jpynb_Employment_Trends_Bulgaria/workbooks/data_offers_pie_and_bar.html'

In [11]:
# Apply the project's notebook styling: read the stylesheet from disk and hand
# its contents to the notebook as raw HTML.
# NOTE(review): presumably datum.css wraps its rules in a <style> element,
# otherwise the text would be rendered instead of applied - confirm.
# `HTML` is imported from IPython.display, the documented public module,
# instead of the internal IPython.core.display.
from IPython.display import HTML

with open('../resources/styles/datum.css', 'r') as css_file:
    style = css_file.read()
HTML(style)