## Data Offers Key Players

In [1]:
import datetime as dt

import psycopg2
import pandas as pd
import plotly
from plotly import tools
import plotly.plotly as py
import plotly.graph_objs as go
from IPython.display import HTML

from nbstyler import DATA_STYLE as s

plotly.offline.init_notebook_mode(connected=True) # run at the start of every ipython notebook to use plotly.offline

# NOTE(review): the second magic runs right after the first, so the inline
# backend is the one left active. No matplotlib call is visible in this
# notebook -- confirm whether either line is still needed.
%matplotlib notebook
%matplotlib inline

In [2]:
# Load the data-offer submission history from the `jobsbg` database into a
# DataFrame indexed by `subm_date`.
offers_query = (
    'SELECT subm_date, subm_type, job_id, company_id, job_title, company_name '
    'FROM v_full_data_offers_history'
)

conn = psycopg2.connect("dbname=jobsbg")
try:
    # No explicit cursor is needed: the query is executed through the
    # connection object handed to pandas.
    datajobs_df = pd.read_sql_query(offers_query, conn, index_col='subm_date')
finally:
    # Release the connection even when the query raises.
    conn.close()

In [3]:
# The frame is indexed by `subm_date`; convert that index to pandas datetimes.
subm_dates = pd.to_datetime(datajobs_df.index)
datajobs_df.index = subm_dates


In [4]:
# Show the first two rows to check the loaded columns and the date index.
datajobs_df.head(2)

Unnamed: 0_level_0,subm_type,job_id,company_id,job_title,company_name
subm_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-09-27,submission,3994437,124912,Data Analyst,ПрайсуотърхаусКупърс Одит ООД
2017-09-27,submission,3994555,67058,ETL Developer,Adastra Bulgaria Ltd.


In [5]:
# Count the non-null `job_id` entries per company, then order the companies
# from the largest count to the smallest.
offers_per_company = datajobs_df.groupby('company_name')[['job_id']].count()
source_df = offers_per_company.sort_values(by=['job_id'], ascending=False)

In [6]:
# Add the columns the Pareto chart plots.
job_counts = source_df['job_id']

# Running total of the per-company counts (rows are already sorted descending).
source_df['cumulative_sum'] = job_counts.cumsum()

# The running total expressed as a percentage of the overall count.
source_df['cumulative_perc'] = 100 * source_df['cumulative_sum'] / job_counts.sum()

# Constant column used to draw the horizontal '80%' reference line.
source_df['demarcation'] = 80

In [7]:
# Show the five companies with the highest counts, with their cumulative columns.
source_df.head(5)

Unnamed: 0_level_0,job_id,cumulative_sum,cumulative_perc,demarcation
company_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bEYE Ltd,110,110,6.731946,80
EXPERIAN BULGARIA EAD,64,174,10.648715,80
Си Екс Джи ООД,63,237,14.504284,80
Технементалс Технолоджис (България) ЕАД,43,280,17.135863,80
ДОПАМИН ЕООД,36,316,19.339045,80


In [8]:
# Bars: one per company, with height equal to that company's `job_id` count.
trace1 = go.Bar(
    x=source_df.index,
    y=source_df['job_id'],
    name='Count',
    marker={'color': s['colors']['acc1']},
)

# Line: cumulative percentage per company, plotted against the secondary
# axis ('y2').
trace2 = go.Scatter(
    x=source_df.index,
    y=source_df['cumulative_perc'],
    name='Cumulative Percentage',
    yaxis='y2',
    line={
        'color': s['colors']['acc2'],
        'width': 2,
    },
)

# Dashed reference line: the constant `demarcation` column, also plotted
# against the secondary axis ('y2').
trace3 = go.Scatter(
    x=source_df.index,
    y=source_df['demarcation'],
    name='80%',
    yaxis='y2',
    line={
        'color': s['colors']['acc1'],
        'width': 1.2,
        'dash': 'dash',
    },
)

In [9]:
# Traces passed to the figure: count bars, cumulative-percentage line, 80% reference line.
data = [trace1, trace2, trace3]


In [21]:
# Axis definitions, built separately so the Layout call below stays short.

# Company axis: tick labels are hidden.
companies_axis = {
    'title': 'Companies',
    'tickangle': -90,
    'showticklabels': False,
}

# Primary (left) axis for the bar trace.
counts_axis = {'title': 'Data Jobs Count'}

# Secondary (right) axis for the traces bound to 'y2', drawn over the
# primary axis.
percent_axis = {
    'range': [0, 101],
    'tickvals': [0, 25, 50, 75, 100],
    'overlaying': 'y',
    'side': 'right',
}

# Figure layout; colours and fonts come from the shared notebook style `s`.
layout = go.Layout(
    paper_bgcolor=s['colors']['bg1'],
    plot_bgcolor=s['colors']['bg1'],
    title='Key Players by Total Number of Submitted Data Jobs',
    titlefont=s['chart_fonts']['title'],
    font=s['chart_fonts']['text'],
    autosize=True,
    showlegend=False,
    hidesources=True,
    xaxis=companies_axis,
    yaxis=counts_axis,
    yaxis2=percent_axis,
)

In [22]:
# Combine the traces and the layout into one figure and render it inline
# through plotly's offline mode.
fig = go.Figure(layout=layout, data=data)
plotly.offline.iplot(
    fig,
    filename='data_offers_key_players_pareto.html',
)

In [23]:
# Uncomment the line below to export an HTML version of the chart.
# plotly.offline.plot(fig, filename = 'data_offers_key_players_pareto.html', show_link=False)

'file:///data/WORKSPACE/jpynb_Employment_Trends_Bulgaria/workbooks/data_offers_key_players_pareto.html'

In [17]:
# Read the notebook's stylesheet and hand its contents to HTML() so the cell
# output renders it. `HTML` comes from the notebook's top import cell.
# NOTE(review): presumably datum.css wraps its rules in a <style> element so
# the browser applies them -- confirm against the file.
# The encoding is explicit so the read does not depend on the platform default.
with open('../resources/styles/datum.css', 'r', encoding='utf-8') as f:
    style = f.read()
HTML(style)