## Data Offers by Location


In [1]:
import psycopg2
import pandas as pd
import plotly
import chart_studio.plotly as py
import plotly.graph_objs as go

from nbstyler import DATA_STYLE as DS

# Enable plotly's offline notebook mode; the chart below is rendered with
# `plotly.offline.iplot`.
plotly.offline.init_notebook_mode(connected=True)

# NOTE(review): two matplotlib backend magics are issued back to back, and
# matplotlib is not imported in the cells shown here. Presumably only the last
# one (`inline`) stays in effect; confirm whether both are needed.
%matplotlib notebook
%matplotlib inline

### Objectives

Prepare a simple bubble map that illustrates the geographic concentration of the targeted job type.

### Data preparation

For `data_df` we prepare a custom query that calculates the total number of job offers per settlement.

In [2]:
# SQL that counts the rows of the offer-history table per `job_location`,
# ordered from the largest count to the smallest. It is executed below to
# build `data_df`.
data_querystr = """
SELECT job_location, COUNT(*) subm_count 
FROM data_offers.do_full_offer_history 
GROUP BY job_location 
ORDER BY subm_count DESC
"""

In [3]:
# Run the aggregation query and load the result into `data_df`, indexed by
# location name.
# The connection is closed in a `finally` clause so that it is released even
# when the query raises; otherwise a failing query would leave it open in the
# kernel.
conn = psycopg2.connect('dbname=jobsbg')
try:
    data_df = pd.read_sql_query(data_querystr, conn, index_col='job_location')
finally:
    conn.close()

In [4]:
# Display the per-location offer counts loaded from the database.
data_df

Unnamed: 0_level_0,subm_count
job_location,Unnamed: 1_level_1
София,3854
Варна,226
Пловдив,164
Дистанционна,45
Русе,18
Велико Търново,15
,9
Бургас,8
Стара Загора,5
Божурище,5


We also need the coordinates of the settlements. For the initial version of this map we use a simple dictionary, since the number of locations with data job offers is very limited.

In [5]:
# Hand-maintained lookup: settlement name -> [latitude, longitude].
locations = {
    'София': [42.6900, 23.3102],
    'Варна': [43.2100, 27.9000],
    'Пловдив': [42.1500, 24.7500],
    'Русе': [43.8500, 25.9800],
    'Велико Търново': [43.0900, 25.6300],
    'Бургас': [42.2100, 27.4700],
    'Божурище': [42.7642, 23.1940],
    'Горна Оряховица': [43.1300, 25.7000],
    'Стара Загора': [42.4200, 25.6300],
    'Благоевград': [42.0300, 23.0900],
    'Челопеч': [42.7011, 24.0866],
    'Ботевград': [42.9200, 23.7800],
    'Радиново': [42.1921, 24.6428],
    'Казачево': [43.0805, 24.7497],
    'Добрич': [43.5800, 27.8300],
    'Монтана': [43.4200, 23.2300],
    'Карлово': [42.6600, 24.8100],
    'Сливен': [42.6800, 26.3200],
    'Девня': [43.2300, 27.6000],
    'Шумен': [43.2800, 26.9300],
    'Враца': [43.2200, 23.5600],
    'Пирдоп': [42.7100, 24.1900],
}

# One row per settlement (the name becomes the index) with `lat`/`long` columns.
loc_df = pd.DataFrame(
    list(locations.values()),
    index=list(locations.keys()),
    columns=['lat', 'long'],
)
loc_df

Unnamed: 0,lat,long
София,42.69,23.3102
Варна,43.21,27.9
Пловдив,42.15,24.75
Русе,43.85,25.98
Велико Търново,43.09,25.63
Бургас,42.21,27.47
Божурище,42.7642,23.194
Горна Оряховица,43.13,25.7
Стара Загора,42.42,25.63
Благоевград,42.03,23.09


In [6]:
# Attach coordinates to the offer counts and keep only the locations for which
# coordinates are known (rows without a match in `loc_df` are dropped).
# Any `lat`/`long` columns left over from a previous run of this cell are
# removed first, so the cell can be re-run without pandas raising a
# "columns overlap" ValueError on the join.
data_df = (
    data_df
    .drop(columns=loc_df.columns, errors='ignore')
    .join(loc_df)
    .dropna()
)
data_df

Unnamed: 0_level_0,subm_count,lat,long
job_location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
София,3854,42.69,23.3102
Варна,226,43.21,27.9
Пловдив,164,42.15,24.75
Русе,18,43.85,25.98
Велико Търново,15,43.09,25.63
Бургас,8,42.21,27.47
Стара Загора,5,42.42,25.63
Божурище,5,42.7642,23.194
Горна Оряховица,5,43.13,25.7
Радиново,4,42.1921,24.6428


In [7]:
# Row-position bounds into `data_df`, one pair per map trace.
ranks = [(0, 1), (2, 10), (11, 50), (51, 100), (101, 500), (501, 10000)]

# One colour per trace: entries 12, 10, 8, 6, 4, 2 of the `acc2` colour ramp.
colors = [DS['colorramp']['acc2'][step] for step in range(12, 0, -2)]

# Divisor applied to the offer counts when sizing the bubbles.
scale = 1

# Collects the Scattergeo traces built in the next cell.
data = []

In [8]:
# Build one Scattergeo bubble trace per rank group.
# `data` is rebuilt from scratch here so that re-running this cell does not
# append duplicate traces to the figure.
data = []
for (first, last), color in zip(ranks, colors):
    # The rank pairs are written as inclusive bounds (0-1, 2-10, 11-50, ...),
    # while positional slicing excludes the stop index. Slicing with
    # `[first:last]` therefore silently skipped the row at every upper bound
    # (positions 1, 10, 50, ...); `last + 1` keeps those rows on the map.
    df_sub = data_df.iloc[first:last + 1]
    city = go.Scattergeo(
        locationmode='country names',
        lon=df_sub['long'],
        lat=df_sub['lat'],
        text=df_sub['subm_count'],
        marker=go.scattergeo.Marker(
            # Bubble area is proportional to the number of offers.
            size=df_sub['subm_count'] / scale,
            color=color,
            line=go.scattergeo.marker.Line(
                width=0.5,
                color='rgb(40,40,40)'
            ),
            sizemode='area'
        ),
        name='{0} - {1}'.format(first, last))
    data.append(city)

In [9]:
# Figure layout: styling taken from the shared `DS` style dictionary and a geo
# view restricted to the longitude/latitude window given below.
layout = go.Layout(
    paper_bgcolor=DS['colors']['bg1'],
    plot_bgcolor=DS['colors']['bg1'],
    # The title font is set through `title.font`; the top-level `titlefont`
    # attribute is deprecated in plotly in favour of this form.
    title=go.layout.Title(
        text='Data Jobs Locations',
        font=DS['chart_fonts']['title'],
    ),
    font=DS['chart_fonts']['text'],
    height=525,
    autosize=True,
    showlegend=False,
    hidesources=True,
    geo=go.layout.Geo(
        resolution=50,
        scope='europe',
        projection=go.layout.geo.Projection(
            type='mercator',
            scale=1,
        ),
        # Visible window of the map (degrees of longitude / latitude).
        lonaxis=go.layout.geo.Lonaxis(
            range=[20.5, 30.5],
        ),
        lataxis=go.layout.geo.Lataxis(
            range=[41.0, 44.5],
        ),
        center=dict(
            lat=42.8,
            lon=25.5,
        ),
        bgcolor=DS['colorramp']['acc1'][1],
        showframe=False,
        showland=True,
        landcolor=DS['colorramp']['acc1'][1],
        showocean=True,
        oceancolor=DS['colorramp']['acc2'][3],
        countrywidth=1.5,
        countrycolor=DS['colors']['acc1'],
    )
)

In [10]:
# Combine the traces and the layout into a figure and render it in the notebook.
fig = go.Figure(
    data=data,
    layout=layout,
)
plotly.offline.iplot(
    fig,
    filename='data_offers_total_submissions_by_location.html',
)

In [11]:
# Export an HTML version of the chart to the file named below.
# NOTE: this line is active and runs whenever the cell is executed; comment it
# out to skip the export.
plotly.offline.plot(fig, filename='data_offers_total_submissions_by_location.html', show_link=False)

'data_offers_total_submissions_by_location.html'

In [12]:
# Read the project stylesheet and emit it as HTML output for this cell.
# `HTML` is imported from the public `IPython.display` module instead of the
# internal `IPython.core.display` path.
# NOTE(review): the file is presumably wrapped in a <style> element so that it
# restyles the notebook. Confirm against datum.css.
from IPython.display import HTML

with open('../resources/styles/datum.css', 'r') as f:
    style = f.read()
HTML(style)