In [None]:
!pip install scipy numpy matplotlib pandas sklearn > /dev/null
!pip install requests
!pip install geopandas
!pip install geopy
!pip install folium

In [None]:
# Load libraries
import json
import requests
import pandas as pd
from pandas.io.json import json_normalize
from geopy import Nominatim
import numpy as np
import webbrowser
import folium
from folium.plugins import HeatMap
import geopandas as gpd


<h2>Dataset load</h2>

In [None]:
                  
def get_jobs_info(api_url = 'https://justjoin.it/api/offers', timeout = 30):
    """Download the job offers served by the given API endpoint.

    Parameters
    ----------
    api_url : str
        Endpoint queried with an HTTP GET request.
    timeout : float or tuple
        Seconds to wait for the server, passed straight to ``requests.get``.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    object or None
        The decoded JSON payload when the server answers with status 200,
        otherwise ``None``.

    Raises
    ------
    requests.RequestException
        Transport problems (including a timeout) are not caught here.
    """
    headers = {'Content-Type': 'application/json'}
    response = requests.get(api_url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        # Non-200 answers are reported as None rather than raised.
        return None
    return json.loads(response.content.decode('utf-8'))


<h2>Global column names</h2>

Column names used when building the heat maps and the DataFrames that feed them

In [None]:
# Columns of the raw offers frame that the notebook reads.
id_column_name = "id"
city_column_name = "city"
country_code_column_name = "country_code"

# Coordinates written by the geocoding step.
lat_column_name = "lat"
lon_column_name = "lon"

# Province name extracted from the geocoded address, and the numeric
# province id used as the choropleth key.
province_column_name = "province"
province_id_column_name = "province_id"

# Number of unique offers per city; default weight of the heat maps.
numerical_column_name = "no"

<h2>Coordinates load</h2>

In [None]:
def get_province_from_address(address_text, province_string="województwo ",
                              split_sign=","):
    """Extract the province name from an address string.

    The province is the text that follows the first occurrence of
    ``province_string`` and precedes the next ``split_sign``.  An empty
    string is returned when ``province_string`` does not occur in
    ``address_text``.
    """
    _, _, after_marker = address_text.partition(province_string)
    province, _, _ = after_marker.partition(split_sign)
    return province
    
def fill_geo_info(places_data, loc_column_name, fill_province = False):
    """Geocode every row of ``places_data`` with Nominatim.

    Parameters
    ----------
    places_data : pandas.DataFrame
        Frame with one place per row.  It is modified in place and also
        returned.
    loc_column_name : str
        Column holding the text sent to the geocoder.
    fill_province : bool
        When true, the province parsed from the geocoded address is stored
        in the province column as well.

    Returns
    -------
    pandas.DataFrame
        ``places_data`` with the latitude/longitude (and optionally the
        province) filled for every place the geocoder resolved.  Rows that
        could not be resolved are left untouched.
    """
    # Imported locally so the helper is in scope without editing the
    # notebook's import cell; geopy itself is already a dependency.
    from geopy.extra.rate_limiter import RateLimiter

    locator = Nominatim(user_agent="myGeocoder")
    # The public Nominatim service allows at most one request per second.
    # RateLimiter spaces the calls out, retries failed requests and yields
    # None for a query that keeps failing instead of aborting the loop.
    geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

    if fill_province and province_column_name in places_data.columns:
        # Province names are strings; a float (all-NaN) placeholder column
        # cannot hold them without a dtype upcast that pandas deprecates.
        places_data[province_column_name] = \
            places_data[province_column_name].astype(object)

    for i, row in places_data.iterrows():
        location = geocode(row[loc_column_name])
        if location is None:
            continue  # unresolved place: keep whatever the row already has
        places_data.loc[i, lat_column_name] = location.latitude
        places_data.loc[i, lon_column_name] = location.longitude
        if fill_province:
            places_data.loc[i, province_column_name] = \
                get_province_from_address(location.address)

    return places_data


<h2>Heat map printing functions</h2>

In [None]:
def print_heat_map(places_data, file_path = 'heat_map.html',
                   heat_column_name = numerical_column_name):
    """Render a folium heat map of the places and open it in the browser.

    Parameters
    ----------
    places_data : pandas.DataFrame
        Frame with the latitude, longitude and heat columns.
    file_path : str
        HTML file the map is saved to before it is opened.
    heat_column_name : str
        Column whose values weight each point of the heat map.

    Raises
    ------
    ValueError
        When no row has both coordinates and a heat value.
    """
    used_columns = [lat_column_name, lon_column_name, heat_column_name]
    # Places that could not be geocoded keep NaN coordinates.  They would
    # turn the map centre into NaN and folium rejects NaN locations, so
    # only complete rows are plotted.
    plotted = places_data.dropna(subset=used_columns)
    if plotted.empty:
        raise ValueError("no rows with coordinates and a heat value to draw")

    lat = np.asarray(plotted[lat_column_name], dtype=float)
    lon = np.asarray(plotted[lon_column_name], dtype=float)
    no = np.asarray(plotted[heat_column_name], dtype=float)
    data = [list(point) for point in zip(lat, lon, no)]

    # The map is centred on the mean coordinate and starts at zoom level 6.
    map_osm = folium.Map(location=[lat.mean(), lon.mean()], zoom_start=6,
                         control_scale=True)
    HeatMap(data).add_to(map_osm)  # add the heat layer to the created map
    map_osm.save(file_path)  # save as an HTML file
    webbrowser.open(file_path)  # open in the default browser

def print_province_heat_map(province_data = None, heat_column_name = numerical_column_name,
                            province_key_column_name = province_id_column_name,
                            file_path = 'province_heat_map.html',
                            legend_name = None):
    """Render a province-level choropleth and open it in the browser.

    Parameters
    ----------
    province_data : pandas.DataFrame
        Frame with one row per province.  Required despite the ``None``
        default, which is kept only for signature compatibility.
    heat_column_name : str
        Column with the values that colour each province.
    province_key_column_name : str
        Column with the province id matched against the ``JPT_KOD_JE``
        property of the province shapes.
    file_path : str
        HTML file the map is saved to before it is opened.
    legend_name : str or None
        Caption of the colour legend.

    Raises
    ------
    ValueError
        When ``province_data`` is not supplied.
    """
    if province_data is None:
        raise ValueError("province_data is required to draw the province heat map")

    # Preprocessing: only the key and the value column matter for the map,
    # so rows are dropped only when one of those two is missing.  The copy
    # keeps the caller's frame untouched by the cast below.
    used_columns = [province_key_column_name, heat_column_name]
    province_data = province_data.dropna(subset=used_columns).copy()
    # Cast the column that is actually used as the key (not a fixed global
    # one) so it compares equal to the integer ids of the shapes.
    province_data[province_key_column_name] = \
        province_data[province_key_column_name].astype(int)

    province_geo_paths = get_province_geo_paths()
    province_map = folium.Map([52, 19], zoom_start=6)
    folium.Choropleth(geo_data=province_geo_paths,
                      data=province_data,
                      # key column, value column
                      columns=[province_key_column_name, heat_column_name],
                      # key taken from the GeoJSON
                      key_on='feature.properties.JPT_KOD_JE',
                      fill_color='YlOrRd',
                      fill_opacity=0.7,
                      line_opacity=0.2,
                      legend_name=legend_name).add_to(province_map)
    # save the created map to an HTML file
    province_map.save(outfile = file_path)
    webbrowser.open(file_path)  # open in the default browser
    
def get_province_geo_paths(shapefile_path = 'wojewodztwa.shp', simplify_tolerance = 0.005):
    """Load the province shapes and return them as a GeoJSON string.

    Parameters
    ----------
    shapefile_path : str
        Shapefile with the province polygons; it must provide a
        ``JPT_KOD_JE`` attribute convertible to ``int``.
    simplify_tolerance : float
        Tolerance passed to ``GeoSeries.simplify`` (smaller value = more
        accurate outline).

    Returns
    -------
    str
        GeoJSON with the integer ``JPT_KOD_JE`` property and the simplified
        geometry of every province.
    """
    province_shapes = gpd.read_file(shapefile_path)
    # Work on an independent copy of the two needed columns so the
    # assignments below do not write into a slice of the loaded table.
    province_shapes = province_shapes[['JPT_KOD_JE', "geometry"]].copy()
    province_shapes['JPT_KOD_JE'] = province_shapes['JPT_KOD_JE'].astype(int)

    # NOTE(review): the tolerance is expressed in the units of the
    # shapefile's CRS and folium draws lon/lat degrees - confirm the file
    # is stored in a geographic CRS (e.g. EPSG:4326).
    province_shapes.geometry = province_shapes.geometry.simplify(simplify_tolerance)
    return province_shapes.to_json()

In [None]:
def group_by_city(data, address_column_name):
    """Count the distinct offers published for every place.

    ``data`` is grouped by ``address_column_name`` and the unique offer ids
    of each group are counted.  The result has one row per place with the
    place name, the offer count and two NaN placeholder columns for the
    latitude and longitude that the geocoding step fills in later.
    """
    offers_per_place = data.groupby(address_column_name)[id_column_name].nunique()
    summary = pd.DataFrame({
        city_column_name: offers_per_place.index,
        numerical_column_name: offers_per_place.values,
    })
    # Coordinates are unknown at this stage.
    return summary.assign(**{lat_column_name: np.nan, lon_column_name: np.nan})

In [None]:
# just join it: download the offers and keep only the Polish ones
jjit_json = get_jobs_info()
if jjit_json is None:
    # get_jobs_info() returns None for any non-200 answer; stop here with a
    # clear message instead of failing later inside json_normalize.
    raise RuntimeError("justjoin.it API did not return the offers (non-200 response)")
# NOTE: pandas.io.json.json_normalize is deprecated since pandas 1.0 in
# favour of pandas.json_normalize; the name imported at the top is kept.
jjit_data = json_normalize(jjit_json)
# Filter into a new frame rather than dropping rows in place.
jjit_data = jjit_data[jjit_data[country_code_column_name] == 'PL']

# print heat map with offer count
grouped_city_data = group_by_city(jjit_data, city_column_name)
places_map = fill_geo_info(grouped_city_data, city_column_name)
print_heat_map(places_map)

<h2>Get province data </h2>

In [None]:
def get_province_ids():
    """Load the province lookup table stored in ``woj_oznaczenia.csv``.

    The file is read from the current working directory with pandas'
    pure-Python parser.  NOTE(review): later cells merge this table on the
    ``province`` column and key the choropleth on ``province_id`` -
    presumably both columns come from this file; confirm against the CSV.
    """
    province_ids = pd.read_csv('woj_oznaczenia.csv', engine='python')
    return province_ids


def get_city_data(data, address_column_name):
    """Build an empty geocoding table with one row per distinct place.

    ``data`` is grouped by ``address_column_name``; the group keys become
    the city column of the result.  The latitude, longitude and province
    columns are created as NaN placeholders to be filled by the geocoding
    step.
    """
    unique_places = data.groupby(address_column_name)[id_column_name].nunique()

    places = pd.DataFrame({city_column_name: unique_places.index})
    places[lat_column_name] = np.nan
    places[lon_column_name] = np.nan
    # Province names are strings, so the placeholder must be an object
    # column: writing text into a float64 NaN column is an incompatible
    # dtype assignment that pandas deprecates.
    places[province_column_name] = pd.Series(np.nan, index=places.index, dtype=object)

    return places

In [None]:
# Geocode every city again, this time also resolving its province.
places_map = get_city_data(jjit_data, city_column_name)
places_map = fill_geo_info(places_map, city_column_name, fill_province=True)

# Keep the cell re-runnable: if jjit_data already carries the geo columns
# from a previous run, merging again would suffix them to *_x / *_y and the
# province groupby in the following cells would fail.
geo_columns = [lat_column_name, lon_column_name, province_column_name]
jjit_data = pd.merge(jjit_data.drop(columns=geo_columns, errors='ignore'),
                     places_map, how='outer',
                     left_on=city_column_name, right_on=city_column_name)

province_ids = get_province_ids()

<h2>Print heat map for min/max average salary </h2>

In [None]:
# Average the lower salary bound of the offers within each province.
min_salary_per_province_data = (
    jjit_data
    .groupby(province_column_name)
    .agg({'salary_from': 'mean'})
    .rename(columns={'salary_from': 'mean_salary_from'})
    .reset_index()
)

# Attach the GUGiK province table, which provides the province id.
province_data = pd.merge(
    min_salary_per_province_data,
    province_ids,
    how='outer',
    left_on=province_column_name,
    right_on=province_column_name,
)

print_province_heat_map(
    province_data=province_data,
    heat_column_name="mean_salary_from",
    file_path="min.html",
    legend_name="Średnie minimalne wynagrodzenia",
)

In [None]:
# Average the upper salary bound of the offers within each province.
max_salary_per_province_data = (
    jjit_data
    .groupby(province_column_name)
    .agg({'salary_to': 'mean'})
    .rename(columns={'salary_to': 'mean_salary_to'})
    .reset_index()
)

# Attach the GUGiK province table, which provides the province id.
province_data = pd.merge(
    max_salary_per_province_data,
    province_ids,
    how='outer',
    left_on=province_column_name,
    right_on=province_column_name,
)

print_province_heat_map(
    province_data=province_data,
    heat_column_name="mean_salary_to",
    file_path="maks.html",
    legend_name="Średnie maksymalne wynagrodzenia",
)