In [1]:
import numpy as np
import pandas as pd

import os
import flask
import json
import unidecode 

from flask import Flask, session, redirect, url_for, render_template, request
from werkzeug.serving import run_simple

In [2]:
# Create the Flask application (no database connection is opened here).
app = Flask(__name__)

# The session-signing key must not be committed with the notebook: read it from
# the FLASK_SECRET_KEY environment variable. When the variable is unset or
# empty, fall back to a random per-process key, which means sessions do not
# survive a server restart.
app.secret_key = os.environ.get("FLASK_SECRET_KEY", "").encode() or os.urandom(24)

# Load data

In [3]:
# Load the job-history table, then discard the columns that the rest of this
# notebook does not reference.
df = pd.read_csv("data/df_get.csv")
df = df.drop(columns=["Unnamed: 0", "date_start_job", "unique_id", "date_end_job", "source"])

In [4]:
# Preview the first rows of the table before its codes are converted to text.
df.head()

Unnamed: 0,candidate_id,function_id,isco_functie_niveau,isco_code4,function_name_self,company_name,time_spent,education
0,93728,896.0,1.0,346.0,Functienaam van code 9721,378466.0,4.0,3.0
1,93728,73.0,2.0,202.0,Archiefmedewerker,349302.0,17.0,3.0
2,93728,1841.0,4.0,86.0,ICT-beheerder,70896.0,1461.0,3.0
3,93728,2529.0,2.0,233.0,Onderwijsassistent,73627.0,365.0,3.0
4,93728,20.0,2.0,196.0,Administratief Logistiek Medewerker,221032.0,1826.0,3.0


In [5]:
# Series indexed by candidate_id; each value is the NumPy array of that
# candidate's `candidate_certificate_id` entries.
certs = (
    pd.read_csv("data/candidate_certificates_mappings.csv")
    .drop(columns="Unnamed: 0")
    .groupby("candidate_id")["candidate_certificate_id"]
    .apply(lambda group: group.values)
)

In [6]:
# Lookup table from the "ISCO_08_Code" column to the "NL ISCO " column of the
# translation sheet (that header really does end with a space).
isco_codes = (
    pd.read_csv("data/isco translations it sp uk nl fr de - Sheet1.csv")
    .set_index("ISCO_08_Code")["NL ISCO "]
    .to_dict()
)

# NOTE(review): the key normalisation below is disabled. Later cells index this
# dict with ints, so this presumably relies on pandas parsing the code column
# as integers -- TODO confirm.
# isco_codes = {int(k): v for k, v in isco_codes.items() if k.isdigit()}

In [7]:
# Series indexed by candidate_id; each value is the NumPy array of that
# candidate's `language_id` entries.
languages = (
    pd.read_csv("data/languages_mappings.csv")
    .drop(columns="Unnamed: 0")
    .groupby("candidate_id")["language_id"]
    .apply(lambda group: group.values)
)

In [8]:
# Series indexed by candidate_id; each value is the NumPy array of that
# candidate's `driving_licenses` entries.
licenses = (
    pd.read_csv("data/license_mappings.csv")
    .groupby("candidate_id")["driving_licenses"]
    .apply(lambda group: group.values)
)

In [9]:
# Plain dict: candidate_id -> value of the `from_post_code` column.
addresses = (
    pd.read_csv("data/address_mappings.csv")
    .set_index("candidate_id")["from_post_code"]
    .to_dict()
)

In [10]:
# Series indexed by candidate_id; each value is the NumPy array of that
# candidate's `skill_id` entries.
skills = (
    pd.read_csv("data/candidate_skills_mapped.csv")
    .drop(columns=["Unnamed: 0", "date_start"])
    .groupby("candidate_id")["skill_id"]
    .apply(lambda group: group.values)
)

In [11]:
# Display label for each numeric education level; 0 is the "not applicable"
# label and is also what missing education values are filled with.
education = {
    5: "WO",
    4: "HBO",
    3: "MBO",
    2: "Middelbaar onderwijs",
    1: "Basisonderwijs",
    0: "N.v.t.",
}

In [12]:
# JSON object keys are always strings, so convert them to ints while loading.
with open("data/index_to_company.json") as f:
    companies = {int(index): name for index, name in json.load(f).items()}

In [13]:
# Both the keys and the values of this mapping are converted to ints.
with open("data/index_to_isco.json") as f:
    isco = {int(index): int(code) for index, code in json.load(f).items()}

In [14]:
# Display label for each value of `isco_functie_niveau`, numbered from 1.
skill_levels = dict(enumerate(
    ["Routineus werk", "Regulier werk", "Bovengemiddeld werk", "Kenniswerk"],
    start=1,
))

In [15]:
# Index groups per sector. Presumably these share the index space of
# `index_to_name`, since the example predictions fall inside them -- TODO confirm.
finance = {5, 20, *range(70, 76), 142, 143, 146, 195}

# NOTE(review): the hand-written literal listed 128 twice. That has no effect on
# a set, but it may be a typo for another index -- confirm.
healthcare = {16, 17, *range(48, 61), *range(127, 139), 140, 141, 234, 235, 236}

cust_supp = {152, 183, *range(185, 193)}

# Compose the two lookups: key of `isco` -> ISCO code -> name in `isco_codes`.
# Raises KeyError if any code in `isco` is missing from `isco_codes`.
index_to_name = {
    index: isco_codes[code]
    for index, code in isco.items()
}

In [16]:
# Rows without an `isco_code4` value are removed; the conversion cells below
# call int() on this column, which would fail on NaN.
df.dropna(subset=["isco_code4"], inplace=True)
# Missing education becomes level 0, which the `education` table labels "N.v.t.".
df["education"] = df["education"].fillna(0)

# Convert DataFrame values to human-readable text

In [17]:
# Replace company indices by their names; values without an entry in
# `companies` are kept unchanged.
df["company_name"] = df["company_name"].apply(
    lambda company: companies.get(company, company)
)

In [18]:
# Translate each value through `isco` where an entry exists; otherwise keep the
# value itself, cast to int.
df["isco_code4"] = df["isco_code4"].apply(
    lambda raw: isco.get(int(raw), int(raw))
)

In [19]:
# Swap every ISCO code for its name; a code missing from `isco_codes` raises
# KeyError.
df["isco_code4"] = df["isco_code4"].apply(isco_codes.__getitem__)

In [20]:
# Post code per candidate, or "N.v.t." when no address is known.
df["location"] = df["candidate_id"].apply(
    lambda candidate: addresses.get(candidate, "N.v.t.")
)

In [21]:
# Comma-separated skills per candidate; `in` on a Series tests its index, i.e.
# whether the candidate has any skill rows at all.
df["skills"] = df["candidate_id"].apply(
    lambda candidate: "N.v.t." if candidate not in skills else ", ".join(skills[candidate])
)

In [22]:
# Comma-separated certificates per candidate, or "N.v.t." when the candidate
# does not appear in the index of `certs`.
df["certificates"] = df["candidate_id"].apply(
    lambda candidate: "N.v.t." if candidate not in certs else ", ".join(certs[candidate])
)

In [23]:
# Comma-separated driving licenses per candidate, or "N.v.t." when the
# candidate does not appear in the index of `licenses`.
df["licenses"] = df["candidate_id"].apply(
    lambda candidate: "N.v.t." if candidate not in licenses else ", ".join(licenses[candidate])
)

In [24]:
# Comma-separated languages per candidate, or "N.v.t." when the candidate does
# not appear in the index of `languages`.
df["languages"] = df["candidate_id"].apply(
    lambda candidate: "N.v.t." if candidate not in languages else ", ".join(languages[candidate])
)

In [25]:
# Numeric work level -> its label from `skill_levels`.
df["isco_functie_niveau"] = df["isco_functie_niveau"].apply(
    lambda level: skill_levels[int(level)]
)

In [26]:
# Numeric education level -> its label from `education`.
df["education"] = df["education"].apply(
    lambda level: education[int(level)]
)

In [27]:
# Missing durations count as 0; the column is then stored as integers.
df["time_spent"] = (
    df["time_spent"]
    .fillna(0)
    .astype(int)
)

In [28]:
# Preview the table again now that its codes have been replaced by text.
df.head()

Unnamed: 0,candidate_id,function_id,isco_functie_niveau,isco_code4,function_name_self,company_name,time_spent,education,location,skills,certificates,licenses,languages
0,93728,896.0,Routineus werk,Laders en lossers,Functienaam van code 9721,Friesland Foods Fresh** NIET GEBRUIKEN**,4,MBO,3862,"Programmeertalen, Rijervaring in Oostenrijk, A...",N.v.t.,B,"Duits, Engels"
1,93728,73.0,Regulier werk,Archiverings- en kopieermedewerkers,Archiefmedewerker,UL International (Netherlands) BV,17,MBO,3862,"Programmeertalen, Rijervaring in Oostenrijk, A...",N.v.t.,B,"Duits, Engels"
2,93728,1841.0,Kenniswerk,Systeembeheerders,ICT-beheerder,Het Nieuwe Eemland college,1461,MBO,3862,"Programmeertalen, Rijervaring in Oostenrijk, A...",N.v.t.,B,"Duits, Engels"
3,93728,2529.0,Regulier werk,Onderwijsassistenten,Onderwijsassistent,Het Nieuwe Eemland,365,MBO,3862,"Programmeertalen, Rijervaring in Oostenrijk, A...",N.v.t.,B,"Duits, Engels"
4,93728,20.0,Regulier werk,Administratief magazijnpersoneel,Administratief Logistiek Medewerker,Veluweloop evenementorganisatie,1826,MBO,3862,"Programmeertalen, Rijervaring in Oostenrijk, A...",N.v.t.,B,"Duits, Engels"


In [29]:
# Discard the numeric `function_id`, then let the self-reported function name
# take over that column name.
df = (
    df.drop(columns=["function_id"])
      .rename(columns={"function_name_self": "function_id"})
)

In [30]:
# Group once up front so a request handler can fetch one candidate's rows with
# `get_group`.
df_grouped = df.groupby(by="candidate_id")

In [31]:
                              # CNN-LSTM, LSTM, CNN, 
# Per user type: the (candidate_id, key into `index_to_name`) pairs that are
# shown one after another.
examples = {
    "Gezondheidszorg": [(8468794, 131), (6894267, 235)],
    "Financiën": [(6924590, 70)],
    "Klantenservice": [(6615225, 186)],
}

# Flask 

In [32]:
@app.route('/', methods=["GET", "POST"])
def index():
    """Serve the landing page and start a run once a user type is chosen.

    GET renders ``index.html``. POST stores the value of the pressed
    ``submit_button`` as the session's user type, resets the example counter
    to 0 and redirects to the ``sliders`` view.
    """
    if request.method != "POST":
        return render_template("/index.html")

    session["user_type"] = request.form['submit_button']
    session["example"] = 0
    return redirect(url_for("sliders"))

In [33]:
@app.route('/sliders', methods=["GET", "POST"])
def sliders():
    """Render the next example CV with its predicted occupation.

    On POST, the four submitted ratings (form fields ``topleft``,
    ``topright``, ``bottom`` and ``general``, each of the form ``option<N>``)
    are parsed to ints and printed to the server log before the page for the
    current example is rendered.

    Returns:
        A redirect to the index page when the session holds no known user
        type, the Dutch thank-you message once every example of that user
        type has been shown, and the rendered ``sliders.html`` otherwise.
    """
    user_type = session.get("user_type")
    if user_type not in examples or "example" not in session:
        # Reached without first choosing a user type on the landing page.
        return redirect(url_for("index"))

    if request.method == "POST":
        ratings = {field: int(request.form[field].replace("option", ""))
                   for field in ("topleft", "topright", "bottom", "general")}

        print(f'Type: {user_type}, Voorbeeld: {session["example"] - 1}:\n'
              f'- Top left: {ratings["topleft"]}\n- Top right: {ratings["topright"]}\n'
              f'- Bottom: {ratings["bottom"]}\n- General: {ratings["general"]}')

    # Stop at the real number of examples for this user type: the lists in
    # `examples` differ in length, so a fixed limit would index past their end.
    user_examples = examples[user_type]
    if session["example"] >= len(user_examples):
        return "We zijn klaar! Hartelijk dank voor uw bijdrage!"

    candidate_id, predicted_index = user_examples[session["example"]]
    session["pred"] = index_to_name[predicted_index]

    history = df_grouped.get_group(candidate_id).reset_index(drop=True)

    # Per-candidate features are derived from candidate_id only, so they are the
    # same on every row; read them from the first row. The dict order is the
    # display order and matches the order in which /store_results labels the
    # submitted values (skills, certificates, languages, licenses, location).
    static_labels = {"skills": "Vaardigheden",
                     "certificates": "Certificaten",
                     "languages": "Talen",
                     "licenses": "Rijbewijzen",
                     "location": "Postcode"}
    static_features = history[list(static_labels)].iloc[0].rename(static_labels).to_frame()

    # One column per job, numbered from 1, with the rows in display order.
    # The candidate's last row is left out -- presumably it is the job being
    # predicted; TODO confirm.
    job_labels = {"isco_code4": "Isco code",
                  "function_id": "Functie",
                  "company_name": "Bedrijf",
                  "education": "Opleidingsniveau",
                  "time_spent": "Dagen gewerkt",
                  "isco_functie_niveau": "Werkniveau"}
    jobs = history.iloc[:-1][list(job_labels)].rename(columns=job_labels)
    jobs.index = pd.RangeIndex(1, len(jobs) + 1, name="Baan nummer")
    jobs = jobs.T

    # NOTE(review): both HTML tables are kept in the session so /show_results
    # can reuse them. If the default cookie-backed session is in use, large
    # tables may exceed the browser's cookie size limit -- confirm.
    session["df"] = jobs.to_html(classes="table")
    session["static_features"] = static_features.to_html(header=False,
                                                         classes=["table", "static_data"])

    return render_template("/sliders.html",
                           dataframe=session["df"],
                           static_features=session["static_features"],
                           pred=session["pred"])

In [34]:
@app.route("/show_results", methods=["GET", "POST"])
def show_results():
    """Render the result page for the example that was just rated.

    The image path is built from the ASCII-transliterated user type and the
    example counter minus one. The tables and the prediction are the values
    that the ``sliders`` view stored in the session.
    """
    folder = unidecode.unidecode(session['user_type'])
    shown_example = session['example'] - 1

    context = dict(
        image=f"../static/{folder}/{shown_example}.png",
        dataframe=session["df"],
        static_features=session["static_features"],
        pred=session["pred"],
    )
    return render_template("/show_results.html", **context)

In [35]:
@app.route("/store_results", methods=["POST"])
def store_results():
    """Log the submitted slider values and advance to the next example.

    Expects a JSON body with a ``values`` list. Its entries are paired
    positionally with the fixed labels below; surplus entries on either side
    are ignored. The result is printed together with the session's user type
    and example counter, the counter is incremented, and the client is
    redirected to ``show_results``.
    """
    payload = request.get_json()

    labels = ("isco code", "function id", "company", "education", "days worked",
              "isco level", "CV", "skills", "certificates", "languages", "licenses",
              "location")
    values = {label: value for label, value in zip(labels, payload["values"])}

    print(session["user_type"], session["example"], values)
    session["example"] = session["example"] + 1

    return redirect(url_for("show_results"))

In [None]:
# Start Werkzeug's development server; this call blocks the cell until the
# server is stopped.
run_simple(hostname='localhost', port=8080, application=app)

 * Running on http://localhost:8080/ (Press CTRL+C to quit)
127.0.0.1 - - [28/May/2022 00:33:19] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [28/May/2022 00:33:21] "POST / HTTP/1.1" 302 -
127.0.0.1 - - [28/May/2022 00:33:22] "GET /sliders HTTP/1.1" 200 -
127.0.0.1 - - [28/May/2022 00:33:24] "POST /store_results HTTP/1.1" 302 -


Gezondheidszorg 0 {'isco code': '2.5', 'function id': '2.5', 'company': '2.5', 'education': '2.5', 'days worked': '2.5', 'isco level': '2.5', 'CV': '2.5', 'skills': '2.5', 'certificates': '2.5', 'languages': '2.5', 'licenses': '2.5', 'location': '2.5'}


127.0.0.1 - - [28/May/2022 00:33:24] "GET /show_results HTTP/1.1" 200 -
127.0.0.1 - - [28/May/2022 00:33:24] "GET /show_results HTTP/1.1" 200 -
