In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before each
# cell executes, so edits to the local helper modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from text_processing import process_text
from constants import PROCESSED_DATA_PATH
from utils import load_pickle_file
from detectors import ProgrammingLanguageDetector, TechnologyDetector, LocationDetector, PythonLibraryDetector, detect_features
from IPython.core.display import display, HTML
import numpy as np
import pandas as pd
from ipywidgets import interact
import ipywidgets as widgets

In [3]:
# Highlight colour -> detector instance. The key is used verbatim as the HTML
# font colour for that detector's matches; insertion order (red, green, blue)
# is the order in which the detectors are applied.
detector_dict = dict(
    red=ProgrammingLanguageDetector(),
    green=TechnologyDetector(),
    blue=LocationDetector(),
)

In [4]:
# Load the preprocessed bundle once, then pull out the four entries used below.
# NOTE(review): presumably load_pickle_file unpickles PROCESSED_DATA_PATH -- only
# load files from a trusted source, since unpickling can execute arbitrary code.
processed_data = load_pickle_file(PROCESSED_DATA_PATH)
job_summary_list, documents, lemmas, df_data = (
    processed_data[key]
    for key in ('job_summary_list', 'documents', 'lemmas', 'df_processed')
)

In [5]:
def colorify_words(text, words, color):
    """Wrap every occurrence of each word in ``text`` in a coloured ``<font>`` tag.

    All words are replaced in a single regex pass, so markup inserted for one
    word is never re-scanned for another. Compared with chaining
    ``str.replace`` calls this guarantees that:

    * a word that also occurs in the markup itself (e.g. ``"font"``,
      ``"color"`` or the colour name) does not corrupt the generated tags;
    * a longer word wins over a shorter word it contains
      (``"JavaScript"`` over ``"Java"``);
    * duplicate entries in ``words`` do not produce nested tags;
    * empty strings in ``words`` are ignored.

    Existing ``<font ...>`` / ``</font>`` tags in ``text`` are passed through
    untouched, so the function can be applied repeatedly with different
    colours. Matching is case-sensitive and by plain substring, with no
    word-boundary check.

    Args:
        text: The string (possibly already containing ``<font>`` markup) to
            annotate. It is not HTML-escaped here.
        words: Iterable of literal strings to highlight.
        color: Value written into the ``color`` attribute of the tag.

    Returns:
        ``text`` with each match wrapped as
        ``<font color="{color}">{word}</font>``.
    """
    import re  # local import keeps this cell self-contained

    # De-duplicate, drop empty strings, and try longer words first so that
    # regex alternation prefers "JavaScript" over "Java" at the same position.
    unique_words = sorted(
        {word for word in words if word},
        key=lambda word: (-len(word), word),
    )
    if not unique_words:
        return text

    alternation = '|'.join(re.escape(word) for word in unique_words)
    # First alternative consumes existing <font> tags so words are never
    # matched inside them; group 1 captures a real word occurrence.
    pattern = re.compile(rf'</?font\b[^>]*>|({alternation})')

    def _wrap(match):
        word = match.group(1)
        if word is None:
            # An existing tag: emit it unchanged.
            return match.group(0)
        return f'<font color="{color}">{word}</font>'

    return pattern.sub(_wrap, text)

In [6]:
@interact(index=widgets.IntSlider(min=0, max=len(documents)-1, step=1, value=0))
def show_colorifed_text(index=0):
    """Display ``documents[index]`` as HTML with detector matches coloured.

    Each detector in ``detector_dict`` is run against the original document
    and its matches are wrapped in a ``<font>`` tag using the detector's
    dictionary key as the colour.

    Args:
        index: Position of the document in ``documents``; driven by the slider.
    """
    source_text = documents[index]
    highlighted = source_text
    for color, detector in detector_dict.items():
        # Detect on the untouched document: running a detector on text that
        # already contains <font color="..."> markup lets it match words that
        # only exist in the injected tags.
        matches = detector.detect(source_text)
        highlighted = colorify_words(highlighted, matches, color)
    # NOTE(review): the document is rendered as raw HTML without escaping --
    # confirm the documents never contain markup of their own.
    display(HTML(highlighted))

interactive(children=(IntSlider(value=0, description='index', max=6658), Output()), _dom_classes=('widget-inte…

In [7]:
# Run detect_features on every job description, in row order.
# Iterating the column directly avoids iterrows(), which builds a full Series
# for every row just to read a single field.
feature_list = [
    detect_features(job_description)
    for job_description in df_data['job_description']
]

In [8]:
# feature_list is positional (one entry per df_data row), so label the feature
# frame with df_data's own index before joining. Without this the join aligns
# against an implicit 0..n-1 RangeIndex and silently misaligns rows whenever
# df_data's index is anything else.
# Note: re-running this cell on the already-joined frame raises, because
# DataFrame.join refuses overlapping column names without a suffix.
df_data = df_data.join(pd.DataFrame(feature_list, index=df_data.index))

In [9]:
# Preview the last 15 rows, transposed so each column of df_data reads as a row.
df_data.tail(15).transpose()

Unnamed: 0,6644,6645,6646,6647,6648,6649,6650,6651,6652,6653,6654,6655,6656,6657,6658
id,62850,62858,62859,62860,62861,62862,62863,62864,62865,62866,62867,62898,62899,62906,62907
categories__name,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data
date_published,2019-05-04,2019-05-04,2019-05-03,2019-05-04,2019-05-04,2019-05-03,2019-05-04,2019-05-04,2019-05-04,2019-05-04,2019-05-04,2019-04-11,2019-04-11,2019-05-11,2019-05-11
employer__name,Telia Company,Abloy Oy,Santen Oy,barona.fi,Powervision Oy,Kemira Oyj,Rovio Mobile Oy,Santander Consumer Bank,Outotec,Ework.fi,Ework.fi,"CGI Group, Inc.",Wargaming.net,Fiskars Group,Verto analytics
is_active,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True
job_description,"Have you analyzed as-is situation, understood ...",We are looking for an experienced and ambitiou...,Santen EMEA Pharmacovigilance Unit is looking ...,Hakuaika päättyy 20.05.2019\nHaemme hyvän asen...,Are you fascinated about entering consumer dro...,Kemira is a global company providing chemicals...,Welcome to Rovio!\n\nWe are looking for an exp...,Santander Consumer Bank is a Nordic bank with ...,"Senior Manager, Operative Procurement\nOutotec...",For our client we are looking for Senior Solut...,For our client we are looking for a Senior Pro...,Position Description:\nCGI tarjoaa palveluja I...,Wargaming Helsinki is looking for a versatile ...,"Location: Helsinki, Finland\n\nThe Area\nSales...",Want to be a part of a team that is building a...
job_title__name,ECM Lead Architect,Senior Controller,"Sr. Specialist, Medical Device Safety Vigilanc...","Prosessinhoitaja Orionille, tabletinvalmistus",European Technical Support and Delivery Servic...,"Manager, Customer Communications",Lead Designer (Battle Studio),Head of Compliance Finland,"Senior Manager, Operative Procurement",Senior Solution Consultant or Solution Experts,Senior Project Manager for Dynamics CRM Project,Data Architect (Financial Services),Senior UI Programmer,"Customer Service Representative, Domestic",Cloud Operations Lead
location__name,Helsinki,Espoo,Vantaa,Espoo,Espoo,Helsinki,Espoo,Helsinki,Espoo,Espoo,Espoo,Helsinki,Helsinki,Helsinki,Espoo
location__population,645482,280247,224397,280247,280247,645482,280247,645482,280247,280247,280247,645482,645482,645482,280247
pages__name,Indeed,Indeed,Indeed,Indeed,Indeed,Indeed,Indeed,Indeed,Indeed,Indeed,Indeed,Indeed,Indeed,Indeed,Indeed
