## 태그 임베딩 모델 불러오기

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd

# TODO: reuse the code from model training instead of duplicating it here

# Tag table (read below via its "tagID" and "keyword" columns) and the
# job-to-tag link table (read below via its "jobID" and "tagID" columns).
tag = pd.read_csv('data/tags.csv')
jobtag = pd.read_csv('data/job_tags.csv')

# One row per job: "jobID" plus "tags", the list of keywords attached to it.
jobkeywordlist = (
    jobtag.merge(tag, on="tagID", how="left")
    .drop_duplicates()
    .groupby("jobID")["keyword"]
    .apply(list)
    .reset_index(name="tags")
)


class Encoder(nn.Module):
    """Two stacked linear layers followed by a softmax over the embedding.

    The attribute names ``hidden`` and ``embed`` are the keys of the state
    dict, so they must stay as they are for saved checkpoints to load.

    Args:
        inputdim: Size of the input vector.
        hiddendim: Size of the intermediate layer.
        embeddim: Size of the produced embedding.
    """

    def __init__(self, inputdim: int, hiddendim: int, embeddim: int):
        super().__init__()
        self.hidden = nn.Linear(inputdim, hiddendim)
        self.embed = nn.Linear(hiddendim, embeddim)

    def forward(self, input):
        """Embed ``input`` and normalise the result with a softmax.

        Args:
            input: Tensor whose last dimension has size ``inputdim``; either
                a single vector or a batch of vectors.

        Returns:
            Tensor with the last dimension replaced by ``embeddim``; the
            values along that dimension sum to 1.
        """
        # No activation between the two layers, exactly as in the original.
        embedvec = self.embed(self.hidden(input))
        # Normalise over the feature axis. For a 1-D input this is the same
        # as dim=0; for a batch, dim=0 would mix different samples together.
        return F.softmax(embedvec, dim=-1)


# Layer sizes of the encoder. They have to match the checkpoint loaded below,
# and INPUT_DIM has to equal the number of rows in `tag`, because onehot()
# builds vectors of length tag.shape[0] that are fed straight into `enc`.
INPUT_DIM = 887
ENC_HID_DIM = 100
EMB_DIM = 25

enc = Encoder(INPUT_DIM, ENC_HID_DIM, EMB_DIM)

# File holding the saved state dict of the encoder.
PATH = "2021-05-10 11_57_53"
# map_location="cpu" lets a checkpoint that was saved from a GPU be loaded on
# a machine without CUDA; `enc` itself lives on the CPU.
enc.load_state_dict(torch.load(PATH, map_location="cpu"))
# The model is only used for inference in this notebook.
enc.eval()


def onehot(keyword):
    """Return the one-hot vector of ``keyword`` over the rows of ``tag``.

    Args:
        keyword: Value to look up in the ``"keyword"`` column of ``tag``.

    Returns:
        A float ``numpy`` array of length ``tag.shape[0]`` that is 1 at every
        row whose keyword equals ``keyword`` and 0 elsewhere.

    Raises:
        ValueError: If no row of ``tag`` has this keyword. An explicit raise
            is used instead of ``assert`` so the check survives ``python -O``
            and an all-zero vector is never returned silently.
    """
    # Row positions (not index labels) of the matching rows, computed once.
    positions = np.flatnonzero((tag["keyword"] == keyword).to_numpy())
    if positions.size == 0:
        raise ValueError(f"unknown tag keyword: {keyword!r}")

    onehot_vec = np.zeros(tag.shape[0])
    onehot_vec[positions] = 1
    return onehot_vec


def tagembedding(keyword):
    """Run the one-hot vector of ``keyword`` through ``enc``.

    Args:
        keyword: Tag keyword; it is encoded with ``onehot`` first.

    Returns:
        The tensor produced by ``enc``, computed without gradient tracking.
    """
    onehot_tensor = torch.from_numpy(onehot(keyword)).float()
    with torch.no_grad():
        return enc(onehot_tensor)

## 적합도 비교

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Number of best-scoring jobs returned by cal_score().
top_k = 30


def get_cosim(tag1, tag2):
    """Cosine similarity between two embedding vectors.

    Both inputs are reshaped to a single row before being handed to
    ``cosine_similarity``, whose result is returned unchanged.
    """
    row1, row2 = tag1.reshape(1, -1), tag2.reshape(1, -1)
    return cosine_similarity(row1, row2)


def cal_score(user_taglist):
    """Rank the jobs in ``jobkeywordlist`` by how well they fit the user's tags.

    Each job tag is scored against the user's tags: 1 when one of them is the
    same string, otherwise 0.7 when the cosine similarity of the two tag
    embeddings exceeds 0.9 for at least one user tag, otherwise 0. A job's
    score is the sum of the scores of its distinct tags divided by the length
    of its tag list.

    Args:
        user_taglist: Iterable of tag keywords describing the user.

    Returns:
        DataFrame with the columns ``jobID``, ``tags`` and ``score``, sorted
        by ``score`` in descending order and cut to the first ``top_k`` rows.
    """
    # Per-call memo tables: the same tags occur in many jobs, so every tag is
    # embedded and scored only once instead of once per job.
    embedding_cache = {}
    tag_score_cache = {}

    def embed(keyword):
        # Embedding of `keyword`, going through the encoder at most once.
        if keyword not in embedding_cache:
            embedding_cache[keyword] = tagembedding(keyword)
        return embedding_cache[keyword]

    def best_match(job_tag):
        # Best score of a single job tag against all of the user's tags.
        embedded_jobtag = embed(job_tag)
        best = 0
        for user_tag in user_taglist:  # one tag of the user
            if job_tag == user_tag:
                # An exact match cannot be beaten; stop looking.
                return 1
            if get_cosim(embedded_jobtag, embed(user_tag)) > 0.9:
                best = 0.7
        return best

    jobidlist = jobkeywordlist["jobID"].tolist()
    job_taglists = jobkeywordlist["tags"].tolist()

    job_scores = []
    for job_taglist in job_taglists:  # one job
        tag_scores = {}
        for job_tag in job_taglist:  # one tag of the job
            if job_tag not in tag_score_cache:
                tag_score_cache[job_tag] = best_match(job_tag)
            score = tag_score_cache[job_tag]
            if score > 0:
                # Keyed by tag, so a tag repeated within a job counts once.
                tag_scores[job_tag] = score

        job_scores.append(sum(tag_scores.values()) / len(job_taglist))

    job_score_pd = pd.DataFrame({'jobID': jobidlist,
                                 'tags': job_taglists,
                                 'score': job_scores})

    job_score_pd = job_score_pd.sort_values(by=['score'], axis=0, ascending=False)
    job_score_pd.reset_index(drop=True, inplace=True)
    return job_score_pd[:top_k]

## 테스트

In [None]:
# Sample query: rank the jobs for a hand-written list of user tags.
cal_score(['HTML', 'CSS', "JavaScript", 'jQuery'])

Unnamed: 0,jobID,tags,score
0,ebd9629fc3ae5e9f6611e2ee05a31cef,"[JavaScript, HTML]",1.0
1,286674e3082feb7e5afb92777e48821f,"[HTML, JavaScript, CSS]",1.0
2,84d2004bf28a2095230e8e14993d398d,"[JavaScript, CSS, HTML]",1.0
3,fb89705ae6d743bf1e848c206e16a1d7,"[JavaScript, HTML, CSS]",1.0
4,995665640dc319973d3173a74a03860c,"[JavaScript, CSS, HTML]",1.0
5,63538fe6ef330c13a05a3ed7e599d5f7,"[CSS, JavaScript, HTML]",1.0
6,f74909ace68e51891440e4da0b65a70c,[JavaScript],1.0
7,c042f4db68f23406c6cecf84a7ebb0fe,"[HTML, CSS, JavaScript]",1.0
8,2b6d65b9a9445c4271ab9076ead5605a,"[JavaScript, CSS, HTML]",1.0
9,dc912a253d1e9ba40e2c597ed2376640,"[CSS, JavaScript, HTML]",1.0


## 테스트 with GUI

In [None]:
import ipywidgets as widgets

# Caption displayed above the input row.
desc = widgets.Label(value='기술 스택 (쉼표로 구분)')

# Text box for the comma-separated tags.
tags_input = widgets.Text(
    value='',
    placeholder='예: HTML, CSS, JavaScript',
    description='',
    disabled=False,
    layout=widgets.Layout(width='50%'),
)

# Search button, placed on the same row as the text box.
search_button = widgets.Button(description='검색', layout=widgets.Layout(width='10%'))
input_with_button = widgets.HBox([tags_input, search_button])

# Initially empty HTML pane, stacked below the input row.
output_html = widgets.HTML(value="")

display(widgets.VBox([desc, input_with_button, output_html]))

def search(b):
    """Button callback: score the entered tags and show the result table.

    Reads the comma-separated tags from ``tags_input``, ranks the jobs with
    ``cal_score`` and writes the styled table as HTML into ``output_html``.
    When the box holds no tag at all, the output pane is cleared instead.

    Args:
        b: The clicked button, as passed by ``on_click``; not used.
    """
    # Drop blank entries so that an empty box or a trailing comma does not
    # send '' to the tag lookup, which rejects keywords it does not know.
    tags_input_ed = [x.strip() for x in tags_input.value.split(',') if x.strip()]
    if not tags_input_ed:
        output_html.value = ""
        return

    styles = [
        {'selector': 'td', 'props': [('padding', '10px'), ('vertical-align', 'top'),
                                     ('border-bottom', '1px solid #ccc'), ('background', '#eee')]},
        {'selector': 'th', 'props': [('padding', '10px'), ('font-weight', 'bold'), ('vertical-align', 'top'),
                                     ('color', '#fff'), ('background', '#ce4869')]},
    ]
    styler = cal_score(tags_input_ed).style.set_table_styles(styles)
    # Styler.render() is deprecated in favour of Styler.to_html() and is gone
    # from newer pandas releases; fall back to render() only where to_html()
    # does not exist yet.
    to_html = getattr(styler, "to_html", None) or styler.render
    output_html.value = to_html()


search_button.on_click(search)

VBox(children=(Label(value='기술 스택 (쉼표로 구분)'), HBox(children=(Text(value='', layout=Layout(width='50%'), placeh…