In [2]:
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

In [3]:
# Input and output data directories; both currently point at the same folder.
FILE_PATH = SAVE_PATH = './data'

# [훈련 데이터 생성]

## apply_train 파일 로드

In [4]:
# Load the application log (one resume_seq / recruitment_seq pair per row)
# and preview the first rows.
apply_csv = f'{FILE_PATH}/apply_train.csv'
apply = pd.read_csv(apply_csv)
apply.head()

Unnamed: 0,resume_seq,recruitment_seq
0,U05833,R03838
1,U06456,R02144
2,U07807,R01877
3,U04842,R02463
4,U08336,R00112


## 딕셔너리 치환 (인코딩, 디코딩)

### Resume 딕셔너리 생성

In [5]:
# Give every distinct resume ID a dense integer index (in the order returned by
# unique()), keeping both directions of the mapping for encoding and decoding.
idx_2_resume = dict(enumerate(apply['resume_seq'].unique()))
resume_2_idx = {res: i for i, res in idx_2_resume.items()}

### Recruitment 딕셔너리 생성

In [6]:
# Give every distinct recruitment ID a dense integer index (in the order returned
# by unique()), keeping both directions of the mapping for encoding and decoding.
idx_2_recruitment = dict(enumerate(apply['recruitment_seq'].unique()))
recruitment_2_idx = {rec: i for i, rec in idx_2_recruitment.items()}

## idx 인코딩 작업

In [7]:
# Encode the string IDs as integer indices; a lookup of an unknown ID raises KeyError.
apply['resume_idx'] = apply['resume_seq'].map(resume_2_idx.__getitem__)
apply['recruitment_idx'] = apply['recruitment_seq'].map(recruitment_2_idx.__getitem__)

In [8]:
# Preview the frame to confirm the resume_idx / recruitment_idx columns were added.
apply.head()

Unnamed: 0,resume_seq,recruitment_seq,resume_idx,recruitment_idx
0,U05833,R03838,0,0
1,U06456,R02144,1,1
2,U07807,R01877,2,2
3,U04842,R02463,3,3
4,U08336,R00112,4,4


## train, test split

### 각 RESUME 별 TEST 1개

In [9]:
# train, test = [], []
# apply_train_groupby = apply.groupby('resume_idx')['recruitment_idx'].apply(list)
# for uid, iids in zip(apply_train_groupby.index.tolist(), apply_train_groupby.values.tolist()):
#     for iid in iids[:-1]:
#         train.append([uid,iid])
#     test.append([uid, iids[-1]])

In [10]:
# train = pd.DataFrame(train, columns=['resume_idx', 'recruitment_idx'])
# test = pd.DataFrame(test, columns=['resume_idx', 'recruitment_idx'])

### train_test_split(0.35)

In [11]:
# Hold out 35% of the application rows for testing, stratified on resume_idx,
# with a fixed seed so the split is repeatable.
train, test = train_test_split(
    apply,
    test_size=0.35,
    random_state=42,
    stratify=apply['resume_idx'],
)

## 훈련 데이터 생성

In [12]:
# Collapse the training rows into one row per resume: the resume_idx and a
# space-separated string of the recruitment indices it applied to.
# The groupby key is carried through reset_index(), so every row keeps its real
# resume_idx rather than being relabelled by its row position (which is only
# correct when the keys happen to be exactly 0..n-1).
train = (
    train.assign(recruitment_idx=train['recruitment_idx'].astype(str))
    .groupby('resume_idx')['recruitment_idx']
    .agg(' '.join)
    .reset_index()
)
train.head()

Unnamed: 0,resume_idx,recruitment_idx
0,0,0 996 6499
1,1,5352 3262 2645 2174 1 4663 1804 1927 1871
2,2,6527 3926 2 4100 3986
3,3,5787 3 3852 4715
4,4,4 3596


In [13]:
# Collapse the held-out rows into one row per resume: the resume_idx and a
# space-separated string of the recruitment indices it applied to.
# The groupby key is carried through reset_index(), so every row keeps its real
# resume_idx rather than being relabelled by its row position (which is only
# correct when the keys happen to be exactly 0..n-1).
test = (
    test.assign(recruitment_idx=test['recruitment_idx'].astype(str))
    .groupby('resume_idx')['recruitment_idx']
    .agg(' '.join)
    .reset_index()
)
test.head()

Unnamed: 0,resume_idx,recruitment_idx
0,0,5621
1,1,5607 5413 1064 1507
2,2,1019 4016
3,3,2022 1029
4,4,3575


## 모델 별 훈련데이터 저장

In [14]:
FOLDER_NAME = "JOB_2_42"
BSPM_SAVE_PATH = f"./BSPM/data/{FOLDER_NAME}/"
LTOCF_SAVE_PATH = f"./LT-OCF/data/{FOLDER_NAME}/"
CF_SAVE_PATH = f"./CF/data/{FOLDER_NAME}/"

model_dirs = (BSPM_SAVE_PATH, LTOCF_SAVE_PATH, CF_SAVE_PATH)

# Make sure each model's dataset folder exists, reporting what was done.
for path in model_dirs:
    if os.path.exists(path):
        print(f"폴더가 이미 존재합니다: {path}")
        continue
    os.makedirs(path)
    print(f"폴더가 생성되었습니다: {path}")

# Write the same train/test files into every model's folder; each output line
# is a resume_idx followed by its space-separated recruitment indices.
for path in model_dirs:
    for split_name, split_frame in (('train', train), ('test', test)):
        np.savetxt(f'{path}{split_name}.txt', split_frame.values, fmt='%s')

폴더가 생성되었습니다: ./BSPM/data/JOB_2_42/
폴더가 생성되었습니다: ./LT-OCF/data/JOB_2_42/
폴더가 생성되었습니다: ./CF/data/JOB_2_42/


# [모델별 학습]

* 각 모델 실행 시마다 커널을 재시작해 줍니다.

## BSPM

In [None]:
# Terminate the kernel process immediately so BSPM starts in a fresh interpreter;
# the cells that follow re-import what they need.
os._exit(0)

In [1]:
import os

# Folder containing the BSPM main.py, relative to the notebook's starting directory.
RELATIVE_BSPM_PATH = "./BSPM/bspm"
# Working directory at the time this cell runs; used later as the base to return to.
ROOT_PATH = os.getcwd()

In [2]:
# Move into the BSPM code folder so that main.py can be run from there.
# Go back to the root working directory first, so the relative path resolves
# the same way even if this cell is re-run.
os.chdir(ROOT_PATH)

# Show the current working directory
print("현재 작업 경로:", os.getcwd())

# Resolve the relative path to an absolute one (relative to the current directory)
absolute_path = os.path.abspath(RELATIVE_BSPM_PATH)

# Switch the working directory
os.chdir(absolute_path)

# Show the new working directory
print("변경된 작업 경로:", os.getcwd())

현재 작업 경로: C:\Users\medici\DACON_Job-Recommendation-System-1
변경된 작업 경로: C:\Users\medici\DACON_Job-Recommendation-System-1\BSPM\bspm


* seed(default: 2020)

In [3]:
# Run BSPM's main.py from the current directory with the hyperparameters below.
# NOTE(review): the data-saving cell in this notebook writes to FOLDER_NAME = "JOB_2_42",
# but this run loads --dataset="JOB_1" — confirm which dataset is intended.
%run main.py --dataset="JOB_1" --topks="[20]" --simple_model="bspm" --solver_shr="rk4" \
--K_s=1 --T_s=3.5 --final_sharpening=True --idl_beta=0.3 --factor_dim=960


██████╗ ███████╗██████╗ ███╗   ███╗
██╔══██╗██╔════╝██╔══██╗████╗ ████║
██████╔╝███████╗██████╔╝██╔████╔██║
██╔══██╗╚════██║██╔═══╝ ██║╚██╔╝██║
██████╔╝███████║██║     ██║ ╚═╝ ██║
╚═════╝ ╚══════╝╚═╝     ╚═╝     ╚═╝

[0;30;43mCpp extension not loaded[0m
>>SEED: 2020
[0;30;43mloading [../data/JOB_1][0m
# of user: 8482
# of item: 6695
49464 interactions for training
8482 interactions for testing
JOB_1 Sparsity : 0.0010204097804796487
JOB_1 is ready to go
{'A_n_fold': 100,
 'A_split': False,
 'K_b': 1,
 'K_idl': 1,
 'K_s': 1,
 'T_b': 1,
 'T_idl': 1,
 'T_s': 3.5,
 'bigdata': False,
 'bpr_batch_size': 2048,
 'dataset': 'JOB_1',
 'decay': 0.0001,
 'device': device(type='cuda', index=0),
 'dropout': 0,
 'factor_dim': 960,
 'final_sharpening': True,
 'idl_beta': 0.3,
 'keep_prob': 0.6,
 'latent_dim_rec': 64,
 'lightGCN_n_layers': 3,
 'lr': 0.001,
 'multicore': 0,
 'pretrain': 0,
 'sharpening_off': False,
 'solver_blr': 'euler',
 'solver_idl': 'euler',
 'solver_shr': 'rk4',
 't_point_combi

## LT-OCF

In [None]:
# Terminate the kernel process immediately so LT-OCF starts in a fresh interpreter;
# the cells that follow re-import what they need.
os._exit(0)

In [15]:
import os

# Folder containing the LT-OCF main.py, relative to the notebook's starting directory.
RELATIVE_LTOCF_PATH = "./LT-OCF/code"
# Working directory at the time this cell runs; used later as the base to return to.
ROOT_PATH = os.getcwd()

In [16]:
# Move into the LT-OCF code folder so that main.py can be run from there.
# Go back to the root working directory first, so the relative path resolves
# the same way even if this cell is re-run.
os.chdir(ROOT_PATH)

# Show the current working directory
print("현재 작업 경로:", os.getcwd())

# Resolve the relative path to an absolute one (relative to the current directory)
absolute_path = os.path.abspath(RELATIVE_LTOCF_PATH)

# Switch the working directory
os.chdir(absolute_path)

# Show the new working directory
print("변경된 작업 경로:", os.getcwd())

현재 작업 경로: C:\Users\medici\DACON_Job-Recommendation-System-1
변경된 작업 경로: C:\Users\medici\DACON_Job-Recommendation-System-1\LT-OCF\code


* seed(default: 2020)

In [18]:
# Run LT-OCF's main.py from the current directory with the hyperparameters below.
# NOTE(review): the data-saving cell in this notebook writes to FOLDER_NAME = "JOB_2_42",
# but this run loads --dataset="JOB_1" — confirm which dataset is intended.
# NOTE(review): the recorded output shows "EPOCH[1/120]" although --epochs=250 is
# passed here; the saved output appears to come from a different invocation — confirm.
%run main.py --dataset="JOB_1" --model="ltocf" --solver="rk4" --adjoint=False \
--K=4 --learnable_time=False --dual_res=False --lr=1e-3 --lr_time=1e-3 \
--decay=1e-4 --topks="[20]" --tensorboard=1 --gpuid=0 \
--epochs=250 --layer=2 --recdim=360 --bpr_batch=2048 --pretrain=0

>>SEED: 2020
[0;30;43muse NORMAL distribution initilizer[0m
loading adjacency matrix
lgn is already to go(dropout:0)
load and save to C:\Users\medici\DACON_Job-Recommendation-System-1\LT-OCF\code\pretrain\ltocf
Train time: 1.6641s
EPOCH[1/120] loss0.40770-|Sample:0.00|
Train time: 1.4482s
EPOCH[2/120] loss0.25317-|Sample:0.02|
Train time: 1.5050s
EPOCH[3/120] loss0.13217-|Sample:0.01|
Train time: 1.4977s
EPOCH[4/120] loss0.08131-|Sample:0.00|
Train time: 1.4625s
EPOCH[5/120] loss0.06010-|Sample:0.00|
Train time: 1.4632s
EPOCH[6/120] loss0.04780-|Sample:0.01|
Train time: 1.4632s
EPOCH[7/120] loss0.03976-|Sample:0.01|
Train time: 1.4470s
EPOCH[8/120] loss0.03664-|Sample:0.00|
Train time: 1.4559s
EPOCH[9/120] loss0.03161-|Sample:0.02|
Train time: 1.4778s
EPOCH[10/120] loss0.02889-|Sample:0.00|
Train time: 1.4681s
EPOCH[11/120] loss0.02692-|Sample:0.02|
Train time: 1.4457s
EPOCH[12/120] loss0.02411-|Sample:0.00|
Train time: 1.4623s
EPOCH[13/120] loss0.02433-|Sample:0.00|
Train time: 1.47

## CF

In [None]:
# Terminate the kernel process immediately so CF starts in a fresh interpreter;
# the cells that follow re-import what they need.
os._exit(0)

In [13]:
import os

# Folder containing the CF main.py, relative to the notebook's starting directory.
RELATIVE_CF_PATH = "./CF/cf"
# Working directory at the time this cell runs; used later as the base to return to.
ROOT_PATH = os.getcwd()

In [14]:
# Move into the CF code folder so that main.py can be run from there.
# Go back to the root working directory first, so the relative path resolves
# the same way even if this cell is re-run.
os.chdir(ROOT_PATH)

# Show the current working directory
print("현재 작업 경로:", os.getcwd())

# Resolve the relative path to an absolute one (relative to the current directory)
absolute_path = os.path.abspath(RELATIVE_CF_PATH)

# Switch the working directory
os.chdir(absolute_path)

# Show the new working directory
print("변경된 작업 경로:", os.getcwd())

현재 작업 경로: C:\Users\medici\DACON_Job-Recommendation-System-1
변경된 작업 경로: C:\Users\medici\DACON_Job-Recommendation-System-1\CF\cf


In [15]:
# Run the CF main.py from the current directory with the options below.
# NOTE(review): the data-saving cell in this notebook writes to FOLDER_NAME = "JOB_2_42",
# but this run loads --dataset="JOB_1" — confirm which dataset is intended.
%run main.py --dataset="JOB_1" --test="test" --gamma=0.2 --similarity="pearson"


[gamma] '0.2', [similarity] 'pearson'

reading data...
measuring similarity of users...
measuring user-based recommendations...
measuring similarity of items...


  B = np.nan_to_num((coo-np.multiply.outer(k, k)/N)/np.nan_to_num(np.multiply.outer(np.sum((M-k/N)**2, axis = 0)**0.5,np.sum((M-k/N)**2, axis = 0)**0.5)))


measuring item-based recommendations...


  rec_i = np.nan_to_num(np.dot(M,B)/np.sum(abs(B), axis=0))


measuring final model recommendations...
measuring performance of final model...

[final model]
recall@20: 0.28048

--------------------------------------------------------------------------------
Total Time: 16.73 seconds
