In [67]:
import pickle
import pandas as pd
import numpy as np
from tqdm import tqdm

In [20]:
# Load the prediction table (CSV) and the pickled lookup tables.
def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


result_13_18 = pd.read_csv('../processed/CFFM_predict_proba_13_18.csv')
country_to_idx = _read_pickle('../processed/country_to_idx.pkl')
idx_to_country = _read_pickle('../processed/idx_to_country.pkl')
new_exp_3years = _read_pickle('../processed/new_exp_3years.pkl')
new_exp_5years = _read_pickle('../processed/new_exp_5years.pkl')

In [50]:
# Display the loaded prediction table (the recorded output elides the middle rows).
result_13_18

Unnamed: 0,country_id,year,item_id,GDP_growth,pop_growth,preference,label,predict_proba
0,0,2013,1.0,0.101959,0.287706,1.108999e-08,0,0.331913
1,0,2013,2.0,0.101959,0.287706,6.544139e-07,0,0.243521
2,0,2013,3.0,0.101959,0.287706,1.969499e-06,0,0.345073
3,0,2013,4.0,0.101959,0.287706,7.394813e-08,0,0.321218
4,0,2013,5.0,0.101959,0.287706,3.336427e-07,0,0.287764
...,...,...,...,...,...,...,...,...
64884,202,2018,80.0,0.194652,0.232127,7.380276e-07,0,0.099187
64885,202,2018,86.0,0.194652,0.232127,6.606483e-07,0,0.180853
64886,202,2018,88.0,0.194652,0.232127,2.919775e-06,0,0.181265
64887,202,2018,89.0,0.194652,0.232127,8.232880e-06,0,0.125569


In [54]:
# Precision of the recommendations for one (country, year) pair
def FM_precision(country, current_year, new_exp_Nyears, proba_threshold, neighbor_size):
    """Return the precision of the thresholded predictions for one country-year.

    Rows of the module-level ``result_13_18`` table whose ``country_id`` equals
    ``country``, whose ``year`` equals ``current_year`` and whose
    ``predict_proba`` is strictly greater than ``proba_threshold`` form the
    recommended items. Precision is the number of entries of the ground-truth
    collection that appear among the recommended ``item_id`` values, divided by
    the number of recommended rows.

    Parameters
    ----------
    country : key of ``idx_to_country``; also compared against ``country_id``.
    current_year : value compared against the ``year`` column.
    new_exp_Nyears : mapping from ``(idx_to_country[country], current_year)`` to
        an iterable of ground-truth item ids.
        NOTE(review): presumably the items newly exported within N years - confirm.
    proba_threshold : strict lower bound applied to ``predict_proba``.
    neighbor_size : not used by this function; kept so existing callers that
        pass it keep working.

    Returns
    -------
    ``0`` when nothing is recommended, otherwise ``hits / recommended`` as a float.

    Raises
    ------
    KeyError
        If ``country`` is missing from ``idx_to_country`` or the
        ``(country, year)`` pair is missing from ``new_exp_Nyears``.
    """
    c_items = new_exp_Nyears[(idx_to_country[country], current_year)]

    # One combined boolean mask instead of three successive filtered copies.
    selected = (
        (result_13_18['predict_proba'] > proba_threshold)
        & (result_13_18['country_id'] == country)
        & (result_13_18['year'] == current_year)
    )
    rec_items = result_13_18.loc[selected, 'item_id'].tolist()
    if not rec_items:
        return 0

    # Set membership is O(1); ints and equal-valued floats hash alike, so
    # 1 still matches 1.0 exactly as it did with the list lookup.
    rec_lookup = set(rec_items)
    hit_count = sum(1 for item in c_items if item in rec_lookup)
    return hit_count / len(rec_items)

# Mean precision over every country for the given year
def FM_precision_year(year, new_exp_Nyears, proba_threshold=0.5, neighbor_size=16):
    """Average ``FM_precision`` over all country indices for one ``year``.

    Country indices run from 0 up to ``len(idx_to_country) - 1``. The remaining
    arguments are forwarded to ``FM_precision`` unchanged. The result is the
    sum of the per-country precisions divided by the number of countries.
    """
    n_countries = len(idx_to_country)
    per_country = [
        FM_precision(idx, year, new_exp_Nyears, proba_threshold, neighbor_size)
        for idx in range(n_countries)
    ]
    return sum(per_country) / n_countries

# Mean precision over the whole data set (every country and every year)
def FM_precision_all(n_year, proba_threshold=0.5, neighbor_size=16):
    """Average ``FM_precision`` over all countries and all evaluated years.

    Parameters
    ----------
    n_year : 3 or 5. Selects the ground-truth table and the evaluated years:
        3 -> ``new_exp_3years`` with years 2013..2018,
        5 -> ``new_exp_5years`` with years 2013..2016.
        NOTE(review): the shorter range for 5 presumably reflects the longer
        look-ahead window - confirm against how the tables were built.
    proba_threshold, neighbor_size : forwarded to ``FM_precision``.

    Returns
    -------
    Sum of all per-(country, year) precisions divided by
    ``len(idx_to_country) * number_of_years``.

    Raises
    ------
    ValueError
        If ``n_year`` is neither 3 nor 5 (previously this surfaced as an
        unrelated ``UnboundLocalError`` on ``years``).
    """
    if n_year == 3:
        years = range(2013, 2019)
        new_exp_Nyears = new_exp_3years
    elif n_year == 5:
        years = range(2013, 2017)
        new_exp_Nyears = new_exp_5years
    else:
        raise ValueError(f"n_year must be 3 or 5, got {n_year!r}")

    c_len = len(idx_to_country)
    # Country-major, year-minor order, same as the original nested loops.
    precision = [
        FM_precision(country, year, new_exp_Nyears, proba_threshold, neighbor_size)
        for country in range(c_len)
        for year in years
    ]
    return sum(precision) / (c_len * len(years))

In [73]:
def get_best_combination(n_year):
    """Grid-search the probability threshold that maximizes overall precision.

    Evaluates ``FM_precision_all(n_year, thres)`` for the 80 thresholds
    0.10, 0.11, ..., 0.89 and returns ``(best_score, best_thres)``. When several
    thresholds tie for the best score, the largest of them is returned. If
    every score is negative the initial ``(0, 0)`` is returned.

    Parameters
    ----------
    n_year : forwarded to ``FM_precision_all`` (3 or 5).
    """
    best_score = 0
    best_thres = 0
    # Integer steps divided by 100 avoid the accumulated rounding error of a
    # float-step arange (which produced values like 0.30999999999999994) and
    # make the number of grid points exact.
    thresholds = np.arange(10, 90) / 100
    for thres in tqdm(thresholds):
        score = FM_precision_all(n_year, thres)
        # `>=` (not `>`) so that later, larger thresholds win ties.
        if score >= best_score:
            best_score = score
            best_thres = thres
    return best_score, best_thres

In [56]:
# Precision over all countries for 2018, against exports within the next 3 years
FM_precision_year(2018, new_exp_3years)

0.4311314881709464

In [58]:
# Precision over all years and all countries, against exports within the next 3 years
FM_precision_all(3, 0.5)

0.3412232184148822

In [69]:
# Threshold search for the 3-year setting; returns (best_score, best_thres).
get_best_combination(3)

  0%|          | 0/80 [00:00<?, ?it/s]

100%|██████████| 80/80 [03:00<00:00,  2.26s/it]


(0.4748295007823885, 0.30999999999999994)

In [70]:
# Precision over all countries for 2016, against exports within the next 5 years
FM_precision_year(2016, new_exp_5years)

0.33092084541871736

In [74]:
# Precision over all years and all countries, against exports within the next 5 years
FM_precision_all(5, 0.5)

0.36352755302861633

In [75]:
# Threshold search for the 5-year setting; returns (best_score, best_thres).
get_best_combination(5)

100%|██████████| 80/80 [01:30<00:00,  1.14s/it]


(0.5535778443304197, 0.2599999999999999)