In [3]:
# default_exp val

In [1]:
# Reload the autoreload extension and use mode 2 so edited modules are
# re-imported before each cell runs.
%reload_ext autoreload
%autoreload 2

In [2]:
from nbdev.export import create_config

# Presumably writes the nbdev project settings for this repository — TODO confirm.
# NOTE(review): title / copyright / description look like placeholder values.
create_config(host='localhost', lib_name='code', user='ian', title='title1', copyright='cp1', description='des1')

# The official script for evaluation, which is posted in the forum.
https://tianchi.aliyun.com/forum/postDetail?spm=5176.12586969.1002.3.6c3f5619NDeQ04&postId=102089
# [Must Read!] Official Script for Evaluation

In [15]:
# export

# coding=utf-8
# from __future__ import division
# from __future__ import print_function

import datetime
import json
import sys
import time
from collections import defaultdict

import numpy as np
import pandas as pd
from loguru import logger
import os

## evaluate_each_phase

In [8]:
# export

# the higher the scores, the better the performance
def evaluate_each_phase(predictions: dict, answers: dict):
    """Score one phase's predictions against that phase's answers.

    predictions: dict
        Maps user_id to a ranked list [pred_item_id_1, pred_item_id_2, ...];
        only the first 50 positions are inspected.
    answers: dict
        Maps user_id to a tuple (item_id, item_degree).

    Returns a float32 array ordered as
    [hitrate_50_full, ndcg_50_full, hitrate_50_half, ndcg_50_half].
    The "full" metrics average over every user in ``answers``; the "half"
    metrics average over the users whose item_degree is <= the median degree.

    A user present in ``answers`` but absent from ``predictions`` raises
    KeyError; a prediction list that runs out before position 50 without
    containing the answer raises IndexError.
    """
    top_k = 50

    # For an even number of users this picks the upper of the two middle values.
    ordered_degrees = sorted(degree for _, degree in answers.values())
    median_item_degree = ordered_degrees[len(ordered_degrees) // 2]

    total_users = 0
    half_users = 0
    full_hits = 0
    half_hits = 0
    full_ndcg = 0.0
    half_ndcg = 0.0

    for user_id, (item_id, item_degree) in answers.items():
        ranked_items = predictions[user_id]
        # Position of the correct item among the first top_k predictions,
        # or top_k itself when it is not there.
        hit_rank = next(
            (pos for pos in range(top_k) if ranked_items[pos] == item_id),
            top_k,
        )
        in_half = item_degree <= median_item_degree

        total_users += 1
        if in_half:
            half_users += 1
        if hit_rank == top_k:
            continue

        gain = 1.0 / np.log2(hit_rank + 2.0)
        full_hits += 1
        full_ndcg += gain
        if in_half:
            half_hits += 1
            half_ndcg += gain

    metrics = [
        full_hits / total_users,
        full_ndcg / total_users,
        half_hits / half_users,
        half_ndcg / half_users,
    ]
    return np.array(metrics, dtype=np.float32)

## evaluate

In [9]:
# export

# submit_fname is the path to the file submitted by the participants.
# debias_track_answer.csv is the standard answer, which is not released.
def evaluate(submit_fname,
             answer_fname='data_gen/debias_track_answer.csv', current_time=None):
    """Score a submission file against the answer file, phase by phase.

    Parameters
    ----------
    submit_fname:
        Path to the submission CSV. Each non-blank row is
        ``user_id,item_id_1,...``; rows are expected to hold 50 distinct items.
    answer_fname:
        Path to the answer CSV. Each non-blank row is
        ``phase_id,user_id,item_id,item_degree``.
    current_time:
        Unix timestamp used to pick the current phase from the built-in
        schedule. Defaults to the current time.

    Returns
    -------
    (totals, r_dict)
        ``totals`` is a 4-tuple of floats: the element-wise sum of the per-phase
        score arrays (plus 7.0 on each element when the current phase is >= 7).
        ``r_dict`` maps phase_id to the array returned by
        ``evaluate_each_phase`` for every phase that was scored without error.

    Problems with the input files are reported with ``print`` instead of being
    raised, so a partial result is still returned.
    """
    schedule_in_unix_time = [
        0,  # ........ 1970-01-01 08:00:00 (T=0)
        1586534399,  # 2020-04-10 23:59:59 (T=1)
        1587139199,  # 2020-04-17 23:59:59 (T=2)
        1587743999,  # 2020-04-24 23:59:59 (T=3)
        1588348799,  # 2020-05-01 23:59:59 (T=4)
        1588953599,  # 2020-05-08 23:59:59 (T=5)
        1589558399,  # 2020-05-15 23:59:59 (T=6)
        1590163199,  # 2020-05-22 23:59:59 (T=7)
        1590767999,  # 2020-05-29 23:59:59 (T=8)
        1591372799  # .2020-06-05 23:59:59 (T=9)
    ]
    assert len(schedule_in_unix_time) == 10
    for i in range(1, len(schedule_in_unix_time) - 1):
        # 604800 == one week
        assert schedule_in_unix_time[i] + 604800 == schedule_in_unix_time[i + 1]

    if current_time is None:
        current_time = int(time.time())
    print('current_time:', current_time)
    print('date_time:', datetime.datetime.fromtimestamp(current_time))
    current_phase = 0
    while (current_phase < 9) and (
            current_time > schedule_in_unix_time[current_phase + 1]):
        current_phase += 1
    print('current_phase:', current_phase)

    # One {user_id: (item_id, item_degree)} dict per phase.
    answers = [{} for _ in range(10)]
    try:
        with open(answer_fname, 'r') as fin:
            for line in fin:
                if line.strip() == '':
                    continue  # tolerate blank lines, as the submission parser does
                line = [int(x) for x in line.split(',')]
                phase_id, user_id, item_id, item_degree = line
                assert user_id % 11 == phase_id
                # exactly one test case for each user_id
                answers[phase_id][user_id] = (item_id, item_degree)
    except Exception:
        print('server-side error: answer file incorrect')

    predictions = {}
    try:
        with open(submit_fname, 'r') as fin:
            for line in fin:
                line = line.strip()
                if line == '':
                    continue
                line = line.split(',')
                user_id = int(line[0])
                if user_id in predictions:
                    print('submitted duplicate user_ids')
                item_ids = [int(i) for i in line[1:]]
                if len(item_ids) != 50:
                    print('each row need have 50 items')
                if len(set(item_ids)) != 50:
                    # Report and keep going, like the checks above. The previous
                    # code called an undefined helper here, which aborted
                    # parsing of every remaining row.
                    print('each row need have 50 DISTINCT items')
                predictions[user_id] = item_ids
    except Exception:
        print('submission not in correct format')

    scores = np.zeros(4, dtype=np.float32)

    # The final winning teams will be decided based on phase T=7,8,9 only.
    # We thus fix the scores to 1.0 for phase 0,1,2,...,6 at the final stage.
    if current_phase >= 7:  # if at the final stage, i.e., T=7,8,9
        scores += 7.0  # then fix the scores to 1.0 for phase 0,1,2,...,6
    phase_beg = (7 if (current_phase >= 7) else 0)
    phase_end = current_phase + 1
    r_dict = {}  # per-phase scores
    for phase_id in range(phase_beg, phase_end):
        for user_id in answers[phase_id]:
            if user_id not in predictions:
                print('user_id %d of phase %d not in submission' % (
                        user_id, phase_id))
        try:
            # We sum the scores from all the phases, instead of averaging them.
            rs = evaluate_each_phase(predictions, answers[phase_id])
            print(f'phase_id: {phase_id}, score: {rs}')
            r_dict[phase_id] = rs
            scores += rs
        except Exception:
            # A phase that cannot be scored (e.g. a user missing from the
            # submission) contributes nothing to the totals.
            print('error occurred during evaluation')

    return (float(scores[0]), float(scores[1]), float(scores[2]), float(scores[3])), r_dict

## `_create_answer_file_for_evaluation`

In [10]:
# export

# FYI. You can create a fake answer file for validation based on this. For example,
# you can mask the latest ONE click made by each user in underexpose_test_click-T.csv,
# and use those masked clicks to create your own validation set, i.e.,
# a fake underexpose_test_qtime_with_answer-T.csv for validation.
def _create_answer_file_for_evaluation(answer_fname='data_gen/debias_track_answer.csv',
                                       now_phase=5):
    """Write the answer file consumed by ``evaluate``.

    For every phase 0..now_phase the click counts per item ("item degree") are
    accumulated from that phase's train and test click files; the counts carry
    over from one phase to the next. Then, for each row of the phase's
    ``underexpose_test_qtime_with_answer-<phase>.csv``, one line
    ``phase_id,user_id,item_id,item_degree`` is written to ``answer_fname``.

    Parameters
    ----------
    answer_fname:
        Path of the answer file to (over)write.
    now_phase:
        Last phase (inclusive) to process. Defaults to 5, the value that used
        to be hard-coded in the body.

    Raises
    ------
    AssertionError
        If a user_id in an answer file does not satisfy
        ``user_id % 11 == phase_id``.
    """
    train = './data_origin/underexpose_train/underexpose_train_click-%d.csv'
    test = './data_origin/underexpose_test/underexpose_test_click-%d/underexpose_test_click-%d.csv'

    # underexpose_test_qtime_with_answer-T.csv contains <user_id, item_id, time>
    answer = 'data_gen/underexpose_test_qtime_with_answer-%d.csv'  # not released

    def _read_clicks(path):
        """Yield (user_id, item_id) for every ``user_id,item_id,time`` row of ``path``."""
        with open(path) as fin:
            for line in fin:
                user_id, item_id, timestamp = line.split(',')
                user_id, item_id = int(user_id), int(item_id)
                float(timestamp)  # parsed only to reject malformed rows, as before
                yield user_id, item_id

    item_deg = defaultdict(int)
    with open(answer_fname, 'w') as fout:
        for phase_id in range(now_phase + 1):
            # Degrees are updated from train and test clicks before the
            # answers of the same phase are written.
            for click_path in (train % phase_id, test % (phase_id, phase_id)):
                for _, item_id in _read_clicks(click_path):
                    item_deg[item_id] += 1
            for user_id, item_id in _read_clicks(answer % phase_id):
                assert user_id % 11 == phase_id
                print(phase_id, user_id, item_id, item_deg[item_id],
                      sep=',', file=fout)

In [18]:
# export

def create_underexpose_test_qtime_with_answer(now_phase):
    """Create ``data_gen/underexpose_test_qtime_with_answer-<phase>.csv`` for phases 0..now_phase.

    Each file is built from the phase's test click file: rows are sorted by
    (user_id, time) and, for every (user_id, item_id) pair, only the last row
    is kept. The result is written without header or index. A user can
    therefore appear on several rows, ordered by time.

    A phase whose output file already exists is skipped, so delete the file to
    force it to be regenerated.
    """
    test_path = './data_origin/underexpose_test'
    for phase in range(now_phase + 1):
        logger.info(f'phase: {phase}')
        out_path = f'data_gen/underexpose_test_qtime_with_answer-{phase}.csv'
        if os.path.exists(out_path):
            logger.info(f'{out_path} exists!')
            continue
        # The output directory may not exist on a fresh checkout.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        clicks = pd.read_csv(
            test_path + '/underexpose_test_click-{}/underexpose_test_click-{}.csv'.format(phase, phase),
            header=None, names=['user_id', 'item_id', 'time'])
        (clicks
         .sort_values(['user_id', 'time'])
         .drop_duplicates(subset=['user_id', 'item_id'], keep='last')
         .reset_index(drop=True)
         .to_csv(out_path, index=False, header=False))

# main

In [12]:
# export

def main(now_phase, submit_fname):
    """Build the validation answer files, then score ``submit_fname``.

    ``now_phase`` is forwarded only to
    ``create_underexpose_test_qtime_with_answer``; the answer-file builder and
    ``evaluate`` are called with their default arguments.

    Returns whatever ``evaluate`` returns.
    """
    create_underexpose_test_qtime_with_answer(now_phase)
    _create_answer_file_for_evaluation()
    return evaluate(submit_fname)

In [13]:
# Run the validation pipeline on one submission file.
# NOTE(review): submit_fname is an absolute path on one machine; point it at
# your own submission before re-running this cell.
now_phase = 5
submit_fname = '/Users/luoyonggui/Downloads/baseline1_itemcf1414.csv'
main(now_phase, submit_fname)

2020-05-11 15:42:29.240 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 0
2020-05-11 15:42:29.505 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 1
2020-05-11 15:42:29.596 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 2
2020-05-11 15:42:29.681 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 3
2020-05-11 15:42:29.770 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 4
2020-05-11 15:42:29.869 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 5


0
1
2
3
4
5
current_time: 1589182952
date_time: 2020-05-11 15:42:32
current_phase: 5
phase_id: 0, score: [0.7678894  0.25456    0.82934785 0.30067682]
phase_id: 1, score: [0.77925843 0.25543353 0.86615044 0.31160575]
phase_id: 2, score: [0.791716   0.25759798 0.84988177 0.30619693]
phase_id: 3, score: [0.7671642  0.24578968 0.83682984 0.29307976]
phase_id: 4, score: [0.67564404 0.21810669 0.75482404 0.26287678]
user_id 16 of phase 5 not in submission
user_id 27 of phase 5 not in submission
user_id 49 of phase 5 not in submission
user_id 71 of phase 5 not in submission
user_id 82 of phase 5 not in submission
user_id 93 of phase 5 not in submission
user_id 115 of phase 5 not in submission
user_id 126 of phase 5 not in submission
user_id 137 of phase 5 not in submission
user_id 148 of phase 5 not in submission
user_id 159 of phase 5 not in submission
user_id 170 of phase 5 not in submission
user_id 192 of phase 5 not in submission
user_id 203 of phase 5 not in submission
user_id 225 of ph

((3.7816720008850098,
  1.2314879894256592,
  4.137033939361572,
  1.4744360446929932),
 {0: array([0.7678894 , 0.25456   , 0.82934785, 0.30067682], dtype=float32),
  1: array([0.77925843, 0.25543353, 0.86615044, 0.31160575], dtype=float32),
  2: array([0.791716  , 0.25759798, 0.84988177, 0.30619693], dtype=float32),
  3: array([0.7671642 , 0.24578968, 0.83682984, 0.29307976], dtype=float32),
  4: array([0.67564404, 0.21810669, 0.75482404, 0.26287678], dtype=float32)})

In [14]:
# Same run as the previous cell, for a different submission file.
# NOTE(review): submit_fname is an absolute path on one machine; point it at
# your own submission before re-running this cell.
now_phase = 5
submit_fname = '/Users/luoyonggui/Downloads/baseline1_itemcf3.csv'
main(now_phase, submit_fname)

2020-05-11 15:42:41.180 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 0
2020-05-11 15:42:41.266 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 1
2020-05-11 15:42:41.352 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 2
2020-05-11 15:42:41.439 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 3
2020-05-11 15:42:41.522 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 4
2020-05-11 15:42:41.620 | INFO     | __main__:create_underexpose_test_qtime_with_answer:6 - phase: 5


0
1
2
3
4
5
current_time: 1589182964
date_time: 2020-05-11 15:42:44
current_phase: 5
phase_id: 0, score: [0.01743837 0.00581016 0.02391304 0.00832858]
phase_id: 1, score: [0.01274623 0.00464359 0.01991151 0.00624457]
phase_id: 2, score: [0.00473373 0.00158052 0.00591017 0.00205374]
phase_id: 3, score: [0.00776119 0.0033281  0.01048951 0.00459934]
phase_id: 4, score: [0.01053864 0.00325746 0.01475596 0.00511294]
phase_id: 5, score: [0.02224694 0.00869069 0.03080308 0.01323797]
0.027310509234666824
0.07546510547399521 0.027310509234666824 0.10578325390815735 0.03957713767886162


((0.07546510547399521,
  0.027310509234666824,
  0.10578325390815735,
  0.03957713767886162),
 {0: array([0.01743837, 0.00581016, 0.02391304, 0.00832858], dtype=float32),
  1: array([0.01274623, 0.00464359, 0.01991151, 0.00624457], dtype=float32),
  2: array([0.00473373, 0.00158052, 0.00591017, 0.00205374], dtype=float32),
  3: array([0.00776119, 0.0033281 , 0.01048951, 0.00459934], dtype=float32),
  4: array([0.01053864, 0.00325746, 0.01475596, 0.00511294], dtype=float32),
  5: array([0.02224694, 0.00869069, 0.03080308, 0.01323797], dtype=float32)})

# nb

In [22]:
# Convert the notebooks with nbdev. Only notebook2script is used in this cell,
# so it is imported by name instead of through a wildcard import.
from nbdev.export import notebook2script
notebook2script()
notebook2script('Val.ipynb')
notebook2script('0EDA.ipynb')

Converted Val.ipynb.
Converted 0EDA.ipynb.
