## Application of count models on Movielens

In [1]:
#%load_ext watermark
%load_ext autoreload 
%autoreload 2

In [63]:
import os
import sys
from collections import deque
import numpy as np
import pandas as pd
from subprocess import call
from pybpr import *
import pybpr
import matplotlib.pyplot as plt
from functools import partial
from dataclasses import dataclass
from pprint import pprint
from typing import List, Tuple, Union, Optional, Dict

In [3]:
print(os.getcwd())


/home/ctripp/zazzle/pybpr/examples


In [4]:
# %%time
# df = load_movielens_data('ml-1m')
df = pybpr.load_movielens_data('ml-100k')
df.sort_values(['timestamp', 'user_id'], inplace=True)
df.reset_index(inplace=True)
df['positive'] = (df['rating'] >= 4).astype(np.int8)
df.head()


Unnamed: 0,index,user_id,item_id,rating,timestamp,positive
0,214,259,255,4,874724710,1
1,83965,259,286,4,874724727,1
2,43027,259,298,4,874724754,1
3,21396,259,185,4,874724781,1
4,82655,259,173,4,874724843,1


In [5]:
df[df['positive'] == 0].head()


Unnamed: 0,index,user_id,item_id,rating,timestamp,positive
7,26399,259,288,3,874724905,0
11,4029,259,405,3,874725120,0
12,76434,259,1074,3,874725264,0
16,97222,851,687,2,874728168,0
17,69246,851,696,3,874728338,0


In [6]:
# %%time
test_proportion = 0.2

total = len(df)
slice_index = int(test_proportion * total)
print(f'{total} total entries. {test_proportion*100}% = {slice_index}')

# train_df = df.iloc[:slice_index].sort_values(['user_id', 'timestamp']).reset_index()
# test_df = df.iloc[slice_index:].sort_values(['user_id', 'timestamp']).reset_index()
train_df = df.iloc[:slice_index]
test_df = df.iloc[slice_index:]
test_timestamp = test_df['timestamp'].iloc[0]
print(f'test_timestamp: {test_timestamp}')


100000 total entries. 20.0% = 20000
test_timestamp: 878963305


In [7]:
test_df[test_df['positive'] == 1].head()


Unnamed: 0,index,user_id,item_id,rating,timestamp,positive
20022,96941,181,280,4,878963381,1
20023,24271,181,974,4,878963417,1
20042,11930,267,475,5,878970368,1
20043,9176,267,250,5,878970399,1
20044,45928,267,100,5,878970427,1


In [8]:
from count_model.link_counter import LinkCounter
from count_model.uniform_prior_model import UniformPriorModel
from count_model.window_counter import WindowCounter
from count_model.permutation_counter import PermutationCounter
from count_model.link_count_data import LinkCountData

positive_counter = PermutationCounter(LinkCounter())
negative_counter = PermutationCounter(LinkCounter())
both_counter = PermutationCounter(LinkCounter())
dest_counter = LinkCounter()
# counter = WindowCounter(LinkCounter(), 10)


In [9]:
def make_action_tuple(row, positive=None):
    return int(row['item_id']), (
        bool(row['positive'] == 1) if positive is None else positive
    )

def make_item_sequence(df):
    return (row['item_id'] for row_index, row in df.iterrows())


def make_action_sequence(df):
    return (make_action_tuple(row, None) for row_index, row in df.iterrows())


In [10]:
user_groups = df.groupby('user_id')
for user_id, group in user_groups:
    user_df = user_groups.get_group(user_id)
    # positive_interactions = user_df[user_df['positive'] >= 1]
    for idx, row in user_df.iterrows():
        dest_counter.observe_link(make_action_tuple(row, False), make_action_tuple(row))
    positive_counter.observe_sequence(
        make_action_sequence(user_df[user_df['positive'] == 1])
    )
    negative_counter.observe_sequence(
        make_action_sequence(user_df[user_df['positive'] == 0])
    )
    both_counter.observe_sequence(make_action_sequence(user_df))


In [11]:
# probability_model = UniformPriorModel(counter.link_counter, 1.1, 1e5)


In [193]:
def make_pos_neg_action(action):
    return (action[0], False), (action[0], True)


def make_opposite_action(action):
    return (action[0], not action[1])


def compute_with_links(
    assesment_func,
    dest_counter,
    link_counter,
    sources,
    dest,
):
    negative_dest, positive_dest = make_pos_neg_action(dest)

    dest_count = dest_counter.get_link_count(negative_dest, positive_dest)

    source_counts = []
    for source in sources:
        opp_source = make_opposite_action(source)

        source_counts.append(
            (
                source,
                link_counter.get_link_count(source, positive_dest),
                link_counter.get_link_count(source, negative_dest),
                link_counter.get_link_count(opp_source, positive_dest),
                link_counter.get_link_count(opp_source, negative_dest),
            )
        )

    p = assesment_func(positive_dest, LinkCountData(dest_count.count, dest_count.total), source_counts)
    if not dest[1]:
        p = 1.0 - p
    return np.log(p)


def make_assessment_function(prob_func, dest_counter, link_counter):
    return lambda sources, dest: compute_with_links(
        prob_func,
        dest_counter,
        link_counter,
        sources,
        dest,
    )


In [221]:
def compute_naive_bayes_posterior(
    prior_numerator,
    prior_denominator,
    pos_feature_prior_numerator,
    pos_feature_prior_denominator,
    neg_feature_prior_numerator,
    neg_feature_prior_denominator,
):
    def compute(
        dest,
        dest_count,
        source_counts,
    ):
        dest_prior = (prior_numerator + dest_count.count) / (
            prior_denominator + dest_count.total
        )
        pos_acc = np.log(dest_prior)
        neg_acc = np.log(1.0 - dest_prior)
        for source_count in source_counts:
            (
                source,
                source_to_pos_dest,
                source_to_neg_dest,
                neg_source_to_pos_dest,
                neg_source_to_neg_dest,
            ) = source_count
            # P(source | dest) -> # source to dest / (# total both source to dest)
            #  link_count(source, dest) / get_source_data(dest).total
            cond_prob = (pos_feature_prior_numerator + source_to_pos_dest.count) / (
                pos_feature_prior_denominator
                + source_to_pos_dest.count
                + neg_source_to_pos_dest.count
            )
            pos_acc += np.log(cond_prob)

            # source to -dest / total +/- source to -dest
            neg_cond_prob = (neg_feature_prior_numerator + source_to_neg_dest.count) / (
                neg_feature_prior_denominator
                + source_to_neg_dest.count
                + neg_source_to_neg_dest.count
            )
            neg_acc += np.log(neg_cond_prob)
            # evidence = (feature_prior_numerator source_count.source_count.count / source_count.source_count.total)
            # ep += dest_prior * cond_prob
            # s = source_count.source_count
            # ep += np.log((s.count + feature_prior_numerator) / (feature_prior_denominator + s.total))
        pos = np.exp(pos_acc)
        neg = np.exp(neg_acc)
        p = pos / (pos + neg)
        # print(f'{p} {pos} {neg}')
        return max(1e-100, min(1.0, p))

    return compute
    


In [195]:
def compute_nb_class_isolating_posterior(category, *args, **kwargs):
    nb_compute = compute_naive_bayes_posterior(*args, **kwargs)

    def compute(
        dest,
        dest_count,
        source_counts,
    ):
        return nb_compute(dest, dest_count, [t for t in source_counts if t[0][1] == category])
    return compute


In [196]:
def compute_max_posterior(
    prior_numerator,
    prior_denominator,
    feature_prior_numerator,
    feature_prior_denomenator,
):
    def compute(
        dest,
        dest_count,
        source_counts,
    ):
        p = (prior_numerator + dest_count.count) / (
            prior_denominator + dest_count.total
        )
        for (
            source,
            source_to_pos_dest,
            source_to_neg_dest,
            neg_source_to_pos_dest,
            neg_source_to_neg_dest,
        ) in source_counts:
            fp = (feature_prior_numerator + source_to_pos_dest.count) / (
                feature_prior_denomenator
                + source_to_pos_dest.count
                + source_to_neg_dest.count
            )
            p = max(p, fp)
        return p

    return compute


In [197]:
def compute_avg_posterior(
    prior_numerator,
    prior_denominator,
    feature_prior_numerator,
    feature_prior_denomenator,
):
    def compute(
        dest,
        dest_count,
        source_counts,
    ):
        p = (prior_numerator + dest_count.count) / (
            prior_denominator + dest_count.total
        )
        n = 1
        for (
            source,
            source_to_pos_dest,
            source_to_neg_dest,
            neg_source_to_pos_dest,
            neg_source_to_neg_dest,
        ) in source_counts:
            fp = (feature_prior_numerator + source_to_pos_dest.count) / (
                feature_prior_denomenator
                + source_to_pos_dest.count
                + source_to_neg_dest.count
            )
            p += fp
            n += 1
        return p / n

    return compute


In [198]:
def compute_combined_posterior(
    prior_numerator,
    prior_denominator,
):
    def compute(
        dest,
        dest_count,
        source_counts,
    ):
        numerator = prior_numerator + dest_count.count
        denominator = prior_denominator + dest_count.total
        for (
            source,
            source_to_pos_dest,
            source_to_neg_dest,
            neg_source_to_pos_dest,
            neg_source_to_neg_dest,
        ) in source_counts:
            numerator += source_to_pos_dest.count
            denominator += source_to_pos_dest.count + source_to_neg_dest.count
        return numerator / denominator

    return compute


In [199]:
def compute_bayes_posterior(
    prior_numerator,
    prior_denominator,
):
    def compute(
        dest,
        dest_count,
        source_counts,
    ):
        numerator = prior_numerator + dest_count.count
        denominator = prior_denominator + dest_count.total
        return numerator / denominator

    return compute


In [200]:

@dataclass
class Evaluation():
    name:str
    score:float
    positives:float
    negatives:float

@dataclass
class ScoreSummary():
    dynamic:List[Evaluation]
    dynamic_ndcg:float
    # static:List[Evaluation]
    # static_ndcg:float

In [201]:
def compute_evaluations(
    train_df,
    test_df,
    scoring_function,
    evaluation_functions,
):
    action_indicies = []
    num_conditioning_actions_list = []
    evaluation_data = [(*t, []) for t in evaluation_functions]

    # action_attrs = ('user_id', 'timestamp', 'positive')

    for user_id, test_actions in test_df.groupby('user_id'):
        train_user_actions = train_df[train_df['user_id'] == user_id]

        get_conditioning_actions = None
        if train_user_actions['timestamp'].max() < test_actions['timestamp'].min():
            # static mode
            conditioning_actions_ = list(make_action_sequence(train_user_actions))
            get_conditioning_actions = lambda action_row : conditioning_actions_
        else:
            # dynamic mode
            get_conditioning_actions = lambda action_row : list(make_action_sequence(
                    train_user_actions[train_user_actions['timestamp'] < action_row['timestamp']]))
        
        action_indicies.extend(test_actions.index)
        # print(test_actions.loc[test_actions.index].head())
        
        for idx, action_row in test_actions.iterrows():
            conditioning_actions = get_conditioning_actions(action_row)
            action = make_action_tuple(action_row)
            score = scoring_function(
                    conditioning_actions,
                    action,
                )
            
            num_conditioning_actions_list.append(len(conditioning_actions))
            for name, evaluation_func, evaluations in evaluation_data:
                evaluations.append(evaluation_func(score, conditioning_actions, action))

    res = test_df.loc[action_indicies]
    res['num_conditioning_actions'] = num_conditioning_actions_list
    for name, evaluation_func, evaluations in evaluation_data:
        res[name] = evaluations
    return res


In [202]:
# TODO: temporal vs one-shot evaluation
# TODO: n-gram temporal prediction vs set-based prediction
# TODO: per-rating assesment models

'''
    + test data: all ratings for user (positive = actual)
    + assesment data: assesed conditional pos probability (pos/(pos+neg)) for each test user rating
'''

def compute_dcg(seq):
    # for position, (predicted, actual) in enumerate(seq):
    #     print(f'{position}, {predicted}, {actual} : {position + 2 }, {np.log(position + 2)}, {1.0 / np.log(position + 2)}')
    return sum((
        actual / np.log(position + 2)
        for position, (predicted, actual) in enumerate(seq)
    ))

def compute_ndcg(seq):
    dcg = compute_dcg(sorted(seq, reverse=True))
    idcg = compute_dcg(sorted((
            (actual, actual)
            for predicted, actual in seq),
        reverse=True))
    if idcg <= 1e-6:
        return 1.0
    #   print(f'dcg: {dcg} / idcg: {idcg}  ; {len(seq)}')  
    # print(f'dcg: {dcg} / idcg: {idcg} = ndcg: {dcg / idcg} ; {len(seq)}')
    return dcg / idcg

# def compute_ndcg_binary(predicted, actual):
#     '''
#     + actual is 1 or 0
#     + dcg is sum(1/np.log(pos+1)) for all true positives
#     + idcg is sum(1/np.log(pos+1)) for pos = 1 .. # positive ratings
#     '''
#     dcg = compute_dcg(
#         sorted((
#             predicted 
#             for predicted, actual in seq),
#         reverse=True))
#     idcg = compute_dcg(
#         sorted((
#             actual 
#             for predicted, actual in seq),
#         reverse=True))
#     return dcg / idcg

def compute_ndcg_for_counter(
    train_df,
    test_df,
    scoring_function,
):
    ndcgs = []
    for user_id, test_actions in test_df.groupby('user_id'):
        conditioning_actions = list(make_action_sequence(train_df[train_df['user_id'] == user_id]))
        user_action_seq = [
            (
                scoring_function(
                    conditioning_actions,
                    make_action_tuple(action_row),
                ),
                action_row['positive'],
            )
            for idx, action_row in test_actions.iterrows()]
        ndcg = compute_ndcg(user_action_seq)
        ndcgs.append(ndcg)
    return np.mean(ndcgs)

In [203]:
# TODO: temporal vs one-shot evaluation
# TODO: n-gram temporal prediction vs set-based prediction
# TODO: per-rating assesment models

'''
    + test data: all ratings for user (positive = actual)
    + assesment data: assesed conditional pos probability (pos/(pos+neg)) for each test user rating
'''

def compute_dcg(seq):
    # for position, (predicted, actual) in enumerate(seq):
    #     print(f'{position}, {predicted}, {actual} : {position + 2 }, {np.log(position + 2)}, {1.0 / np.log(position + 2)}')
    return sum((
        actual / np.log(position + 2)
        for position, (predicted, actual) in enumerate(seq)
    ))

def compute_ndcg(seq):
    dcg = compute_dcg(sorted(seq, reverse=True))
    idcg = compute_dcg(sorted((
            (actual, actual)
            for predicted, actual in seq),
        reverse=True))
    if idcg <= 1e-6:
        return 1.0
    #   print(f'dcg: {dcg} / idcg: {idcg}  ; {len(seq)}')  
    # print(f'dcg: {dcg} / idcg: {idcg} = ndcg: {dcg / idcg} ; {len(seq)}')
    return dcg / idcg

# def compute_ndcg_binary(predicted, actual):
#     '''
#     + actual is 1 or 0
#     + dcg is sum(1/np.log(pos+1)) for all true positives
#     + idcg is sum(1/np.log(pos+1)) for pos = 1 .. # positive ratings
#     '''

def compute_mean_ndcg(
    train_df,
    test_df,
    scoring_function,
):
    ndcgs = []
    for user_id, test_actions in test_df.groupby('user_id'):
        train_user_actions = train_df[train_df['user_id'] == user_id]
        
        get_conditioning_actions = None
        if train_user_actions['timestamp'].max() < test_actions['timestamp'].min():
            # static mode
            conditioning_actions = list(make_action_sequence(
                train_user_actions[train_user_actions['user_id'] == user_id]
                ))
            get_conditioning_actions = lambda action_row : conditioning_actions
        else:
            # dynamic mode
            get_conditioning_actions = lambda action_row : train_user_actions[train_user_actions['timestamp'] <action_row['timestamp']]
        
        user_action_seq = [
            (
                scoring_function(
                    get_conditioning_actions(action_row),
                    make_action_tuple(action_row),
                ),
                action_row['positive'],
            )
            for idx, action_row in test_actions.iterrows()]
        ndcg = compute_ndcg(user_action_seq)
        ndcgs.append(ndcg)
    return np.mean(ndcgs)

# TODO: compute and graph distribution based on # of previous ratings and scores (incl iqr/var at each point)

In [204]:
def log_score(log_prob, *args, **kwargs):
    return log_prob

def brier_score(log_prob, *args, **kwargs):
    p = np.exp(log_prob)
    return (1 - p) ** 2 + (0 - (1-p)) ** 2

def prob_score(log_prob, *args, **kwargs):
    return np.exp(log_prob)

    
def compute_scores(
        df,
        train_df,
        test_df, 
        assessment_function,
    ):
    scores = (
        ('log', log_score), 
        ('brier', brier_score),
        ('prob', prob_score),
    )
    
    dynamic_evaluations = compute_evaluations(df, test_df, assessment_function, scores)
    # static_evaluations = compute_evaluations(train_df, test_df, assessment_function, scores)

    def summarize_evaluations(evals):
        result = []
        for name, _ in scores:
            e = evals[name]
            result.append(Evaluation(
                name,
                e.mean(),
                e[evals['positive'] == 1].mean(),
                e[evals['positive'] == 0].mean(),
            ))
        return result

    return ScoreSummary(
        summarize_evaluations(dynamic_evaluations),
        compute_mean_ndcg(df, test_df, assessment_function),
        # summarize_evaluations(static_evaluations),
        # compute_mean_ndcg(train_df, test_df, assessment_function),
    )

In [205]:
test_df_sample = test_df.sample(2000)
print(len(test_df_sample))
pos_df = df[df['positive'] == 1]
pos_train_df = train_df[train_df['positive'] == 1]
pos_test_df = test_df_sample[test_df_sample['positive'] == 1]

neg_df = df[df['positive'] == 0]
neg_train_df = train_df[train_df['positive'] == 0]
neg_test_df = test_df_sample[test_df_sample['positive'] == 0]


2000


In [206]:
pos_bayes = len(train_df[train_df['positive'] == 1]) / len(train_df)
print(pos_bayes)


0.55975


In [207]:
# How to compute NDCG for binary events?
# What performance measures should we use here?
# How is NDCG computed for ALS?
# Next counting model types to investigate

In [222]:
nb_scores = compute_scores(
    df,
    train_df,
    test_df_sample,
    make_assessment_function(compute_naive_bayes_posterior(
        pos_bayes * 100,
        100,
        1e-9,
        2e-9,
        1e-9,
        2e-9,
    ), dest_counter, both_counter.link_counter))

pprint(nb_scores)


ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.24338759838679827,
                                 positives=-0.1984257821565767,
                                 negatives=-0.2951178600710317),
                      Evaluation(name='brier',
                                 score=0.12917122790405977,
                                 positives=0.1089918761628339,
                                 negatives=0.1523883315203089),
                      Evaluation(name='prob',
                                 score=0.8947647869047793,
                                 positives=0.9034055548549438,
                                 negatives=0.8848232581879238)],
             dynamic_ndcg=0.9712154116395633)


In [209]:
max_scores = compute_scores(
    df,
    train_df,
    test_df_sample,
    make_assessment_function(compute_max_posterior(
        pos_bayes * 10,
        10,
        pos_bayes * 10,
        10,
    ), dest_counter, both_counter.link_counter))
pprint(max_scores)


ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.6591703377753474,
                                 positives=-0.2783549291734265,
                                 negatives=-1.0973127971345469),
                      Evaluation(name='brier',
                                 score=0.46963278090927063,
                                 positives=0.12972288252774072,
                                 negatives=0.8607119113052244),
                      Evaluation(name='prob',
                                 score=0.5724070312824615,
                                 positives=0.7631711426070232,
                                 negatives=0.3529257419090411)],
             dynamic_ndcg=1.0)


In [210]:
avg_scores = compute_scores(
    df,
    train_df,
    test_df_sample,
    make_assessment_function(compute_avg_posterior(
        pos_bayes * 10,
        10,
        pos_bayes * 10,
        10,
    ), dest_counter, both_counter.link_counter))

pprint(avg_scores)


ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.5909821179257125,
                                 positives=-0.4896732320356205,
                                 negatives=-0.7075418038422698),
                      Evaluation(name='brier',
                                 score=0.4025717637969581,
                                 positives=0.30814059407108035,
                                 negatives=0.5112183784278065),
                      Evaluation(name='prob',
                                 score=0.5691641607650249,
                                 positives=0.6231365049450509,
                                 negatives=0.5070669475686508)],
             dynamic_ndcg=1.0)


In [211]:
combined_scores = compute_scores(
    df,
    train_df,
    test_df_sample,
    make_assessment_function(compute_combined_posterior(
        pos_bayes * 10,
        10,
    ), dest_counter, both_counter.link_counter))

pprint(combined_scores)


ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.555030349005581,
                                 positives=-0.4769878326230674,
                                 negatives=-0.6448212011876128),
                      Evaluation(name='brier',
                                 score=0.37577321263178987,
                                 positives=0.303848529895264,
                                 negatives=0.45852526696306156),
                      Evaluation(name='prob',
                                 score=0.6088655217946385,
                                 positives=0.6444566186673256,
                                 negatives=0.5679166253927295)],
             dynamic_ndcg=0.9363226332549881)


In [212]:
bayes_scores = compute_scores(
    df,
    train_df,
    test_df_sample,
    make_assessment_function(compute_bayes_posterior(
        pos_bayes * 10,
        10,
    ), dest_counter, both_counter.link_counter))

pprint(bayes_scores)


ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.6113379964344878,
                                 positives=-0.5092882318675327,
                                 negatives=-0.7287500911513074),
                      Evaluation(name='brier',
                                 score=0.4222265724693931,
                                 positives=0.3303754964794713,
                                 negatives=0.5279046921567225),
                      Evaluation(name='prob',
                                 score=0.5715370035677586,
                                 positives=0.6221446820849407,
                                 negatives=0.5133109648436887)],
             dynamic_ndcg=0.9363226332549881)


In [223]:
inb_scores = compute_scores(
    df,
    train_df,
    test_df_sample,
    make_assessment_function(compute_naive_bayes_posterior(
        pos_bayes * 100,
        100,
        1e-9,
        2e-9,
        1e3,
        2e3,
    ), dest_counter, both_counter.link_counter))

pprint(nb_scores)
pprint(inb_scores)



ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.24338759838679827,
                                 positives=-0.1984257821565767,
                                 negatives=-0.2951178600710317),
                      Evaluation(name='brier',
                                 score=0.12917122790405977,
                                 positives=0.1089918761628339,
                                 negatives=0.1523883315203089),
                      Evaluation(name='prob',
                                 score=0.8947647869047793,
                                 positives=0.9034055548549438,
                                 negatives=0.8848232581879238)],
             dynamic_ndcg=0.9712154116395633)
ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.805735447109555,
                                 positives=-0.3543507496122633,
                                 negatives=-1.3250705291763316),
                   

In [227]:
inb_scores = compute_scores(
    df,
    train_df,
    test_df_sample,
    make_assessment_function(compute_naive_bayes_posterior(
        pos_bayes * 10,
        10,
        1e-3,
        2e-3,
        1e-3,
        2e-3,
    ), dest_counter, both_counter.link_counter))

pprint(nb_scores)
pprint(inb_scores)



ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.24338759838679827,
                                 positives=-0.1984257821565767,
                                 negatives=-0.2951178600710317),
                      Evaluation(name='brier',
                                 score=0.12917122790405977,
                                 positives=0.1089918761628339,
                                 negatives=0.1523883315203089),
                      Evaluation(name='prob',
                                 score=0.8947647869047793,
                                 positives=0.9034055548549438,
                                 negatives=0.8848232581879238)],
             dynamic_ndcg=0.9712154116395633)
ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.26950921099348035,
                                 positives=-0.22768041869526953,
                                 negatives=-0.31763481073443245),
               

In [229]:
inb_scores = compute_scores(
    df,
    train_df,
    test_df_sample,
    make_assessment_function(compute_naive_bayes_posterior(
        pos_bayes * 10,
        10,
        1e-6,
        2e-6,
        1e-6,
        2e-6,
    ), dest_counter, both_counter.link_counter))

pprint(inb_scores)



ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.24225435328575085,
                                 positives=-0.1946272168370058,
                                 negatives=-0.29705116618914573),
                      Evaluation(name='brier',
                                 score=0.12841100228749622,
                                 positives=0.10617075846032648,
                                 negatives=0.1539992398090786),
                      Evaluation(name='prob',
                                 score=0.8955275063847118,
                                 positives=0.905785754929305,
                                 negatives=0.8837250053710403)],
             dynamic_ndcg=0.9363226332549881)


In [230]:
inb_scores = compute_scores(
    df,
    train_df,
    test_df_sample,
    make_assessment_function(compute_naive_bayes_posterior(
        pos_bayes * 1,
        1,
        1e-6,
        2e-6,
        1e-6,
        2e-6,
    ), dest_counter, both_counter.link_counter))

pprint(inb_scores)



ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.24195550952420364,
                                 positives=-0.1943116140945185,
                                 negatives=-0.29677160426588445),
                      Evaluation(name='brier',
                                 score=0.12828181032430439,
                                 positives=0.10596924597429798,
                                 negatives=0.1539532553291505),
                      Evaluation(name='prob',
                                 score=0.8957238102028348,
                                 positives=0.906090965902175,
                                 negatives=0.88379600740897)],
             dynamic_ndcg=0.9265146724569904)


In [233]:
inb_scores = compute_scores(
    df,
    train_df,
    test_df_sample,
    make_assessment_function(compute_naive_bayes_posterior(
        pos_bayes * 1,
        1,
        1e-9,
        2e-9,
        1e-9,
        2e-9,
    ), dest_counter, both_counter.link_counter))

pprint(inb_scores)



ScoreSummary(dynamic=[Evaluation(name='log',
                                 score=-0.24109967656291503,
                                 positives=-0.19360098004195295,
                                 negatives=-0.29574871449563483),
                      Evaluation(name='brier',
                                 score=0.12778286470473474,
                                 positives=0.10550728730712806,
                                 negatives=0.15341175482886288),
                      Evaluation(name='prob',
                                 score=0.896401550631522,
                                 positives=0.9066257016131323,
                                 negatives=0.8846382801473036)],
             dynamic_ndcg=0.9265146724569904)
