In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation, NMF, TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from itertools import combinations
from scipy import sparse
from scipy.sparse.linalg import svds
import implicit

import random
my_seed = 0
random.seed(my_seed)
np.random.seed(my_seed)

import matplotlib.pyplot as plt
import json

In [2]:
import math
# ground_truth: list of items ordered by time
def nDCG_Time(ground_truth, _recList):
    rec_num = len(_recList) # topK
    # ground_truth is already sorted by time
    idealOrder = ground_truth
    idealDCG = 0.0
    for j in range(min(rec_num, len(idealOrder))):
        idealDCG += ((math.pow(2.0, len(idealOrder) - j) - 1) / math.log(2.0 + j))

    recDCG = 0.0
    for j in range(rec_num):
        item = _recList[j]
        if item in ground_truth:
            rank = len(ground_truth) - ground_truth.index(item) # why ground truth?
            recDCG += ((math.pow(2.0, rank) - 1) / math.log(1.0 + j + 1))

    return (recDCG / idealDCG)


def Recall(_test_set, _recList):
    hit = len(set(_recList).intersection(set(_test_set)))
    # return hit / float(len(_test_set))
    return hit / min(float(len(_test_set)), float(len(_recList)))

def Precision(_test_set, _recList):
    hit = len(set(_recList).intersection(set(_test_set)))
    return hit / float(len(_recList))

In [4]:
# for local
#listening_df = pd.read_csv('data/lastfm/listening_events.tsv', header=1, sep='\t',
#                      names=['user_id', 'track_id', 'album_id', 'timestamp'])
#user_df = pd.read_csv('data/lastfm/users.tsv', header=1, sep='\t',
#                      names=['user_id', 'country', 'age', 'gender', 'creation_time'])
listening_df = pd.read_csv('./data/lastfm_2020/listening_events_2020.tsv', header=1, sep='\t',
                           names=['user_id', 'track_id', 'album_id', 'timestamp'])
user_df = pd.read_csv('./data/lastfm_2020/users_2020.tsv', header=1, sep='\t',
                     names=['user_id', 'country', 'age', 'gender', 'creation_time'])

In [9]:
# for server
listening_df = pd.read_csv('/data/sli21/lastfm/listening_events.tsv', header=1, sep='\t',
                      names=['user_id', 'track_id', 'album_id', 'timestamp'])
user_df = pd.read_csv('/data/sli21/lastfm/users.tsv', header=1, sep='\t',
                      names=['user_id', 'country', 'age', 'gender', 'creation_time'])

In [5]:
listening_users = listening_df['user_id'].unique()
filed_users = user_df['user_id'].unique()

for id in listening_users:
    if id not in filed_users:
        print(id)

2


In [6]:
# user with id 2 is not in the `user_df`, so we delete their record from `listening_df` as well.
listening_df = listening_df[listening_df['user_id'] != 2]

In [7]:
user_df['gender'].unique()

array(['n', 'm', 'f', nan], dtype=object)

In [8]:
f_users = user_df[user_df['gender'] == 'f']
m_users = user_df[user_df['gender'] == 'm']
print('Number of female users: {}\nNumber of male users: {}'.format(f_users.shape[0], m_users.shape[0]))

Number of female users: 2372
Number of male users: 9950


### collaborative filtering

In [9]:
def df_to_mat(df, user_n, item_n, user_id_to_iid, item_id_to_iid):
    """
    Convert DataFrame to sparse matrix.

    Arg:
        df: DataFrame, ratings dataframe with user_id, movie_id and rating

    Return:
        mat: scipy.sparse.csr_matrix, sparse ratings matrix with rows being users and cols being items
    """
    
    mat = sparse.lil_matrix((user_n, item_n))
    for _, row in df.iterrows():
        user_id = int(row[0])
        item_id = int(row[1])
        user_iid = user_id_to_iid[user_id]
        item_iid = item_id_to_iid[item_id]
        mat[user_iid, item_iid] = 1
    
    return mat

In [10]:
def cf_recommend(user_id, topk, user_id_to_iid, item_iid_to_id, train_mat, est_mat):
    
    user_iid = user_id_to_iid[user_id]
    user_interactions = train_mat[user_iid, :]
    interacted_before = np.nonzero(user_interactions)[1]
    estimations = est_mat[user_iid, :].copy()
    estimations[interacted_before] = 0

    top_item_iids = np.argsort(-estimations)[:topk]
    top_item_ids = [item_iid_to_id[i] for i in top_item_iids]

    return (user_id, np.array(top_item_ids))

In [16]:
def cf_experiment(n_epochs, listening_df, user_df):
    
    all_f_cf_r = []
    all_m_cf_r = []

    for _ in range(n_epochs):
        # small_listening_df = listening_df.sample(frac=0.005, ignore_index=True) #1/200 of dataset
        small_listening_df = listening_df.sample(frac=0.005)  #ignore_index removed for pandas version < 1.3.0
        # small_listening_df = listening_df #1/1 of dataset

        user_n = small_listening_df['user_id'].nunique()
        item_n = small_listening_df['track_id'].nunique()

        user_ids = small_listening_df['user_id'].unique()
        item_ids = small_listening_df['track_id'].unique()

        user_id_to_iid = {user_ids[i]:i for i in range(len(user_ids))}
        user_iid_to_id = {i:user_ids[i] for i in range(len(user_ids))}

        item_id_to_iid = {item_ids[i]:i for i in range(len(item_ids))}
        item_iid_to_id = {i:item_ids[i] for i in range(len(item_ids))}

        gender_df = pd.merge(user_df, small_listening_df, on='user_id')[['user_id', 'gender']]
        f_user_ids = gender_df[gender_df['gender'] == 'f']['user_id'].unique()
        m_user_ids = gender_df[gender_df['gender'] == 'm']['user_id'].unique()

        train_df, test_df = train_test_split(small_listening_df, test_size=0.2)

        train_mat = df_to_mat(train_df, user_n, item_n, user_id_to_iid, item_id_to_iid)
        train_mat = train_mat.tocsr()

        # mf = NMF(n_components=10, init='random', random_state=0, max_iter=500, verbose=False)
        # mf = TruncatedSVD(n_components=10, algorithm='arpack', tol=0.0)
        # user_f = mf.fit_transform(train_mat)
        # item_f = mf.components_.T
        # est_mat = np.dot(user_f, item_f.T)
        """
        user_svd = TruncatedSVD(n_components=10, algorithm='arpack', tol=0.0)
        user_f = user_svd.fit_transform(train_mat)
        item_svd = TruncatedSVD(n_components=10, algorithm='arpack', tol=0.0)
        item_f = item_svd.fit_transform(train_mat.transpose())
        est_mat = np.dot(user_f, item_f.T)
        """
        mf = implicit.als.AlternatingLeastSquares(factors=10, regularization=0.05, alpha=2.0)
        mf.fit(train_mat)
        user_f = mf.user_factors
        item_f = mf.item_factors
        est_mat = np.dot(user_f, item_f.T)

        test_mat = df_to_mat(test_df, user_n, item_n, user_id_to_iid, item_id_to_iid)
        test_mat = test_mat.tocsr()
        
        f_cf_r = []
        for user_id in f_user_ids:
            user_iid = user_id_to_iid[user_id]
            test_item_iids = list(np.argwhere(test_mat[user_iid] > 0)[:, 1])
            test_item_ids = list(map(lambda x: item_iid_to_id[x], test_item_iids))

            if len(test_item_ids) > 0:
                top_item_ids = list(cf_recommend(user_id, 10, user_id_to_iid, item_iid_to_id, train_mat, est_mat)[1])

                recall = Recall(test_item_ids, top_item_ids)
                f_cf_r.append(recall)
        
        all_f_cf_r.append(np.average(f_cf_r))

        m_cf_r = []
        for user_id in m_user_ids:
            user_iid = user_id_to_iid[user_id]
            test_item_iids = list(np.argwhere(test_mat[user_iid] > 0)[:, 1])
            test_item_ids = list(map(lambda x: item_iid_to_id[x], test_item_iids))

            if len(test_item_ids) > 0:
                top_item_ids = list(cf_recommend(user_id, 10, user_id_to_iid, item_iid_to_id, train_mat, est_mat)[1])

                recall = Recall(test_item_ids, top_item_ids)
                m_cf_r.append(recall)
        
        all_m_cf_r.append(np.average(m_cf_r))
        print(all_f_cf_r, all_m_cf_r)
    
    return (all_f_cf_r, all_m_cf_r)

In [17]:
cf_results = cf_experiment(10, listening_df, user_df)



  0%|          | 0/15 [00:00<?, ?it/s]

[0.001954661114966458] [0.0008094667480413025]


  0%|          | 0/15 [00:00<?, ?it/s]

[0.001954661114966458, 0.0006958942240779402] [0.0008094667480413025, 0.0008147408147408146]


  0%|          | 0/15 [00:00<?, ?it/s]

[0.001954661114966458, 0.0006958942240779402, 0.0016294227188081935] [0.0008094667480413025, 0.0008147408147408146, 0.0019797198533894507]


  0%|          | 0/15 [00:00<?, ?it/s]

[0.001954661114966458, 0.0006958942240779402, 0.0016294227188081935, 0.0017997685185185187] [0.0008094667480413025, 0.0008147408147408146, 0.0019797198533894507, 0.0010060249559629595]


  0%|          | 0/15 [00:00<?, ?it/s]

[0.001954661114966458, 0.0006958942240779402, 0.0016294227188081935, 0.0017997685185185187, 0.0006514842510763652] [0.0008094667480413025, 0.0008147408147408146, 0.0019797198533894507, 0.0010060249559629595, 0.0009034057443786162]


  0%|          | 0/15 [00:00<?, ?it/s]

[0.001954661114966458, 0.0006958942240779402, 0.0016294227188081935, 0.0017997685185185187, 0.0006514842510763652, 0.0005771006463527238] [0.0008094667480413025, 0.0008147408147408146, 0.0019797198533894507, 0.0010060249559629595, 0.0009034057443786162, 0.00018838304552590268]


  0%|          | 0/15 [00:00<?, ?it/s]

[0.001954661114966458, 0.0006958942240779402, 0.0016294227188081935, 0.0017997685185185187, 0.0006514842510763652, 0.0005771006463527238, 0.0009449562957713206] [0.0008094667480413025, 0.0008147408147408146, 0.0019797198533894507, 0.0010060249559629595, 0.0009034057443786162, 0.00018838304552590268, 0.001001999555654299]


  0%|          | 0/15 [00:00<?, ?it/s]

[0.001954661114966458, 0.0006958942240779402, 0.0016294227188081935, 0.0017997685185185187, 0.0006514842510763652, 0.0005771006463527238, 0.0009449562957713206, 0.0014229064613949147] [0.0008094667480413025, 0.0008147408147408146, 0.0019797198533894507, 0.0010060249559629595, 0.0009034057443786162, 0.00018838304552590268, 0.001001999555654299, 0.0017776556776556774]


  0%|          | 0/15 [00:00<?, ?it/s]

[0.001954661114966458, 0.0006958942240779402, 0.0016294227188081935, 0.0017997685185185187, 0.0006514842510763652, 0.0005771006463527238, 0.0009449562957713206, 0.0014229064613949147, 0.0016315161161957817] [0.0008094667480413025, 0.0008147408147408146, 0.0019797198533894507, 0.0010060249559629595, 0.0009034057443786162, 0.00018838304552590268, 0.001001999555654299, 0.0017776556776556774, 0.0015249316811570242]


  0%|          | 0/15 [00:00<?, ?it/s]

[0.001954661114966458, 0.0006958942240779402, 0.0016294227188081935, 0.0017997685185185187, 0.0006514842510763652, 0.0005771006463527238, 0.0009449562957713206, 0.0014229064613949147, 0.0016315161161957817, 0.0009330534173081408] [0.0008094667480413025, 0.0008147408147408146, 0.0019797198533894507, 0.0010060249559629595, 0.0009034057443786162, 0.00018838304552590268, 0.001001999555654299, 0.0017776556776556774, 0.0015249316811570242, 0.0005960378983634797]


In [None]:
all_f_cf_r = np.array(cf_results[0])
all_m_cf_r = np.array(cf_results[1])
cf_fairness_scores = np.abs(all_f_cf_r - all_m_cf_r)

In [None]:
def cf_experiment_implicit(n_epochs, listening_df, user_df):
    
    all_f_cf_r = []
    all_m_cf_r = []

    for _ in range(n_epochs):
        # small_listening_df = listening_df.sample(frac=0.005, ignore_index=True) #1/200 of dataset
        small_listening_df = listening_df.sample(frac=0.1)
        # small_listening_df = listenting_df

        user_n = small_listening_df['user_id'].nunique()
        item_n = small_listening_df['track_id'].nunique()

        user_ids = small_listening_df['user_id'].unique()
        item_ids = small_listening_df['track_id'].unique()

        user_id_to_iid = {user_ids[i]:i for i in range(len(user_ids))}
        user_iid_to_id = {i:user_ids[i] for i in range(len(user_ids))}

        item_id_to_iid = {item_ids[i]:i for i in range(len(item_ids))}
        item_iid_to_id = {i:item_ids[i] for i in range(len(item_ids))}

        gender_df = pd.merge(user_df, small_listening_df, on='user_id')[['user_id', 'gender']]
        f_user_ids = gender_df[gender_df['gender'] == 'f']['user_id'].unique()
        m_user_ids = gender_df[gender_df['gender'] == 'm']['user_id'].unique()

        train_df, test_df = train_test_split(small_listening_df, test_size=0.2)

        train_mat = df_to_mat(train_df, user_n, item_n, user_id_to_iid, item_id_to_iid)
        train_mat = train_mat.tocsr()

        mf = implicit.als.AlternatingLeastSquares(factors=100, regularization=0.00, alpha=5.0) #
        mf.fit(train_mat)
        user_f = mf.user_factors
        item_f = mf.item_factors
        est_mat = np.dot(user_f, item_f.T)

        test_mat = df_to_mat(test_df, user_n, item_n, user_id_to_iid, item_id_to_iid)
        test_mat = test_mat.tocsr()
        
        f_cf_r = []
        for user_id in f_user_ids:
            user_iid = user_id_to_iid[user_id]
            test_item_iids = list(np.argwhere(test_mat[user_iid] > 0)[:, 1])
            test_item_ids = list(map(lambda x: item_iid_to_id[x], test_item_iids))

            if len(test_item_ids) > 0:
                top_item_iids = list(mf.recommend(user_iid, train_mat[user_iid], N=10, filter_already_liked_items=True)[0])
                top_item_ids = [item_iid_to_id[i] for i in top_item_iids]

                recall = Recall(test_item_ids, top_item_ids)
                f_cf_r.append(recall)
        
        all_f_cf_r.append(np.average(f_cf_r))

        m_cf_r = []
        for user_id in m_user_ids:
            user_iid = user_id_to_iid[user_id]
            test_item_iids = list(np.argwhere(test_mat[user_iid] > 0)[:, 1])
            test_item_ids = list(map(lambda x: item_iid_to_id[x], test_item_iids))

            if len(test_item_ids) > 0:
                top_item_iids = list(mf.recommend(user_iid, train_mat[user_iid], N=10, filter_already_liked_items=True)[0])
                top_item_ids = [item_iid_to_id[i] for i in top_item_iids]
                
                recall = Recall(test_item_ids, top_item_ids)
                m_cf_r.append(recall)
        
        all_m_cf_r.append(np.average(m_cf_r))
        # print(np.average(f_cf_r), np.average(m_cf_r))
    
    return (all_f_cf_r, all_m_cf_r)

In [None]:
cf_result_implicit = cf_experiment_implicit(1, listening_df, user_df)

In [None]:
print(f'female recall: {np.average(cf_result_implicit[0])}, std: {np.std(cf_result_implicit[0])}; male recall: {np.average(cf_result_implicit[1])}, std: {np.std(cf_result_implicit[1])}')

### content-based filtering

In [None]:
track_json_lst = []
with open('data/lastfm/tags.json', 'r', encoding='utf-8') as f:
    for obj in f:
        track_dict = json.loads(obj)
        track_json_lst.append(track_dict)

In [None]:
track_tags_lst = []
for obj in track_json_lst:
    track_id = obj['i']
    tags = list(obj['tags'].keys())[:10]
    track_tags_lst.append([track_id, tags])

In [None]:
tag_df = pd.DataFrame(track_tags_lst, columns=['track_id', 'tags'])

In [None]:
def cb_recommend(user_id, topk, knn, item_user_mat, X, user_id_to_iid, item_iid_to_id, item_n):
    user_iid = user_id_to_iid[user_id]
    user_ratings = item_user_mat[:, user_iid]
    rated_before = np.nonzero(user_ratings)[0]
    sorted_rated_before = rated_before[
        np.argsort(user_ratings[rated_before].toarray().squeeze())][::-1]
    
    if sorted_rated_before.size > 0:

        raw_recommends = {}
        for item_iid in sorted_rated_before:
            distances, indices = knn.kneighbors(X[item_iid], 
                                                n_neighbors=topk+1)
            sorted_pairs = sorted(list(zip(indices.squeeze().tolist(),
                                           distances.squeeze().tolist())),
                                  key=lambda x: x[1])
            raw_recommends[item_iid] = sorted_pairs 
        
        top_item_ids = []
        pos = 0
        while True:
            for item_iid in sorted_rated_before:
                next_neighbor_iid = raw_recommends[item_iid][pos][0]
                if next_neighbor_iid not in rated_before:
                    top_item_ids.append(item_iid_to_id[next_neighbor_iid])
                if len(top_item_ids) > topk - 1:
                    return (user_id, np.array(top_item_ids))
            
            pos += 1
    else:

        top_item_ids = list(map(lambda x: item_iid_to_id[x], 
                             random.sample(list(range(0, item_n)), topk)))
        return (user_id, np.array(top_item_ids))

In [None]:
def sample_evaluate(test_user_ids, knn, X, user_id_to_iid, item_iid_to_id, train_mat, test_mat, item_n):

    r = []

    for user_id in test_user_ids:
        user_iid = user_id_to_iid[user_id]
        test_item_iids = list(np.argwhere(test_mat[:, user_iid] > 0)[:, 0])
        test_item_ids = list(map(lambda x: item_iid_to_id[x], test_item_iids))

        if len(test_item_ids) > 0:
            top_item_ids = list(cb_recommend(user_id, 10, knn, train_mat, X, user_id_to_iid, item_iid_to_id, item_n)[1])

            recall = Recall(test_item_ids, top_item_ids)

            r.append(recall)
    
    return np.average(r)

In [None]:
def cb_experiment(n_epochs, n_iters, listening_df, user_df, tag_df):

    all_f_cb_r = []
    all_m_cb_r = []
    
    tagged_listening_df = pd.merge(listening_df, tag_df, on='track_id')

    for _ in range(n_epochs):
        small_listening_df = tagged_listening_df.sample(frac=0.02, ignore_index=True)

        user_n = small_listening_df['user_id'].nunique()
        item_n = small_listening_df['track_id'].nunique()

        user_ids = small_listening_df['user_id'].unique()
        item_ids = small_listening_df['track_id'].unique()

        user_id_to_iid = {user_ids[i]:i for i in range(len(user_ids))}
        user_iid_to_id = {i:user_ids[i] for i in range(len(user_ids))}

        item_id_to_iid = {item_ids[i]:i for i in range(len(item_ids))}
        item_iid_to_id = {i:item_ids[i] for i in range(len(item_ids))}

        gender_df = pd.merge(user_df, small_listening_df, on='user_id')[['user_id', 'gender']]
        f_user_ids = gender_df[gender_df['gender'] == 'f']['user_id'].unique()
        m_user_ids = gender_df[gender_df['gender'] == 'm']['user_id'].unique()

        small_tag_df = small_listening_df.drop_duplicates(subset=['track_id'])[['track_id', 'tags']]
        tf = TfidfVectorizer(analyzer = lambda x: (g for g in x))
        X_tfidf = tf.fit_transform(small_tag_df['tags'])

        knn = NearestNeighbors(metric='cosine', algorithm='auto', n_neighbors=10, n_jobs=-1)
        knn.fit(X_tfidf)

        train_df, test_df = train_test_split(small_listening_df, test_size=0.2)

        train_mat = df_to_mat(train_df, user_n, item_n, user_id_to_iid, item_id_to_iid)
        train_mat = train_mat.transpose().tocsr()

        test_mat = df_to_mat(test_df, user_n, item_n, user_id_to_iid, item_id_to_iid)
        test_mat= test_mat.transpose().tocsr()

        
        for _ in range(n_iters):

            test_f_user_ids = np.random.choice(f_user_ids, size=500, replace=False)
            test_m_user_ids = np.random.choice(m_user_ids, size=500, replace=False)

            f_cb_r = sample_evaluate(test_f_user_ids, knn, X_tfidf, user_id_to_iid, item_iid_to_id, train_mat, test_mat, item_n)
            m_cb_r = sample_evaluate(test_m_user_ids, knn, X_tfidf, user_id_to_iid, item_iid_to_id, train_mat, test_mat, item_n)

            all_f_cb_r.append(f_cb_r)
            all_m_cb_r.append(m_cb_r)

    return (np.array(all_f_cb_r), np.array(all_m_cb_r))

In [None]:
cb_experiment(1, 1, listening_df, user_df, tag_df)

In [None]:
cb_result = cb_experiment(10, 1, listening_df, user_df, tag_df)

In [None]:
print(f'female recall: {np.average(cb_result[0])}, std: {np.std(cb_result[0])}; male recall: {np.average(cb_result[1])}, std: {np.std(cb_result[1])}')

In [None]:
cb_result

In [None]:
cb_result = (np.array([0.        , 0.00714286, 0.01612903, 0.02051282, 0.        ,
        0.        , 0.00537634, 0.00151515, 0.01794872, 0.01058201,
        0.05191257, 0.04126984, 0.03703704, 0.0021978 , 0.02631579,
        0.05974843, 0.        , 0.04591837, 0.01538462, 0.03694581,
        0.        , 0.01149425, 0.00423729, 0.01969697, 0.        ,
        0.        , 0.00704225, 0.00431034, 0.02631579, 0.00166667,
        0.00628931, 0.01886792, 0.02380952, 0.01262626, 0.        ,
        0.02738873, 0.02380952, 0.01010101, 0.        , 0.01960784,
        0.0234375 , 0.04022989, 0.02037037, 0.00755858, 0.        ,
        0.        , 0.06060606, 0.03030303, 0.        , 0.        ,
        0.06222222, 0.00431034, 0.02419355, 0.01851852, 0.04661017,
        0.05085784, 0.01478495, 0.02777778, 0.01639344, 0.00438596,
        0.01785714, 0.00757576, 0.        , 0.        , 0.025     ,
        0.00537634, 0.        , 0.03773585, 0.        , 0.00505051,
        0.00819672, 0.01801802, 0.05555556, 0.03439153, 0.0030303 ,
        0.00892857, 0.        , 0.01730769, 0.00606061, 0.02051282,
        0.01397849, 0.        , 0.00701754, 0.01923077, 0.00909091,
        0.00403226, 0.        , 0.00076923, 0.00327869, 0.03225806,
        0.02150538, 0.00833333, 0.02051282, 0.00892857, 0.02698413,
        0.00546448, 0.00512821, 0.01333333, 0.        , 0.03044872]),
 np.array([0.03713607, 0.0172956 , 0.        , 0.00564972, 0.00574713,
        0.02272727, 0.00574713, 0.01694915, 0.00093284, 0.        ,
        0.00925926, 0.        , 0.01449275, 0.01641414, 0.0122549 ,
        0.02651515, 0.04404762, 0.00757576, 0.        , 0.0078125 ,
        0.01944444, 0.02121212, 0.00505051, 0.02298851, 0.03825137,
        0.04098361, 0.02525253, 0.00833333, 0.02277778, 0.02238806,
        0.00128205, 0.02690058, 0.00396825, 0.03278689, 0.00520833,
        0.02254098, 0.04275362, 0.        , 0.00454545, 0.01461988,
        0.02584453, 0.00897436, 0.00373134, 0.00793651, 0.01375661,
        0.03914141, 0.01147541, 0.00818182, 0.01272727, 0.01639344,
        0.00862069, 0.02155172, 0.01449275, 0.01984127, 0.00861079,
        0.02083333, 0.02595628, 0.05208333, 0.00223214, 0.01679713,
        0.02910798, 0.        , 0.00757576, 0.01875   , 0.00995025,
        0.        , 0.01641414, 0.02213542, 0.        , 0.        ,
        0.03825137, 0.00511727, 0.01867816, 0.02176527, 0.03535354,
        0.01102941, 0.00948345, 0.00292398, 0.01724138, 0.00662393,
        0.        , 0.        , 0.03098291, 0.01442308, 0.00909091,
        0.00874317, 0.00568182, 0.01589744, 0.01474747, 0.01904762,
        0.02083333, 0.03173454, 0.03208812, 0.01449275, 0.        ,
        0.00535714, 0.03333333, 0.02469136, 0.        , 0.01298425]))

In [None]:
np.average(cb_result[0])

In [None]:
np.std(cb_result[0])

In [None]:
np.average(cb_result[1])

In [None]:
np.std(cb_result[1])