In [1]:
import logging
import math
import os
import sys
from collections import defaultdict

import numpy as np
import torch
from scipy.sparse import coo_matrix, csr_matrix

from util_vCF.helper import (
    add_and_fit,
    load_npy_ckpt,
    load_target_items,
    load_test_data,
    load_train_data_with_coo,
    log_config_file,
    prepare_test_complementary,
    save_ckpt,
    save_ckpt_pytorch,
    save_model_opt_state_dict,
    save_compressed,
    str2bool,
    train_wrap,
)

  from .autonotebook import tqdm as notebook_tqdm
Failure while loading azureml_run_type_providers. Failed to load entrypoint azureml.scriptrun = azureml.core.script_run:ScriptRun._from_run_dto with exception (pyOpenSSL 22.0.0 (/home/core/miniconda3/envs/simplex_new_env/lib/python3.7/site-packages), Requirement.parse('pyopenssl<21.0.0')).


In [2]:
#sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from util.args import init_arguments  # noqa: E402
from util.evaluation import evaluate  # noqa: E402
from util.helper import (  # noqa: E402
    get_logger_dir,
    get_max_item_index_to_save,
    load_train_data,
    load_train_data_weighted_by_time,
    train_wrap,
)
from util.logger import setup_logger  # noqa: E402
from util.metrics import matrix_query  # noqa: E402

In [3]:
# Logger for this notebook, keyed by the current module's ``__name__``.
logger = logging.getLogger(__name__)

In [4]:
def get_swing_i2imatrix_csr(train_user_item_csr, alpha=0.001):
    """Build a symmetric item-to-item Swing score matrix from user-item interactions.

    For every item ``i`` and every unordered pair of users ``(u, v)`` that both
    interacted with ``i``, each item ``j`` in the pair's common item set receives
    ``weight[u] * weight[v] / (alpha + k)``, where ``weight[x]`` is
    ``1 / sqrt(number of items of x)`` and ``k`` is the size of the common set.
    The contribution is added to both ``(i, j)`` and ``(j, i)``.

    Args:
        train_user_item_csr: scipy sparse matrix of shape ``(num_users, num_items)``.
            Every stored entry counts as one interaction; the stored values
            themselves are not used.
        alpha: Smoothing term added to the common-item count in the denominator.

    Returns:
        ``csr_matrix`` of shape ``(num_items, num_items)`` holding the accumulated
        scores. It has no stored entries when no two users share an item.
    """
    num_users, num_items = train_user_item_csr.shape
    items = defaultdict(set)   # user index -> set of item indices
    users = defaultdict(list)  # item index -> list of user indices

    train_user_item_coo = train_user_item_csr.tocoo()
    # Walk the paired row/col arrays directly rather than indexing them one
    # element at a time.
    for u, i in zip(train_user_item_coo.row, train_user_item_coo.col):
        items[u].add(i)
        users[i].append(u)

    # Only users that appear in ``items`` have at least one interaction, so the
    # square root is never zero here. Users without interactions keep weight 0
    # and never take part in a pair.
    weight = np.zeros(num_users)
    for u, user_items in items.items():
        weight[u] = 1 / math.sqrt(len(user_items))

    swing_dict = defaultdict(float)
    for i in range(num_items):
        # ``get`` avoids inserting an empty list for items nobody interacted with.
        item_users = users.get(i, ())
        for p, u in enumerate(item_users):
            for v in item_users[p + 1:]:
                common_items_uv = items[u] & items[v]
                # The score depends only on the user pair, so compute it once
                # per pair instead of once per common item.
                swing = weight[u] * weight[v] / (alpha + len(common_items_uv))
                for j in common_items_uv:
                    swing_dict[i, j] += swing
                    swing_dict[j, i] += swing

    if not swing_dict:
        # No co-occurring user pair: unpacking ``zip(*keys)`` would fail, so
        # return an explicitly empty matrix of the right shape.
        return csr_matrix((num_items, num_items))

    rows, cols = zip(*swing_dict.keys())
    data = list(swing_dict.values())
    return csr_matrix((data, (rows, cols)), shape=(num_items, num_items))

In [5]:
#args = init_arguments()

# Notebook stand-in for the parsed command-line arguments: only the settings
# that the training-data loader call below reads.
args = {
    'input_path': '/home/core/shahjaidev/my_data/3d_train_csv',
    'input_delimiter': ',',
    'rating_scale': 1.0,
    'rating_offset': 0.0,
    'weight_choice': 'ctr',
}

In [6]:

#train_user_item_csr, _, _, _, _, _ = load_train_data(args, input_path_3d_train, input_delimiter, logger_in=logger)

# Load the training data with the settings stored in `args`. The loader's seven
# return values are unpacked here and the last one is discarded.
# NOTE(review): the meaning of each returned value is inferred from the target
# names only (COO and CSR user-item matrices, a dataframe, ...) — confirm
# against util_vCF.helper.load_train_data_with_coo.
train_user_item_coo, train_user_item_csr, train_df, item_number, idfs, item_sums, _ = load_train_data_with_coo(
        args['input_path'], args['input_delimiter'], args['rating_scale'], args['rating_offset'], args['weight_choice']
    )


In [13]:
# Build the two lookup tables consumed by the Swing computation:
#   items: user index -> set of item indices the user interacted with
#   users: item index -> list of user indices that interacted with the item
num_users, num_items = train_user_item_csr.shape
weight = np.zeros(num_users)
items = defaultdict(set)
users = defaultdict(list)

train_user_item_coo = train_user_item_csr.tocoo()
num_data = len(train_user_item_coo.data)
# The COO row/col arrays are parallel, so walk them together; `n` is the
# running entry index used only for progress reporting.
interaction_pairs = zip(train_user_item_coo.row, train_user_item_coo.col)
for n, (user_idx, item_idx) in enumerate(interaction_pairs):
    if n % 1000000 == 0:
        print(f"{n} items processed")
    items[user_idx].add(item_idx)
    users[item_idx].append(user_idx)
print("creation of items and users dict done")


0 items processed
1000000 items processed
2000000 items processed
3000000 items processed
4000000 items processed
5000000 items processed
6000000 items processed
7000000 items processed
8000000 items processed
9000000 items processed
10000000 items processed
11000000 items processed
12000000 items processed
13000000 items processed
14000000 items processed
15000000 items processed
16000000 items processed
17000000 items processed
18000000 items processed
19000000 items processed
20000000 items processed
21000000 items processed
22000000 items processed
23000000 items processed
24000000 items processed
25000000 items processed
26000000 items processed
27000000 items processed
28000000 items processed
29000000 items processed
30000000 items processed
31000000 items processed
32000000 items processed
33000000 items processed
34000000 items processed
35000000 items processed
36000000 items processed
37000000 items processed
creation of items and users dict done


In [15]:
def get_swing_i2imatrix_csr(users, items, train_user_item_csr, alpha=0.001):
    """Build a symmetric item-to-item Swing score matrix from prebuilt lookups.

    For every item ``i`` and every unordered pair of users ``(u, v)`` in
    ``users[i]``, each item ``j`` in ``items[u] & items[v]`` receives
    ``weight[u] * weight[v] / (alpha + k)``, where ``weight[x]`` is
    ``1 / sqrt(len(items[x]))`` and ``k`` is the size of the common set. The
    contribution is added to both ``(i, j)`` and ``(j, i)``.

    Args:
        users: Indexable by item index, giving a sliceable sequence of the user
            indices that interacted with that item (e.g. ``defaultdict(list)``).
        items: Indexable by user index, giving the set of item indices of that
            user (e.g. ``defaultdict(set)``).
        train_user_item_csr: Matrix of shape ``(num_users, num_items)``. Only
            its ``shape`` is read here.
        alpha: Smoothing term added to the common-item count in the denominator.

    Returns:
        ``csr_matrix`` of shape ``(num_items, num_items)`` holding the accumulated
        scores. It has no stored entries when no two users share an item.
    """
    num_users, num_items = train_user_item_csr.shape
    weight = np.zeros(num_users)

    for u in range(num_users):
        num_user_items = len(items[u])
        # Users without interactions keep weight 0 instead of triggering a
        # division by zero; they never take part in a pair anyway.
        if num_user_items:
            weight[u] = 1 / math.sqrt(num_user_items)

    swing_dict = defaultdict(float)
    for i in range(num_items):
        if i%10000 ==0:
            print(f"{i} items processed")

        item_users = users[i]
        for p, u in enumerate(item_users):
            for v in item_users[p + 1:]:
                common_items_uv = items[u] & items[v]
                # The score depends only on the user pair, so compute it once
                # per pair instead of once per common item.
                swing = weight[u] * weight[v] / (alpha + len(common_items_uv))
                for j in common_items_uv:
                    swing_dict[i, j] += swing
                    swing_dict[j, i] += swing

    if not swing_dict:
        # No co-occurring user pair: unpacking ``zip(*keys)`` would fail, so
        # return an explicitly empty matrix of the right shape.
        return csr_matrix((num_items, num_items))

    rows, cols = zip(*swing_dict.keys())
    data = list(swing_dict.values())
    return csr_matrix((data, (rows, cols)), shape=(num_items, num_items))

In [16]:
# NOTE: get_swing_i2imatrix_csr returns a csr_matrix of shape (num_items, num_items),
# so `res_swing_dict` holds the item-to-item score matrix, not a dict.
res_swing_dict = get_swing_i2imatrix_csr(users, items, train_user_item_csr, alpha=0.001)

0 items processed
10000 items processed
20000 items processed
30000 items processed
40000 items processed
50000 items processed
60000 items processed
70000 items processed
80000 items processed
90000 items processed


In [14]:
"""def get_swing_i2imatrix_csr(train_user_item_csr, alpha=0.001):

    num_users, num_items = train_user_item_csr.shape
    weight = np.zeros(num_users)
    items = defaultdict(set)
    users = defaultdict(list)

    train_user_item_coo = train_user_item_csr.tocoo()
    num_data = len(train_user_item_coo.data)
    for n in range(num_data):
        items[train_user_item_coo.row[n]].add(train_user_item_coo.col[n])
        users[train_user_item_coo.col[n]].append(train_user_item_coo.row[n])
    print("creation of items and users dict done")

    for u in range(num_users):
        weight[u] = 1 / math.sqrt(len(items[u]))

    swing_dict = defaultdict(int)
    for i in range(num_items):
        if i%10000 ==0:
            print(f"{i} items processed")

        for p, u in enumerate(users[i]):
            for v in users[i][p + 1 :]:
                common_items_uv = list(items[u] & items[v])
                k = len(common_items_uv)
                for j in common_items_uv:
                    swing = weight[u] * weight[v] * 1 / (alpha + k)
                    swing_dict[i, j] += swing
                    swing_dict[j, i] += swing
    keys = list(swing_dict.keys())
    rows, cols = list(zip(*keys))
    data = list(swing_dict.values())
    swing_i2imatrix_csr = csr_matrix((data, (rows, cols)), shape=(num_items, num_items))

    return swing_i2imatrix_csr"""

'def get_swing_i2imatrix_csr(train_user_item_csr, alpha=0.001):\n\n    num_users, num_items = train_user_item_csr.shape\n    weight = np.zeros(num_users)\n    items = defaultdict(set)\n    users = defaultdict(list)\n\n    train_user_item_coo = train_user_item_csr.tocoo()\n    num_data = len(train_user_item_coo.data)\n    for n in range(num_data):\n        items[train_user_item_coo.row[n]].add(train_user_item_coo.col[n])\n        users[train_user_item_coo.col[n]].append(train_user_item_coo.row[n])\n    print("creation of items and users dict done")\n\n    for u in range(num_users):\n        weight[u] = 1 / math.sqrt(len(items[u]))\n\n    swing_dict = defaultdict(int)\n    for i in range(num_items):\n        if i%10000 ==0:\n            print(f"{i} items processed")\n\n        for p, u in enumerate(users[i]):\n            for v in users[i][p + 1 :]:\n                common_items_uv = list(items[u] & items[v])\n                k = len(common_items_uv)\n                for j in common_item

In [8]:
# The most recent definition of get_swing_i2imatrix_csr in this notebook takes
# (users, items, train_user_item_csr, alpha); the former single-argument call
# raises TypeError when the cells are run top to bottom.
swing_scores_i2i_matrix = get_swing_i2imatrix_csr(users, items, train_user_item_csr)

KeyboardInterrupt: 

In [None]:
# Marker printed once execution reaches the end of the notebook.
print("done")