# co_visit_add_rank
各 co_visitation_matrix の parquet ファイルに対して、`aid_x` ごとに重み列(`cnt` または `wt`)の降順で 0 始まりの `rank` 列を付与し、同じファイルに上書き保存する。

In [1]:
import os
import sys
import traceback
import gc
import random
import pickle
import pathlib
import subprocess
from dataclasses import dataclass
from dotenv import load_dotenv
# load_dotenv has to be *called* to read the .env file; the bare name
# `load_dotenv` is just an expression statement and loads nothing.
load_dotenv()
# Extend the import path with UTILS_PATH (presumably where the local helper
# modules imported further down live — confirm). Skip it when the variable is
# unset so that None is never pushed onto sys.path.
_utils_path = os.getenv('UTILS_PATH')
if _utils_path:
    sys.path.append(_utils_path)
from tqdm import tqdm
import multiprocessing
import inspect

import pandas as pd
import numpy as np
import polars as pl
import itertools
import cudf
import lightgbm as lgb
import matplotlib.pyplot as plt
import seaborn as sns
import line_notify
import my_logger
from noglobal import noglobal

# 設定

In [2]:
@dataclass
class Cfg:
    """Notebook-wide settings, exposed as plain class attributes.

    None of the attributes below carries a type annotation, so ``@dataclass``
    registers no fields: ``Cfg()`` takes no arguments and every value
    (including the dicts) is shared at class level by all instances.
    The three directory attributes are read from environment variables once,
    when the class body is executed, and are ``None`` if the variable is unset.
    """

    # --- run identification / logging ---
    loglevel = "INFO"
    exp_name = ""
    seed = 42

    # --- sizes and sampling knobs (consumed outside this cell) ---
    k = 20
    cand_n = 15
    negative_sample = 1
    train_chunk_n = 1
    test_chunk_n = 2

    # --- event-type <-> integer id lookup tables ---
    type2id = {
        "clicks": 0,
        "carts": 1,
        "orders": 2,
    }
    id2type = {
        0: "clicks",
        1: "carts",
        2: "orders",
    }

    # --- data split selection ---
    train_weeks = ["week3"]
    valid_week = "week4"
    valid_session_n = 100_000

    # --- directories, taken from the environment ---
    input_dir = os.getenv('INPUT_DIR')
    output_dir = os.getenv('OUTPUT_DIR')
    prep_dir = os.getenv("PREP_DIR")

    # --- per-event-type parameter dicts (presumably for LightGBM — confirm against the training code) ---
    clicks_params = {
        'objective': 'binary',
        'boosting': 'gbdt',
        'learning_rate': 0.1,
        'metric': 'binary_logloss',
        'seed': 42,
        'feature_pre_filter': False,
        'lambda_l1': 5.485903737168179,
        'lambda_l2': 0.005594683492536064,
        'num_leaves': 79,
        'feature_fraction': 0.552,
        'bagging_fraction': 0.9295272232672004,
        'bagging_freq': 2,
        'min_child_samples': 10,
    }
    carts_params = {
        'objective': 'binary',
        'boosting': 'gbdt',
        'learning_rate': 0.1,
        'metric': 'binary_logloss',
        'seed': 42,
        'feature_pre_filter': False,
        'lambda_l1': 8.709050252544463,
        'lambda_l2': 0.06935262036337767,
        'num_leaves': 252,
        'feature_fraction': 0.4,
        'bagging_fraction': 1.0,
        'bagging_freq': 0,
        'min_child_samples': 5,
    }
    orders_params = {
        'objective': 'binary',
        'boosting': 'gbdt',
        'learning_rate': 0.1,
        'metric': 'binary_logloss',
        'seed': 42,
        'feature_pre_filter': False,
        'lambda_l1': 9.356310279757256,
        'lambda_l2': 1.3120983078968551e-08,
        'num_leaves': 174,
        'feature_fraction': 0.5,
        'bagging_fraction': 1.0,
        'bagging_freq': 0,
        'min_child_samples': 20,
    }


# Shared config instance; the stdlib RNG is seeded from it for reproducibility.
cfg = Cfg()
random.seed(cfg.seed)

In [3]:
# Base names (without extension) of the co-visitation parquet files under cfg.prep_dir.
co_visit_names = [
    "co_visitation_matrix",
    "co_visitation_matrix_time_weighted",
    "co_visitation_matrix_type_weighted",
    "co_visitation_matrix_clicks2carts",
    "co_visitation_matrix_clicks2orders",
    "co_visitation_matrix_1w",
    "co_visitation_matrix_time_weighted_1w",
    "co_visitation_matrix_type_weighted_1w",
    "co_visitation_matrix_clicks2carts_1w",
    "co_visitation_matrix_clicks2orders_1w",
    ]

# Column to rank by for each matrix; paired with co_visit_names by position.
wt_cols = [
    "cnt",
    "wt",
    "wt",
    "cnt",
    "cnt",
    "cnt",
    "wt",
    "wt",
    "cnt",
    "cnt"
]

# File variants to process: None selects "<name>.parquet",
# any other value selects "<name>_<week>.parquet".
weeks = [
    None,
    "week3"
]

# zip() stops at the shorter list without complaint, so a missing entry would
# silently skip matrices; fail loudly instead.
assert len(co_visit_names) == len(wt_cols), "co_visit_names and wt_cols must have the same length"


def _add_rank(df, wt_col):
    """Return a copy of ``df`` ordered by ``aid_x`` with a 0-based ``rank`` column.

    Rows are sorted by ``aid_x`` ascending and, within each ``aid_x``, by
    ``wt_col`` descending; the index is reset. ``rank`` is the row's position
    inside its ``aid_x`` group after that sort (0 = largest ``wt_col``), stored
    as int32. Rows with equal ``wt_col`` get consecutive ranks in whatever
    order the sort leaves them. An existing ``rank`` column is overwritten.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``aid_x`` and ``wt_col``.
    wt_col : str
        Name of the column used to order rows within each ``aid_x``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.
    """
    ranked = df.sort_values(["aid_x", wt_col], ascending=[True, False], ignore_index=True)
    ranked["rank"] = ranked.groupby("aid_x").cumcount().astype("int32")
    return ranked


for co_visit_name, wt_col in zip(co_visit_names, wt_cols):
    for week in weeks:
        if week is None:
            file_name = f"{co_visit_name}.parquet"
        else:
            file_name = f"{co_visit_name}_{week}.parquet"
        # os.path.join copes with cfg.prep_dir both with and without a trailing slash.
        file_path = os.path.join(cfg.prep_dir, file_name)
        print(file_path)
        co_visit_df = _add_rank(pd.read_parquet(file_path), wt_col)
        # The input file is overwritten, so write to a sibling temp file first and
        # swap it in; an interrupted write then cannot leave a truncated input behind.
        tmp_path = file_path + ".tmp"
        co_visit_df.to_parquet(tmp_path)
        os.replace(tmp_path, file_path)

/mnt/otto-recommender-system/prep/co_visitation_matrix.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_week3.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_time_weighted.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_time_weighted_week3.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_type_weighted.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_type_weighted_week3.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_clicks2carts.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_clicks2carts_week3.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_clicks2orders.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_clicks2orders_week3.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_1w.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_1w_week3.parquet
/mnt/otto-recommender-system/prep/co_visitation_matrix_time_weighted_1w.parquet
/mnt/otto-re