In [5]:
# !pip install --upgrade pip
# !pip install numpy pandas rectools 'rectools[lightfm]' seaborn

# ⬇️ IMPORTS

In [1]:
# System Imports
import warnings
warnings.filterwarnings("ignore")

# Datetime
from datetime import datetime as dt

# Data Processing
import json
import numpy as np
import pandas as pd

# RecTools
from rectools.dataset import Dataset
from rectools.models import model_from_config

# Visualizations
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook

# 🌆 Environment

In [2]:
# Seed for anything stochastic; applied to NumPy's legacy global RNG right away.
RANDOM_STATE = 23
np.random.seed(RANDOM_STATE)

# Default length of a recommendation list (re-assigned later in the notebook).
TOP_K = 10

# Thread budget constant (not referenced by the cells visible in this notebook).
NUM_THREADS = 6

# 💎 Данные

In [3]:
# Read the raw CSV, discard the "Unnamed: 0" column (presumably an index that
# was saved together with the data) and keep only rows without missing values.
df = (
    pd.read_csv('../data/debank_data.csv')
    .drop(columns=["Unnamed: 0"])
    .dropna()
)

# Basic statistics: overall shape plus the cardinality of each key column.
print("Basic Statistics:")
print(df.shape)
print(f"Number of unique users: {df['user'].nunique():,}")
print(f"Number of unique pools: {df['pool'].nunique():,}")
print(f"Number of unique types: {df['types'].nunique():,}")
print(f"Number of unique chains: {df['chain'].nunique():,}")
print(f"Number of unique protocols: {df['protocol'].nunique():,}")

# Last expression of the cell: rich preview of the first rows.
df.head()

Basic Statistics:
(217256, 7)
Number of unique users: 19,378
Number of unique pools: 17,032
Number of unique types: 9
Number of unique chains: 54
Number of unique protocols: 2,057


Unnamed: 0,user,protocol,chain,pool,balance,types,balance_share
0,0xfd09f0296af88ac777c137ecd92d85583a9b9e4a,ftm_pwawallet,ftm,FTM,1053008.0,common,1.0
1,0x637b935cba030aeb876eae07aa7ff637166de4d6,avax_unifiprotocol,avax,UNFI/AVAX,1.319143,common,2e-06
2,0x637b935cba030aeb876eae07aa7ff637166de4d6,balancer,eth,DAI/YFID,85.36821,common,0.000111
3,0x637b935cba030aeb876eae07aa7ff637166de4d6,bsc_acryptos,bsc,ACSI,388.8207,common,0.000504
4,0x637b935cba030aeb876eae07aa7ff637166de4d6,bsc_acryptos,bsc,ACS/WAV/BNB,3686.864,common,0.004783


# 🔄 Предобработка данных

In [4]:
# Columns
USER_COLUMNS = ["user"]
# An item is identified by the combination of these columns.
ITEM_COLUMNS = ["protocol", "chain", "pool", "types"]
# Columns handed over to the recommender dataset.
FINAL_COLUMNS = ["user_id", "item_id", "datetime", "weight"]

# Prepare dataframe
# Every row receives the same timestamp (the moment this cell runs).
df["datetime"] = pd.to_datetime(dt.now())
# The balance share of a position is used as the interaction weight.
df = df.rename(columns={"balance_share": "weight"})
# Build the composite item key "<protocol>_<chain>_<pool>_<types>" with a
# vectorised string concatenation instead of a row-wise Python `apply`.
df["item"] = df[ITEM_COLUMNS[0]].str.cat(df[ITEM_COLUMNS[1:]], sep="_")

# Get unique values (in order of first appearance)
uniq_users = df["user"].unique().tolist()
uniq_items = df["item"].unique().tolist()

# Create mappings: dense integer id <-> original label
ID_TO_USER = dict(enumerate(uniq_users))
ID_TO_ITEM = dict(enumerate(uniq_items))
USER_TO_ID = {user: idx for idx, user in ID_TO_USER.items()}
ITEM_TO_ID = {item: idx for idx, item in ID_TO_ITEM.items()}

# Map columns to integer ids and keep only what the recommender needs
df["user_id"] = df["user"].map(USER_TO_ID)
df["item_id"] = df["item"].map(ITEM_TO_ID)
df_reco = df[FINAL_COLUMNS].copy()

In [5]:
# Sanity check: number of distinct users and distinct composite items.
for column in ("user", "item"):
    print(df[column].nunique())

19378
28909


## ( `SVD` ) Final Model

## 🔥 Hot Users

In [6]:
# Configuration consumed by `model_from_config` to build the final SVD model.
svd_config = dict(
    cls='PureSVDModel',       # model class to instantiate
    factors=103,              # dimensionality of the user/item vectors
    maxiter=25,
    random_state=23,          # same value as RANDOM_STATE in the Environment cell
    recommend_n_threads=0,
)

In [7]:
# Instantiate the model described by `svd_config`.
model = model_from_config(svd_config)

# Wrap the interactions table into a RecTools dataset.
dataset = Dataset.construct(df_reco)

# Train on the full dataset; as the last expression, the fitted model's repr
# is displayed by the cell.
model.fit(dataset)

<rectools.models.pure_svd.PureSVDModel at 0x337622810>

In [8]:
# Every user present in the interactions gets recommendations.
users = df_reco["user_id"].unique()

# 25 items per user; `filter_viewed=True` presumably excludes items the user
# already holds (confirm against the RecTools documentation).
recos = model.recommend(
    users=users,
    dataset=dataset,
    k=25,
    filter_viewed=True,
)

# Collapse the long recommendations table into {user_id: [item_id, ...]}.
reco_json = (
    recos
    .groupby(["user_id"])["item_id"]
    .apply(list)
    .to_dict()
)

In [9]:
# Translate the integer ids back to the original user values and composite
# item keys. `reco_json` is rebound here, so this cell expects the
# integer-keyed mapping produced by the previous cell as its input.
reco_json = {
    ID_TO_USER[uid]: [ID_TO_ITEM[iid] for iid in recommended_ids]
    for uid, recommended_ids in reco_json.items()
}

In [10]:
# Persist the decoded recommendations as pretty-printed UTF-8 JSON.
svd_recos_path = '../data/svd_recos' + '.json'
with open(svd_recos_path, mode="w", encoding="utf-8") as file:
    json.dump(reco_json, file, ensure_ascii=False, indent=4)

In [29]:
# Counting
n_users = df_reco["user_id"].nunique()
n_items = df_reco["item_id"].nunique()
print("n_users: {n_users:_} | n_items: {n_items:_}")

# Get vectors
user_vectors, item_vectors = model.get_vectors()
print(f"Shape users: {user_vectors.shape}")
print(f"Shape items: {item_vectors.shape}")

n_users: {n_users:_} | n_items: {n_items:_}
Shape users: (19378, 103)
Shape items: (28909, 103)


## 🧊 Cold Users

In [37]:
# Overrides the TOP_K = 10 set in the Environment cell: the popular-items list
# for cold users has 25 entries, matching k=25 used for hot users.
TOP_K = 25

In [38]:
# Per-item popularity statistics:
#   au  - number of distinct users holding the item
#   cnt - number of interaction rows for the item
item_stats = df_reco.groupby(["item_id"]).agg(
    au=("user_id", "nunique"),
    cnt=("datetime", "count"),
)

# Fallback recommendations for cold users: ids of the TOP_K items with the
# largest audience.
popular = (
    item_stats
    .sort_values("au", ascending=False)
    .reset_index()
    .head(TOP_K)["item_id"]
    .tolist()
)

In [39]:
# Show the popular item ids as their composite item keys (most popular first).
[ID_TO_ITEM[item_id] for item_id in popular]

['lido_eth_ETH_common',
 'arb_gmx_arb_GMX_common',
 'arb_gmx_arb_WBTC/WETH/USDC/LINK/UNI/USDT/MIM/FRAX/DAI_common',
 'arb_gmx_arb_esGMX_common',
 'avax_wonderland_avax_TIME_common',
 'aave2_eth_AAVE_common',
 'bsc_pancakeswap_bsc_Cake_common',
 'op_synthetix_op_SNX_lending',
 'looksrare_eth_LOOKS_common',
 'matic_quickswap_matic_QUICK_common',
 'convex_eth_cvxCRV_common',
 'curve_eth_DAI/USDC/USDT_common',
 'blur_eth_ETH_common',
 'curve_eth_CRV_locked',
 'arb_camelot_arb_GRAIL_common',
 'ftm_geist_ftm_GEIST_common',
 'era_syncswap_era_USDC/ETH_common',
 'bsc_mdex_bsc_MDX_common',
 'arb_arbitrum_arb_ARB_common',
 'bsc_belt_bsc_DAI/USDC/USDT/BUSD_common',
 'matic_klimadao_matic_KLIMA_common',
 'olympusdao_eth_OHM_common',
 'arb_radiantcapital2_arb_RDNT/WETH_locked',
 'avax_gmx_avax_AVAX/WBTC.e/WETH.e/MIM/USDC.e/USDC/BTC.b_common',
 'rocketpool_eth_ETH_common']