# Import

In [1]:
from collections.abc import Iterator
from datetime import date, timedelta
from scipy.sparse import csc_matrix
from web3 import Web3
 
import boto3
import botocore
import logging
import multiprocess
import numpy as np
import os
import pandas as pd
import string

# Download Data 

In [2]:
def iterate_dates(start_date: date, end_date: date) -> Iterator:
    '''Yields date in range of [start_date, end_date]

    Args:
        start_date: inclusive start date
        end_date: inclusive end date
    '''
    for n in range(int((end_date - start_date).days)+1):
        yield start_date + timedelta(n)

In [3]:
def download_nft_transfers(start_date: date, end_date: date, local_path: string) -> None:
    '''Downloads nft token transfer data from start_date to end_date

    Args:
        start_date: inclusive start date
        end_date: inclusive end date
        local_path: download path

    Raises:
        FileNotFoundError: if local_path does not exist
    '''
    
    if not os.path.exists(local_path):
        raise FileNotFoundError(f"The {local_path} does not exist")

    s3_client = boto3.client('s3')
    bucket_name = "nimble-data-warehouse-dev"
    for download_date in iterate_dates(start_date, end_date):
        str_date = download_date.strftime("%Y-%m-%d")
        remote_path = f"etl/ethereum/nft_token_transfers/date={str_date}/nft_token_transfers.csv"
        fname = os.path.join(local_path, f"nft_token_transfers={str_date}.csv")
        if not os.path.exists(fname):
            try:
                s3_client.download_file(bucket_name, remote_path, fname)
                logging.info(f"{remote_path} downloaded")
            except botocore.exceptions.ClientError as error:
                logging.error(f'{remote_path} not found')
            except botocore.exceptions.ParamValidationError as error:
                logging.error('The parameters you provided are incorrect: {}'.format(error))

In [4]:
# %%time
# # Load transfer data for multiple dates in range [start_date, end_date]
# start_date = date(2022,8,16)
# end_date = date(2022,9,24)
local_path = "/Users/keonyonglee/Projects/nimble/nft-recommendation-data-analysis-data/nft-token-transfers"
# download_nft_transfers(start_date, end_date, local_path)

# Load Data

In [5]:
def load_transfer_data(start_date: date, end_date: date, local_path: string) -> pd.DataFrame:
    """Loads NFT token transfer data with value in ether from start_date to end_date

    Args:
        start_date: an inclusive start date for nft token transfers
        end_date: an inclusive end date for nft token transfers
        local_path: a local path where the nft toke transfer date is located
    
    Returns:
        concatenated transfers dataframe for [start_date, end_date]

    Raises:
        ValueError: if start_date is greater than end_date
        FileNotFoundError: if local_path does not exist or if no csv file is found in the local_path
    """
    if not os.path.exists(local_path):
        raise FileNotFoundError("The local_path does not exist")
    if start_date > end_date:
        raise ValueError("The 'end_date' should be equal to or greather than 'start_date'")

    transfers_list = []
    for date_ in iterate_dates(start_date, end_date):
        fname = os.path.join(local_path, "nft_token_transfers={}.csv".format(date_.strftime("%Y-%m-%d")))
        if os.path.exists(fname):
            transfers = pd.read_csv(fname, low_memory=False)
            transfers = convert_value_from_wei(transfers, 'ether')
            transfers_list.append(transfers)
        else:
            logging.error(f"{fname} does not exist")
    
    if transfers_list:
        return pd.concat(transfers_list, ignore_index=True)
    else:
        raise FileNotFoundError(f"No csv file was loaded from {local_path}")

In [6]:
def convert_value_from_wei(transfers: pd.DataFrame, unit: string) -> pd.DataFrame:
    '''Convert value in WEI to unit for transfers
        where possible units are 'kwei', 'mwei', 'gwei', 'microether', 'milliether', 'ether'

    Args:
        transfers: dataframe you want to convert the value of
        unit: unit to convert to

    Returns:
        a new dataframe with converted value

    Raises:
        ValueError if unit is not valid
    '''
    if unit not in ['kwei', 'mwei', 'gwei', 'microether', 'milliether', 'ether']:
        raise ValueError("Invalid unit")
    values_in_ether = transfers['value'].apply(lambda x: float(Web3.fromWei(int(x), unit)))
    transfers['value'] = values_in_ether
    return transfers

In [7]:
%%time
start_date = date(2022,5,1)
end_date = date(2022,9,24)
transfers = load_transfer_data(start_date, end_date, local_path)

CPU times: user 4min 2s, sys: 17.7 s, total: 4min 20s
Wall time: 4min 20s


# Be One With The Data

In [8]:
transfers

Unnamed: 0,amount,block_hash,block_number,block_timestamp,contract_type,from_address,log_index,operator,to_address,token_address,token_id,transaction_hash,transaction_index,transaction_type,value,verified,is_batch
0,1,0xb58d4afaac7009f3bceb77c615aaae3af3853d8586d2...,14695010,2022-05-01T23:59:13.000Z,ERC1155,0x27792e7d87a5f05334b52c0584ad8497852aef8b,675.0,0xae9d5478814305148fe82dbea70e72c7ff0abbde,0x6d18e2fd549e5a816fbce4aee665283db243caf9,0x495f947276749ce646f68ac8c248420045cb7b5e,1785431045419628203587059284159807424926884684...,0xfa59235a4aba3f57b295090cd6f9bce2f7f7006c1136...,317.0,Single,0.000000,1,False
1,1,0xb58d4afaac7009f3bceb77c615aaae3af3853d8586d2...,14695010,2022-05-01T23:59:13.000Z,ERC721,0x59346047b015a159e5340122b0c9134043dc00e1,668.0,,0x9d08b5dea1cbd21e8eb76e49d69746909ba2239d,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258,51199,0xf75ff58cd2186a85fb3f957340b2564733da530f16a8...,312.0,Single,0.000000,1,False
2,1,0xb58d4afaac7009f3bceb77c615aaae3af3853d8586d2...,14695010,2022-05-01T23:59:13.000Z,ERC721,0x283af0b28c62c092c9727f1ee09c02ca627eb7f5,664.0,,0xf7a766f28e561fc25fe5c445fdcd8e353afc3d63,0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85,3371214892998853998010100240008899446379973043...,0xda3143b89a0c701afe5a17346625d1129c9d2ba3cfc6...,308.0,Single,0.001945,1,True
3,1,0xb58d4afaac7009f3bceb77c615aaae3af3853d8586d2...,14695010,2022-05-01T23:59:13.000Z,ERC721,0x0000000000000000000000000000000000000000,657.0,,0x283af0b28c62c092c9727f1ee09c02ca627eb7f5,0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85,3371214892998853998010100240008899446379973043...,0xda3143b89a0c701afe5a17346625d1129c9d2ba3cfc6...,308.0,Single,0.001945,1,True
4,1,0xb58d4afaac7009f3bceb77c615aaae3af3853d8586d2...,14695010,2022-05-01T23:59:13.000Z,ERC721,0xb56753544255746c4e9647df7d2e641f8b13c6ba,636.0,,0xbb14199294a45467b760db495c0a767bfc057f8a,0xd9b78a2f1dafc8bb9c60961790d2beefebee56f4,2792,0x9437e96a57f91ceb71e7d971b894f6b1443894fb067a...,302.0,Single,0.310000,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68566616,1,0xbd9516fe35fc92d4bce8b6b7b31a9a81b56f34e259d3...,15599492,2022-09-24T00:00:11.000Z,ERC721,0x47efd37de4fe5e5ca9934ed6c3c5e2aab093372f,237.0,,0x07035d0e0cfb5e89218be943507694526a4ebe54,0x8f14778feb8cc049e227d01a2417a888dabb2abc,151,0x79a26bf8d4639a1b66cf336c42cab384d16e70bd847e...,158.0,Single,0.008000,1,False
68566617,1,0xbd9516fe35fc92d4bce8b6b7b31a9a81b56f34e259d3...,15599492,2022-09-24T00:00:11.000Z,ERC721,0x0000000000000000000000000000000000000000,235.0,,0xbe61f001a92045155b30bd1c3f65d8a428f1542e,0x7ca18098ea15b0e199a5dffec2a912015130896a,531,0x35b846cc1fd7b20a8610e2430515a7b8c9d4e107b7bc...,157.0,Single,0.000000,1,False
68566618,1,0xbd9516fe35fc92d4bce8b6b7b31a9a81b56f34e259d3...,15599492,2022-09-24T00:00:11.000Z,ERC721,0x283af0b28c62c092c9727f1ee09c02ca627eb7f5,102.0,,0x5192b3fe45a0a649b2709e2264d537c76190aa5c,0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85,1666219360701647988344975679758156907440319526...,0x9165174bf98f4aa577889a195c45ce64068e3382f1ad...,63.0,Single,0.008306,1,True
68566619,1,0xbd9516fe35fc92d4bce8b6b7b31a9a81b56f34e259d3...,15599492,2022-09-24T00:00:11.000Z,ERC721,0x0000000000000000000000000000000000000000,95.0,,0x283af0b28c62c092c9727f1ee09c02ca627eb7f5,0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85,1666219360701647988344975679758156907440319526...,0x9165174bf98f4aa577889a195c45ce64068e3382f1ad...,63.0,Single,0.008306,1,True


In [9]:
transfers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 68566621 entries, 0 to 68566620
Data columns (total 17 columns):
 #   Column             Dtype  
---  ------             -----  
 0   amount             object 
 1   block_hash         object 
 2   block_number       int64  
 3   block_timestamp    object 
 4   contract_type      object 
 5   from_address       object 
 6   log_index          float64
 7   operator           object 
 8   to_address         object 
 9   token_address      object 
 10  token_id           object 
 11  transaction_hash   object 
 12  transaction_index  float64
 13  transaction_type   object 
 14  value              float64
 15  verified           int64  
 16  is_batch           bool   
dtypes: bool(1), float64(3), int64(2), object(11)
memory usage: 8.2+ GB


In [10]:
transfers.isnull().sum()

amount                      0
block_hash                  0
block_number                0
block_timestamp             0
contract_type            4018
from_address                0
log_index                6966
operator             61830993
to_address                  0
token_address               0
token_id                 3465
transaction_hash            0
transaction_index       18738
transaction_type            0
value                       0
verified                    0
is_batch                    0
dtype: int64

In [11]:
transfers.contract_type.value_counts(normalize=True)

ERC721     0.901090
ERC1155    0.098576
ERC165     0.000334
Name: contract_type, dtype: float64

In [12]:
ens = "0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85".lower()
transfers[transfers.token_address == ens].shape[0]

3110073

In [13]:
memory = "0xccb9d89e0f77df3618eec9f6bf899be3b5561a89".lower()
transfers[transfers.token_address == memory].shape[0]

2903

# Preprocess

In [14]:
transfers.fillna("N/A", inplace=True)

In [15]:
transfers = transfers[(transfers.contract_type != 'ERC1155') & (transfers.contract_type != 'ERC165')]

In [16]:
transfers.contract_type.value_counts()

ERC721    61781056
N/A           4018
Name: contract_type, dtype: int64

In [17]:
transfers = transfers[transfers.token_id != 'N/A']

In [18]:
transfers.token_id.isnull().sum()

0

In [19]:
mint_address = "0x0000000000000000000000000000000000000000"
transfers = transfers[transfers.from_address != mint_address]

In [20]:
transfers[transfers.from_address == mint_address].shape[0]

0

In [21]:
burn_addresses = [
    "0x0000000000000000000000000000000000000000",
    "0x000000000000000000000000000000000000dead"
]
transfers = transfers[~transfers.to_address.isin(burn_addresses)]

In [23]:
transfers[transfers.to_address.isin(burn_addresses)].shape[0]

0

In [24]:
transfers['nft_id'] = transfers.apply(lambda row: f'{row.token_address}/{row.token_id}', axis=1)

In [25]:
transfers.nft_id

1            0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/51199
2           0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85/337...
4             0xd9b78a2f1dafc8bb9c60961790d2beefebee56f4/2792
5              0xafe12842e3703a3cc3a71d9463389b1bf2c5bc1c/358
8               0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/61
                                  ...                        
68566596      0xce0f25934deaaddd174427f1978bcd487a85e9fa/3393
68566597      0xe6408dd80ee4d7596151ead35b37bdf84d2a3b0f/2287
68566600       0xd896b402a848331e347909a03f286ce218706f43/573
68566616       0x8f14778feb8cc049e227d01a2417a888dabb2abc/151
68566618    0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85/166...
Name: nft_id, Length: 20327516, dtype: object

In [26]:
columns={'from_address': 'seller', 'to_address': 'buyer', 'token_address': 'collection_id'}
transfers.rename(columns=columns, inplace=True)

In [27]:
columns = [
    'seller',
    'buyer',
    'collection_id',
    'nft_id',
    'value',
    'is_batch',
]
transfers = transfers[columns]

In [28]:
sales = transfers[transfers.value > 0]

In [31]:
sales[sales.value == 0].shape[0]

0

In [32]:
sales

Unnamed: 0,seller,buyer,collection_id,nft_id,value,is_batch
2,0x283af0b28c62c092c9727f1ee09c02ca627eb7f5,0xf7a766f28e561fc25fe5c445fdcd8e353afc3d63,0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85,0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85/337...,0.001945,True
4,0xb56753544255746c4e9647df7d2e641f8b13c6ba,0xbb14199294a45467b760db495c0a767bfc057f8a,0xd9b78a2f1dafc8bb9c60961790d2beefebee56f4,0xd9b78a2f1dafc8bb9c60961790d2beefebee56f4/2792,0.310000,False
5,0xfc1207ab36d0aafebf9d27473326ead93e3b0cb8,0x636e5f84c3abab43802595cefe6ac3c0687da411,0xafe12842e3703a3cc3a71d9463389b1bf2c5bc1c,0xafe12842e3703a3cc3a71d9463389b1bf2c5bc1c/358,0.018000,False
8,0x35d1cea472ecb765133d0a8e043989a9bd9a6bef,0x6dd8812a762a7c37fd5aa75d9e2186482c75888e,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/61,175.000000,False
15,0xd1df5522ad6d080615d645f49be708fa4e605cae,0xcdf99625169b900bbcd172c3fed98c887bb97227,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/75253,5.100000,False
...,...,...,...,...,...,...
68566594,0x283af0b28c62c092c9727f1ee09c02ca627eb7f5,0x4d1c6fe9d1d2cdc92a30004e55c2ee98ca31fe91,0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85,0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85/103...,0.004153,True
68566597,0xeea477ae1312bb5bc00ef3d9d1f8da8c69e2e9da,0x12523d7753bb3d74d2900ef3af384c14d7f494a6,0xe6408dd80ee4d7596151ead35b37bdf84d2a3b0f,0xe6408dd80ee4d7596151ead35b37bdf84d2a3b0f/2287,0.165000,False
68566600,0x0b579d10fc3cfa5b592ffc503751ddba359b2e6d,0xeb6d919f8421783053c0944498a434e75d20653d,0xd896b402a848331e347909a03f286ce218706f43,0xd896b402a848331e347909a03f286ce218706f43/573,0.005000,False
68566616,0x47efd37de4fe5e5ca9934ed6c3c5e2aab093372f,0x07035d0e0cfb5e89218be943507694526a4ebe54,0x8f14778feb8cc049e227d01a2417a888dabb2abc,0x8f14778feb8cc049e227d01a2417a888dabb2abc/151,0.008000,False


In [34]:
sales.shape[0] / 68599921

0.1710519316778805

# NFT Sales Volume Analysis

In [35]:
non_batch_sales = sales[sales.is_batch == False][['nft_id', 'value']]

In [36]:
non_batch_sales

Unnamed: 0,nft_id,value
4,0xd9b78a2f1dafc8bb9c60961790d2beefebee56f4/2792,0.310000
5,0xafe12842e3703a3cc3a71d9463389b1bf2c5bc1c/358,0.018000
8,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/61,175.000000
15,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/75253,5.100000
59,0x07ce5d64b7b845c8ff645e9a3825f94e6bdd82f1/2974,0.399999
...,...,...
68566578,0xefed2a58cc6a5b81f9158b231847f005cf086c01/253,2.700000
68566586,0xbaf13560f86d65e7a3b982bfc8838791cbc8f3af/469,0.240000
68566597,0xe6408dd80ee4d7596151ead35b37bdf84d2a3b0f/2287,0.165000
68566600,0xd896b402a848331e347909a03f286ce218706f43/573,0.005000


In [37]:
non_batch_sales.shape[0] / sales.shape[0]

0.4914860890210274

In [38]:
total_sales_volume = non_batch_sales.groupby('nft_id').sum().rename(columns={'value': 'total_sales_volume'})

In [39]:
total_sales_volume

Unnamed: 0_level_0,total_sales_volume
nft_id,Unnamed: 1_level_1
0x00000000001ba87a34f0d3224286643b36646d81/1002,0.015000
0x00000000001ba87a34f0d3224286643b36646d81/1007,0.008000
0x00000000001ba87a34f0d3224286643b36646d81/1008,0.008000
0x00000000001ba87a34f0d3224286643b36646d81/1012,0.016500
0x00000000001ba87a34f0d3224286643b36646d81/1013,0.005790
...,...
0xffed35bc5fb514098df353840e4eda01c4c7c776/996,0.025000
0xffed35bc5fb514098df353840e4eda01c4c7c776/997,0.035900
0xfff46beea0dca55cc6bf39ec48e559160191093e/1000,0.002000
0xfff46beea0dca55cc6bf39ec48e559160191093e/3234,0.000001


In [40]:
total_sales_volume.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
total_sales_volume,4267995.0,0.878374,346.976687,1e-18,0.012889,0.0499,0.18,628496.607033


# Unique Buyer Count Analysis

In [41]:
num_unique_buyers = sales[['nft_id', 'buyer']]\
                        .groupby('nft_id')\
                        .nunique('buyer')\
                        .rename(columns={'buyer': 'num_unique_buyers'})

In [42]:
num_unique_buyers

Unnamed: 0_level_0,num_unique_buyers
nft_id,Unnamed: 1_level_1
0x00000000001ba87a34f0d3224286643b36646d81/1002,1
0x00000000001ba87a34f0d3224286643b36646d81/1003,1
0x00000000001ba87a34f0d3224286643b36646d81/1007,5
0x00000000001ba87a34f0d3224286643b36646d81/1008,2
0x00000000001ba87a34f0d3224286643b36646d81/1009,1
...,...
0xffed35bc5fb514098df353840e4eda01c4c7c776/996,1
0xffed35bc5fb514098df353840e4eda01c4c7c776/997,1
0xfff46beea0dca55cc6bf39ec48e559160191093e/1000,1
0xfff46beea0dca55cc6bf39ec48e559160191093e/3234,1


In [43]:
num_unique_buyers.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
num_unique_buyers,7717484.0,1.504217,1.04479,1.0,1.0,1.0,2.0,179.0


In [44]:
over_two_unique_buyers = num_unique_buyers[num_unique_buyers.num_unique_buyers > 2].index

In [45]:
sales[sales.nft_id.isin(over_two_unique_buyers)].shape[0] / sales.shape[0]

0.30051817136462133

In [46]:
total_sales_volume[total_sales_volume.index.isin(over_two_unique_buyers)].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
total_sales_volume,738315.0,1.975225,388.031741,1e-18,0.0385,0.1245,0.41,257679.0


# Filters

In [47]:
sales.shape[0]

11734149

## Filter by The Number of Unique Buyers

In [48]:
reco_sales = sales[sales.nft_id.isin(over_two_unique_buyers)]

In [49]:
reco_sales.shape[0]

3526325

## Filter ENS

In [50]:
reco_sales = reco_sales[reco_sales.collection_id != ens]

In [51]:
reco_sales.shape[0]

3419350

## Filter Memory

In [52]:
reco_sales = reco_sales[reco_sales.collection_id != memory]

In [53]:
reco_sales.shape[0]

3417432

# Be One With the New Data

In [54]:
reco_sales

Unnamed: 0,seller,buyer,collection_id,nft_id,value,is_batch
68,0x29202c949515577473a4281f34bed68f4bea5d13,0x18428cc069255af80edc7f27e1e78e47922483df,0xc7457f480416e145093edb355f626faa0fc7f90e,0xc7457f480416e145093edb355f626faa0fc7f90e/2145,0.270000,False
122,0xa87b68c3bcbb3f5eee1c1500ea271d321d29dfb4,0xe201a7e6e785513fc7777ec5e6d311bffe257b26,0xc7457f480416e145093edb355f626faa0fc7f90e,0xc7457f480416e145093edb355f626faa0fc7f90e/2116,0.292000,False
165,0xb284f19ffa703daadf6745d3c655f309d17370a5,0xc8fbd2100a95d0649223416ecd8e8551514c8b25,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/91445,4.869000,False
250,0xf0b145a02139f91f998d093f2a12b9ecf2ef5405,0x74132ebf4b4826d1b596ab6f803d1a31667aff58,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/56375,6.990000,False
253,0x3b2a887ca2d31795f664cf34db66b6e5f80144ba,0xb029eef1f5505764d0df1750f55612958547de57,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/70494,5.449000,False
...,...,...,...,...,...,...
68566549,0x3f2f83260199d4259f2bd9b9b44cd5fc53d452a5,0xf6706b26278e1f1ed30d23bc20cf73f6b2fb829d,0x9c094b49a2811ba92950212c98b27c600c02758f,0x9c094b49a2811ba92950212c98b27c600c02758f/800,0.010690,False
68566551,0xc48b1613f2f691639652ff4e57e8bb4bd501345c,0x59406ad8326d539c2d9f39e1d37f2434b4c364bf,0xbe82b9533ddf0acaddcaa6af38830ff4b919482c,0xbe82b9533ddf0acaddcaa6af38830ff4b919482c/9509,0.005100,False
68566564,0x0c76a5a49fee0ff7decb8fd921ce7534b028a3ec,0x62a99e0a7fa8d5c09622104e75299897807e14e3,0x02beed1404c69e62b76af6dbdae41bd98bca2eab,0x02beed1404c69e62b76af6dbdae41bd98bca2eab/2850,0.149999,False
68566565,0xd489e21aa26c56d344764605b7675918ff6d68e5,0xf1c745b6b47d634097c78722be6a8e507f4968f3,0xe17827609ac34443b3987661f4e037642f6bd9ba,0xe17827609ac34443b3987661f4e037642f6bd9ba/6126,0.057900,False


In [56]:
reco_sales.buyer.nunique()

327177

In [57]:
reco_sales.nft_id.nunique()

878882

In [58]:
reco_sales.collection_id.nunique()

5261

# Save CSV File

In [59]:
reco_sales.to_csv(f'{local_path}/reco_sales.csv')

# Item Collaborative Filtering (CF)

We calculate each NFT pair scores by their common buyers. As the first step, we only take the purchase transaction into account but ignore everything else, such as price, date and so on. We can consider them later.

Thus, the algorithm can be summarized as follows. For item $i$ and $j$, let's denote the number of their common buyers as $N_{ij}$, and the number of buyers for item $i$ as $N_i$, and for item $j$ as $N_j$. Then, the item CF score for items $i$ and $j$ can be calculated by

$$CF(i, j) = \frac{N_{ij}}{\sqrt{N_i} \cdot \sqrt{N_j}}$$

which is essentially the cosine value between two vectors, the user purchase vector for item $i$ and for item $j$.

In [3]:
local_path = "/Users/keonyonglee/Projects/nimble/nft-recommendation-data-analysis-data/nft-token-transfers"
reco_sales = pd.read_csv(f'{local_path}/reco_sales.csv')

## Get Binary Matrix

In [4]:
buyers = reco_sales['buyer'].unique()
nfts = reco_sales['nft_id'].unique()
num_buyers = len(buyers)
num_nfts = len(nfts)
print('number of buyers', num_buyers)
print('number of nft', num_nfts)

number of buyers 327177
number of nft 878882


In [8]:
# buyer wallet_address -> index and nft_id -> index
buyers_index = dict(zip(buyers, range(num_buyers)))
nfts_index = dict(zip(nfts, range(num_nfts)))

In [9]:
%%time
vals, rows, cols = [], [], []
for _, row in reco_sales.iterrows():
    buyer_idx = buyers_index[row['buyer']]
    nft_idx = nfts_index[row['nft_id']]
    val = 1
    vals.append(val)
    rows.append(buyer_idx)
    cols.append(nft_idx)

CPU times: user 51.6 s, sys: 155 ms, total: 51.8 s
Wall time: 51.8 s


### Create a Compressed Sparse Column (CSC) matrix
- https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csc_matrix.html

#### user-2-item (buyer-2-nft) matrix with u2i_matrix(i, j) = 1 if the buyer_i has bought the nft_j

In [10]:
%%time
u2i_matrix = csc_matrix((vals, (rows, cols)), shape=(num_buyers, num_nfts))

CPU times: user 573 ms, sys: 16.4 ms, total: 590 ms
Wall time: 587 ms


#### user-2-item norm where u2i_norm(j) = sqrt(num buyers of nft_j)

In [11]:
%%time
u2i_norm = np.sqrt(u2i_matrix.sum(axis=0))

CPU times: user 26.8 ms, sys: 13.8 ms, total: 40.6 ms
Wall time: 39.5 ms


### Sanity Check

In [12]:
less_than_or_equal_to_2_buyers = np.where(u2i_norm <= np.sqrt(2))[1]
print(f'The number of less than or equal to 2 buyers nft = {len(less_than_or_equal_to_2_buyers)}')

The number of less than or equal to 2 buyers nft = 0


## Compute Top 10 Similar NFTs for Each NFT

In [13]:
def get_top_k(nft_id, k=10):
    # buyer one hot encoding for each NFT
    i_vec = u2i_matrix[:, nft_index[nft_id]]
    # the number of common buyers for each NFT 
    ip = i_vec.multiply(u2i_matrix).sum(axis=0)
    # the CF score
    # sqrt(N_i) is omitted because it is common term for all the scroes
    # and does not affect the ranking
    score = ip / u2i_norm
    
    sorted_index = np.argsort(-score)
    # 1:k+1 to omit the first item which is itself
    similar_tokens = [
        nfts[idx]
        for idx in sorted_index[0, 1:k+1].tolist()[0]]
    
    top_scores = score[0, sorted_index[0, 1:k+1]]

    n_buyer = i_vec.sum()

    return similar_tokens, top_scores, n_buyer

In [16]:
%%time
with multiprocess.Pool(processes=32) as pool:
    results = pool.map(get_top_k, nfts)

CPU times: user 17.6 s, sys: 2.15 s, total: 19.8 s
Wall time: 1h 55min 44s


In [17]:
%%time
with open(f'{local_path}/nft_i2i_v2.csv', 'w') as fp:
    for i, nft_id in enumerate(nfts):
        fp.write(','.join([
            nft_id,
            str(results[i][2]),   # number of buyers
            *results[i][0],       # top-k similar NFT
            *[str(e) for e in results[i][1].tolist()[0]],   # top-k scores
        ]) + '\n')

CPU times: user 4.6 s, sys: 427 ms, total: 5.03 s
Wall time: 5.07 s


## Filter out 0 similarity score recommendations

In [93]:
# Set up column names
names = ['nft_id', 'num_of_buyers']
names.extend([f'sim_nft_id_{x}' for  x in range(1,11)])
names.extend([f'sim_score_{x}' for x in range(1, 11)])

In [94]:
%%time
local_path = "/Users/keonyonglee/Projects/nimble/nft-recommendation-data-analysis-data/nft-token-transfers"
i2i_reco = pd.read_csv(f'{local_path}/nft_i2i_v2.csv', header=0, names=names)

CPU times: user 3.97 s, sys: 336 ms, total: 4.31 s
Wall time: 4.31 s


In [95]:
i2i_reco

Unnamed: 0,nft_id,num_of_buyers,sim_nft_id_1,sim_nft_id_2,sim_nft_id_3,sim_nft_id_4,sim_nft_id_5,sim_nft_id_6,sim_nft_id_7,sim_nft_id_8,...,sim_score_1,sim_score_2,sim_score_3,sim_score_4,sim_score_5,sim_score_6,sim_score_7,sim_score_8,sim_score_9,sim_score_10
0,0xc7457f480416e145093edb355f626faa0fc7f90e/2116,4,0x34b09150783499056b2e04a94c25814fe6ac1c7b/976,0x76b3af5f0f9b89ca5a4f9fe6c58421dbe567062d/6128,0xc7457f480416e145093edb355f626faa0fc7f90e/2989,0xc7457f480416e145093edb355f626faa0fc7f90e/4981,0x76b3af5f0f9b89ca5a4f9fe6c58421dbe567062d/6338,0x2e541cec5cb41e7678ac3c8e91acb3fc1db0da07/5947,0x54616c0815c306fc22417b96282ca4aa6f47d357/6902,0xc7457f480416e145093edb355f626faa0fc7f90e/417,...,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.500000,0.500000
1,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/91445,3,0x358f8ac9a5f8891cb98f61ce5f6466ffc8e28c68/3730,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/66473,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/91249,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/68520,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/53070,0x358f8ac9a5f8891cb98f61ce5f6466ffc8e28c68/1742,0x358f8ac9a5f8891cb98f61ce5f6466ffc8e28c68/2617,0x160c404b2b49cbc3240055ceaee026df1e8497a0/8052,...,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350
2,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/56375,3,0x1fb7b8ad0c8368db5463b34d5ea58778706d580e/8377,0x445ede1ce0a4f6d05720cc691893c5bc3ea1bed7/9928,0x1fb7b8ad0c8368db5463b34d5ea58778706d580e/778,0x1fb7b8ad0c8368db5463b34d5ea58778706d580e/779,0x1fb7b8ad0c8368db5463b34d5ea58778706d580e/6143,0x1fb7b8ad0c8368db5463b34d5ea58778706d580e/881,0x56e5003cc86e75e750fbdb4716c80a2a22d2b5c5/5917,0x160c404b2b49cbc3240055ceaee026df1e8497a0/3220,...,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350
3,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/70494,5,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/89501,0xda2686fd32c6b74d55605cfb48bef331771e7fc6/654,0x2a459947f0ac25ec28c197f09c2d88058a83f3bb/3732,0x03b8d129a8f6dc62a797b59aa5eebb11ad63dada/142,0x684e4ed51d350b4d76a3a07864df572d24e6dc4c/6531,0x0b7600ca77fc257fe7eb432f87825cccc4590037/8059,0x2a459947f0ac25ec28c197f09c2d88058a83f3bb/2624,0x394e3d3044fc89fcdd966d3cb35ac0b32b0cda91/9340,...,1.000000,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350
4,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/21312,3,0x4d928fada59f3446627c5bea707a81e006cf676f/6717,0x2ee6af0dff3a1ce3f7e3414c52c48fd50d73691e/5711,0x740c178e10662bbb050bde257bfa318defe3cabc/9453,0xdcaf23e44639daf29f6532da213999d737f15aa4/5299,0x306b1ea3ecdf94ab739f1910bbda052ed4a9f949/2534,0x2ee6af0dff3a1ce3f7e3414c52c48fd50d73691e/1291,0xb852c6b5892256c264cc2c888ea462189154d8d7/4095,0x9a38dec0590abc8c883d72e52391090e948ddf12/1305,...,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350,0.577350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
878876,0x2160dcdb088196e7e5faf12c2346456d933db34e/4035,3,0x2160dcdb088196e7e5faf12c2346456d933db34e/4035,0x6cb26df0c825fece867a84658f87b0ecbcea72f6/1860,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/59895,0x6cb26df0c825fece867a84658f87b0ecbcea72f6/2220,0x209e639a0ec166ac7a1a4ba41968fa967db30221/4393,0xca21d4228cdcc68d4e23807e5e370c07577dd152/35011,0x011c77fa577c500deedad364b8af9e8540b808c0/11355,0xdb7b094fdc04f51560a03a99f747044951b73727/2414,...,1.732051,1.664101,1.664101,1.664101,1.649916,1.603567,1.581139,1.581139,1.581139,1.581139
878877,0xf442459c8bb4b891b789e816775232b812eb2ccd/9758,3,0xf442459c8bb4b891b789e816775232b812eb2ccd/6639,0xf442459c8bb4b891b789e816775232b812eb2ccd/910,0xf442459c8bb4b891b789e816775232b812eb2ccd/6722,0xf442459c8bb4b891b789e816775232b812eb2ccd/9758,0xf442459c8bb4b891b789e816775232b812eb2ccd/8987,0xf442459c8bb4b891b789e816775232b812eb2ccd/9628,0xf442459c8bb4b891b789e816775232b812eb2ccd/9040,0xf442459c8bb4b891b789e816775232b812eb2ccd/8312,...,1.809068,1.788854,1.767767,1.732051,1.666667,1.666667,1.666667,1.666667,1.664101,1.664101
878878,0xf442459c8bb4b891b789e816775232b812eb2ccd/2922,3,0xf442459c8bb4b891b789e816775232b812eb2ccd/6639,0xf442459c8bb4b891b789e816775232b812eb2ccd/6722,0xf442459c8bb4b891b789e816775232b812eb2ccd/2922,0xf442459c8bb4b891b789e816775232b812eb2ccd/411,0xf442459c8bb4b891b789e816775232b812eb2ccd/8312,0xf442459c8bb4b891b789e816775232b812eb2ccd/9628,0xf442459c8bb4b891b789e816775232b812eb2ccd/9040,0xf442459c8bb4b891b789e816775232b812eb2ccd/8987,...,1.809068,1.767767,1.732051,1.732051,1.666667,1.666667,1.666667,1.666667,1.664101,1.664101
878879,0x3b00ca89348991a8dcede0c56d1b307643eb6dd6/403,3,0x90e95660694b82c312be5f59481d3bbb801d8c39/1018,0x90e95660694b82c312be5f59481d3bbb801d8c39/2127,0x90e95660694b82c312be5f59481d3bbb801d8c39/831,0x90e95660694b82c312be5f59481d3bbb801d8c39/97,0x40c345f02b56041d1d0ade97de5bb65158738ef3/943,0xa0bae4c0410d79398703cbd9a30d010d414ab0da/70,0xa0bae4c0410d79398703cbd9a30d010d414ab0da/69,0xa0bae4c0410d79398703cbd9a30d010d414ab0da/66,...,3.905633,3.905633,3.905633,3.905633,3.905633,3.888889,3.888889,3.888889,3.888889,3.888889


In [96]:
i2i_reco.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 878881 entries, 0 to 878880
Data columns (total 22 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   nft_id         878881 non-null  object 
 1   num_of_buyers  878881 non-null  int64  
 2   sim_nft_id_1   878881 non-null  object 
 3   sim_nft_id_2   878881 non-null  object 
 4   sim_nft_id_3   878881 non-null  object 
 5   sim_nft_id_4   878881 non-null  object 
 6   sim_nft_id_5   878881 non-null  object 
 7   sim_nft_id_6   878881 non-null  object 
 8   sim_nft_id_7   878881 non-null  object 
 9   sim_nft_id_8   878881 non-null  object 
 10  sim_nft_id_9   878881 non-null  object 
 11  sim_nft_id_10  878881 non-null  object 
 12  sim_score_1    878881 non-null  float64
 13  sim_score_2    878881 non-null  float64
 14  sim_score_3    878881 non-null  float64
 15  sim_score_4    878881 non-null  float64
 16  sim_score_5    878881 non-null  float64
 17  sim_score_6    878881 non-nul

In [55]:
id_columns = [f'sim_nft_id_{x}' for  x in range(1,11)]
score_columns = [f'sim_score_{x}' for x in range(1, 11)]

In [97]:
i2i_reco = i2i_reco[i2i_reco.sim_score_1 !=0]

In [98]:
i2i_reco.shape[0]

878176

In [99]:
one_reco_nfts = i2i_reco[i2i_reco.sim_score_2 == 0][['nft_id']+[id_columns[0]]+[score_columns[0]]]

In [100]:
one_reco_nfts.shape[0]

733

In [101]:
i2i_reco = i2i_reco[i2i_reco.sim_score_2 !=0]

In [102]:
two_reco_nfts = i2i_reco[i2i_reco.sim_score_3 == 0][['nft_id']+id_columns[:2]+score_columns[:2]]

In [103]:
two_reco_nfts.shape[0]

882

In [104]:
i2i_reco = i2i_reco[i2i_reco.sim_score_3 !=0]

In [105]:
three_reco_nfts = i2i_reco[i2i_reco.sim_score_4 == 0][['nft_id']+id_columns[:3]+score_columns[:3]]

In [106]:
three_reco_nfts.shape[0]

994

In [107]:
i2i_reco = i2i_reco[i2i_reco.sim_score_4 !=0]

In [108]:
four_reco_nfts = i2i_reco[i2i_reco.sim_score_5 == 0][['nft_id']+id_columns[:4]+score_columns[:4]]

In [109]:
four_reco_nfts.shape[0]

1074

In [110]:
i2i_reco = i2i_reco[i2i_reco.sim_score_5 !=0]

In [111]:
five_reco_nfts = i2i_reco[i2i_reco.sim_score_6 == 0][['nft_id']+id_columns[:5]+score_columns[:5]]

In [112]:
five_reco_nfts.shape[0]

1090

In [113]:
i2i_reco = i2i_reco[i2i_reco.sim_score_6 !=0]

In [114]:
six_reco_nfts = i2i_reco[i2i_reco.sim_score_7 == 0][['nft_id']+id_columns[:6]+score_columns[:6]]

In [115]:
six_reco_nfts.shape[0]

1177

In [116]:
i2i_reco = i2i_reco[i2i_reco.sim_score_7 !=0]

In [117]:
seven_reco_nfts = i2i_reco[i2i_reco.sim_score_8 == 0][['nft_id']+id_columns[:7]+score_columns[:7]]

In [118]:
seven_reco_nfts.shape[0]

1211

In [119]:
i2i_reco = i2i_reco[i2i_reco.sim_score_8 !=0]

In [120]:
eight_reco_nfts = i2i_reco[i2i_reco.sim_score_9 == 0][['nft_id']+id_columns[:8]+score_columns[:8]]

In [121]:
eight_reco_nfts.shape[0]

1285

In [122]:
i2i_reco = i2i_reco[i2i_reco.sim_score_9 !=0]

In [123]:
nine_reco_nfts = i2i_reco[i2i_reco.sim_score_10 == 0][['nft_id']+id_columns[:9]+score_columns[:9]]

In [124]:
nine_reco_nfts.shape[0]

1268

In [125]:
ten_reco_nfts = i2i_reco[i2i_reco.sim_score_10 !=0][['nft_id']+id_columns[:10]+score_columns[:10]]

In [126]:
ten_reco_nfts.shape[0]

868462

In [128]:
reco_list = [
    one_reco_nfts,
    two_reco_nfts,
    three_reco_nfts,
    four_reco_nfts,
    five_reco_nfts,
    six_reco_nfts,
    seven_reco_nfts,
    eight_reco_nfts,
    nine_reco_nfts,
    ten_reco_nfts,
]
filtered_reco = pd.concat(reco_list, join='outer').sort_index()

In [129]:
filtered_reco.shape[0]

878176

In [130]:
filtered_reco.to_csv(f'{local_path}/filtered_i2i_reco.csv')

In [131]:
filtered_reco

Unnamed: 0,nft_id,sim_nft_id_1,sim_score_1,sim_nft_id_2,sim_score_2,sim_nft_id_3,sim_score_3,sim_nft_id_4,sim_score_4,sim_nft_id_5,...,sim_nft_id_6,sim_score_6,sim_nft_id_7,sim_score_7,sim_nft_id_8,sim_score_8,sim_nft_id_9,sim_score_9,sim_nft_id_10,sim_score_10
0,0xc7457f480416e145093edb355f626faa0fc7f90e/2116,0x34b09150783499056b2e04a94c25814fe6ac1c7b/976,0.577350,0x76b3af5f0f9b89ca5a4f9fe6c58421dbe567062d/6128,0.577350,0xc7457f480416e145093edb355f626faa0fc7f90e/2989,0.577350,0xc7457f480416e145093edb355f626faa0fc7f90e/4981,0.577350,0x76b3af5f0f9b89ca5a4f9fe6c58421dbe567062d/6338,...,0x2e541cec5cb41e7678ac3c8e91acb3fc1db0da07/5947,0.577350,0x54616c0815c306fc22417b96282ca4aa6f47d357/6902,0.577350,0xc7457f480416e145093edb355f626faa0fc7f90e/417,0.577350,0xc7457f480416e145093edb355f626faa0fc7f90e/2983,0.500000,0x8513db429f5fb564f473fd2e5c523fae33331aa5/1363,0.500000
1,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/91445,0x358f8ac9a5f8891cb98f61ce5f6466ffc8e28c68/3730,0.577350,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/66473,0.577350,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/91249,0.577350,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/68520,0.577350,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/53070,...,0x358f8ac9a5f8891cb98f61ce5f6466ffc8e28c68/1742,0.577350,0x358f8ac9a5f8891cb98f61ce5f6466ffc8e28c68/2617,0.577350,0x160c404b2b49cbc3240055ceaee026df1e8497a0/8052,0.577350,0x358f8ac9a5f8891cb98f61ce5f6466ffc8e28c68/3731,0.577350,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/95193,0.577350
2,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/56375,0x1fb7b8ad0c8368db5463b34d5ea58778706d580e/8377,0.577350,0x445ede1ce0a4f6d05720cc691893c5bc3ea1bed7/9928,0.577350,0x1fb7b8ad0c8368db5463b34d5ea58778706d580e/778,0.577350,0x1fb7b8ad0c8368db5463b34d5ea58778706d580e/779,0.577350,0x1fb7b8ad0c8368db5463b34d5ea58778706d580e/6143,...,0x1fb7b8ad0c8368db5463b34d5ea58778706d580e/881,0.577350,0x56e5003cc86e75e750fbdb4716c80a2a22d2b5c5/5917,0.577350,0x160c404b2b49cbc3240055ceaee026df1e8497a0/3220,0.577350,0xc86664e7d2608f881f796ee8e24fa9d4d7598406/3744,0.577350,0x2e541cec5cb41e7678ac3c8e91acb3fc1db0da07/3632,0.577350
3,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/70494,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/89501,1.000000,0xda2686fd32c6b74d55605cfb48bef331771e7fc6/654,0.577350,0x2a459947f0ac25ec28c197f09c2d88058a83f3bb/3732,0.577350,0x03b8d129a8f6dc62a797b59aa5eebb11ad63dada/142,0.577350,0x684e4ed51d350b4d76a3a07864df572d24e6dc4c/6531,...,0x0b7600ca77fc257fe7eb432f87825cccc4590037/8059,0.577350,0x2a459947f0ac25ec28c197f09c2d88058a83f3bb/2624,0.577350,0x394e3d3044fc89fcdd966d3cb35ac0b32b0cda91/9340,0.577350,0xbc5d78bb900b16f68b512fef44cf18c2d73fdac7/79,0.577350,0x0ee80069c9b4993882fe0b3fc256260eff385982/6867,0.577350
4,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/21312,0x4d928fada59f3446627c5bea707a81e006cf676f/6717,0.577350,0x2ee6af0dff3a1ce3f7e3414c52c48fd50d73691e/5711,0.577350,0x740c178e10662bbb050bde257bfa318defe3cabc/9453,0.577350,0xdcaf23e44639daf29f6532da213999d737f15aa4/5299,0.577350,0x306b1ea3ecdf94ab739f1910bbda052ed4a9f949/2534,...,0x2ee6af0dff3a1ce3f7e3414c52c48fd50d73691e/1291,0.577350,0xb852c6b5892256c264cc2c888ea462189154d8d7/4095,0.577350,0x9a38dec0590abc8c883d72e52391090e948ddf12/1305,0.577350,0x524cab2ec69124574082676e6f654a18df49a048/12520,0.577350,0x60e4d786628fea6478f785a6d7e704777c86a7c6/11270,0.577350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
878876,0x2160dcdb088196e7e5faf12c2346456d933db34e/4035,0x2160dcdb088196e7e5faf12c2346456d933db34e/4035,1.732051,0x6cb26df0c825fece867a84658f87b0ecbcea72f6/1860,1.664101,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/59895,1.664101,0x6cb26df0c825fece867a84658f87b0ecbcea72f6/2220,1.664101,0x209e639a0ec166ac7a1a4ba41968fa967db30221/4393,...,0xca21d4228cdcc68d4e23807e5e370c07577dd152/35011,1.603567,0x011c77fa577c500deedad364b8af9e8540b808c0/11355,1.581139,0xdb7b094fdc04f51560a03a99f747044951b73727/2414,1.581139,0x1821363abc9e33f1bfbe3f96f7c68eca7f6af2bd/833,1.581139,0x4591c791790f352685a29111eca67abdc878863e/6534,1.581139
878877,0xf442459c8bb4b891b789e816775232b812eb2ccd/9758,0xf442459c8bb4b891b789e816775232b812eb2ccd/6639,1.809068,0xf442459c8bb4b891b789e816775232b812eb2ccd/910,1.788854,0xf442459c8bb4b891b789e816775232b812eb2ccd/6722,1.767767,0xf442459c8bb4b891b789e816775232b812eb2ccd/9758,1.732051,0xf442459c8bb4b891b789e816775232b812eb2ccd/8987,...,0xf442459c8bb4b891b789e816775232b812eb2ccd/9628,1.666667,0xf442459c8bb4b891b789e816775232b812eb2ccd/9040,1.666667,0xf442459c8bb4b891b789e816775232b812eb2ccd/8312,1.666667,0x6cb26df0c825fece867a84658f87b0ecbcea72f6/1860,1.664101,0x6cb26df0c825fece867a84658f87b0ecbcea72f6/2220,1.664101
878878,0xf442459c8bb4b891b789e816775232b812eb2ccd/2922,0xf442459c8bb4b891b789e816775232b812eb2ccd/6639,1.809068,0xf442459c8bb4b891b789e816775232b812eb2ccd/6722,1.767767,0xf442459c8bb4b891b789e816775232b812eb2ccd/2922,1.732051,0xf442459c8bb4b891b789e816775232b812eb2ccd/411,1.732051,0xf442459c8bb4b891b789e816775232b812eb2ccd/8312,...,0xf442459c8bb4b891b789e816775232b812eb2ccd/9628,1.666667,0xf442459c8bb4b891b789e816775232b812eb2ccd/9040,1.666667,0xf442459c8bb4b891b789e816775232b812eb2ccd/8987,1.666667,0x6cb26df0c825fece867a84658f87b0ecbcea72f6/2220,1.664101,0x34d85c9cdeb23fa97cb08333b511ac86e1c4e258/59895,1.664101
878879,0x3b00ca89348991a8dcede0c56d1b307643eb6dd6/403,0x90e95660694b82c312be5f59481d3bbb801d8c39/1018,3.905633,0x90e95660694b82c312be5f59481d3bbb801d8c39/2127,3.905633,0x90e95660694b82c312be5f59481d3bbb801d8c39/831,3.905633,0x90e95660694b82c312be5f59481d3bbb801d8c39/97,3.905633,0x40c345f02b56041d1d0ade97de5bb65158738ef3/943,...,0xa0bae4c0410d79398703cbd9a30d010d414ab0da/70,3.888889,0xa0bae4c0410d79398703cbd9a30d010d414ab0da/69,3.888889,0xa0bae4c0410d79398703cbd9a30d010d414ab0da/66,3.888889,0xa0bae4c0410d79398703cbd9a30d010d414ab0da/68,3.888889,0xa0bae4c0410d79398703cbd9a30d010d414ab0da/67,3.888889


In [133]:
filtered_reco.isna().sum()

nft_id              0
sim_nft_id_1        0
sim_score_1         0
sim_nft_id_2      733
sim_score_2       733
sim_nft_id_3     1615
sim_score_3      1615
sim_nft_id_4     2609
sim_score_4      2609
sim_nft_id_5     3683
sim_score_5      3683
sim_nft_id_6     4773
sim_score_6      4773
sim_nft_id_7     5950
sim_score_7      5950
sim_nft_id_8     7161
sim_score_8      7161
sim_nft_id_9     8446
sim_score_9      8446
sim_nft_id_10    9714
sim_score_10     9714
dtype: int64