## Imports

In [2]:
from collections.abc import Iterator
from datetime import date, timedelta
from web3 import Web3
 
import boto3
import botocore
import logging
import numpy as np
import os
import pandas as pd
import string
import warnings

# NOTE(review): this silences every warning for the whole notebook, including
# pandas deprecation/future warnings that would flag version-sensitive code below.
warnings.filterwarnings('ignore')

In [3]:
def iterate_dates(start_date: date, end_date: date) -> Iterator:
    '''Generate each calendar day from start_date through end_date.

    Both endpoints are included. Nothing is yielded when end_date is
    earlier than start_date.

    Args:
        start_date: first day to yield (inclusive)
        end_date: last day to yield (inclusive)

    Yields:
        consecutive dates, one day apart, in ascending order
    '''
    span_in_days = (end_date - start_date).days + 1
    yield from (start_date + timedelta(days=offset) for offset in range(span_in_days))

def convert_value_from_wei(transfers: pd.DataFrame, unit: str) -> pd.DataFrame:
    '''Convert the 'value' column of transfers from WEI to the given unit.

    Possible units are 'kwei', 'mwei', 'gwei', 'microether', 'milliether', 'ether'.
    The input dataframe is left untouched; the conversion is applied to a copy.

    Args:
        transfers: dataframe whose 'value' column holds amounts in WEI
        unit: unit to convert to

    Returns:
        a new dataframe with the 'value' column converted to float amounts in `unit`

    Raises:
        ValueError if unit is not valid
    '''
    if unit not in ('kwei', 'mwei', 'gwei', 'microether', 'milliether', 'ether'):
        raise ValueError("Invalid unit")

    # web3.py v6 renamed the camelCase `fromWei` helper to `from_wei`; accept either
    # so the notebook runs against both major versions.
    from_wei = getattr(Web3, 'from_wei', None) or Web3.fromWei

    # Work on a copy: the documented contract is "a new dataframe", and mutating the
    # caller's frame would make re-running a cell convert already-converted values.
    converted = transfers.copy()
    converted['value'] = transfers['value'].apply(
        lambda wei: float(from_wei(int(wei), unit))
    )
    return converted

def download_nft_transfers(start_date: date, end_date: date, local_path: str,
                           bucket_name: str = "nimble-data-warehouse-dev"):
    '''Downloads nft token transfer data from start_date to end_date

    One CSV per day is fetched from S3 and stored in local_path as
    `nft_token_transfers=<YYYY-MM-DD>.csv`. Days whose file already exists
    locally are skipped. Download failures are logged and do not stop the loop.

    Args:
        start_date: inclusive start date
        end_date: inclusive end date
        local_path: download path
        bucket_name: S3 bucket to download from

    Raises:
        FileNotFoundError: if local_path does not exist
    '''
    if not os.path.exists(local_path):
        raise FileNotFoundError(f"The {local_path} does not exist")

    s3_client = boto3.client('s3')
    for download_date in iterate_dates(start_date, end_date):
        str_date = download_date.strftime("%Y-%m-%d")
        remote_path = f"etl/ethereum/nft_token_transfers/date={str_date}/nft_token_transfers.csv"
        fname = os.path.join(local_path, f"nft_token_transfers={str_date}.csv")
        if os.path.exists(fname):
            # Already cached locally; nothing to download for this day.
            continue
        try:
            s3_client.download_file(bucket_name, remote_path, fname)
            logging.info(f"{remote_path} downloaded")
        except botocore.exceptions.ClientError as error:
            # Only report "not found" when S3 actually says so; other client errors
            # (e.g. access denied, expired credentials) must not be mislabelled.
            error_code = error.response.get('Error', {}).get('Code', '')
            if error_code in ('404', 'NoSuchKey'):
                logging.error(f'{remote_path} not found')
            else:
                logging.error(f'{remote_path} could not be downloaded: {error}')
        except botocore.exceptions.ParamValidationError as error:
            logging.error('The parameters you provided are incorrect: {}'.format(error))

def load_transfer_data(start_date: date, end_date: date, local_path: str) -> pd.DataFrame:
    """Loads NFT token transfer data with value in ether from start_date to end_date

    Reads `nft_token_transfers=<YYYY-MM-DD>.csv` from local_path for every day in
    the range, converts each file's value column from WEI to ether and
    concatenates the results. Days without a file are logged and skipped.

    Args:
        start_date: an inclusive start date for nft token transfers
        end_date: an inclusive end date for nft token transfers
        local_path: a local path where the nft token transfer data is located

    Returns:
        concatenated transfers dataframe for [start_date, end_date]

    Raises:
        ValueError: if start_date is greater than end_date
        FileNotFoundError: if local_path does not exist or if no csv file is found in the local_path
    """
    if not os.path.exists(local_path):
        raise FileNotFoundError("The local_path does not exist")
    if start_date > end_date:
        raise ValueError("The 'end_date' should be equal to or greather than 'start_date'")

    transfers_list = []
    for date_ in iterate_dates(start_date, end_date):
        fname = os.path.join(local_path, "nft_token_transfers={}.csv".format(date_.strftime("%Y-%m-%d")))
        if not os.path.exists(fname):
            # A missing day is tolerated; only a completely empty range is an error.
            logging.error(f"{fname} does not exist")
            continue
        daily_transfers = pd.read_csv(fname, low_memory=False)
        transfers_list.append(convert_value_from_wei(daily_transfers, 'ether'))

    if not transfers_list:
        raise FileNotFoundError(f"No csv file was loaded from {local_path}")
    # ignore_index gives the combined frame a fresh 0..n-1 index across all days.
    return pd.concat(transfers_list, ignore_index=True)

### Using the last 7 days of data, check whether each pre-defined badge has enough activity

In [6]:
%%time
# Analysis window; both ends are inclusive.
start_date = date(2022,10,18)
end_date = date(2022,10,23)
# Directory holding the daily transfer CSVs and the badge wallet lists. Set the
# NFT_TRANSFERS_DIR environment variable to run on another machine without editing
# the notebook; the original author's path is kept as the default.
local_path = os.environ.get(
    "NFT_TRANSFERS_DIR",
    "/Users/keonyonglee/Projects/nimble/nft-recommendation-data-analysis-data/nft-token-transfers",
)
download_nft_transfers(start_date, end_date, local_path)
transfers = load_transfer_data(start_date, end_date, local_path)

CPU times: user 7.89 s, sys: 538 ms, total: 8.43 s
Wall time: 8.44 s


In [7]:
# Preview the concatenated transfers frame (pandas truncates the rendered rows).
transfers

Unnamed: 0,amount,block_hash,block_number,block_timestamp,contract_type,from_address,log_index,operator,to_address,token_address,token_id,transaction_hash,transaction_index,transaction_type,value,verified,is_batch
0,1,0x839847e372af37336e3703035c658126ae2663ee5f33...,15778434,2022-10-18T23:59:35.000Z,ERC721,0xe71311718d7813b2ff04241a0c9ba4f11623b8d8,4.0,,0x0000000000000000000000000000000000000000,0x2de8f8113bdf66b0005dc71553fc184ed8a2f392,11,0x8d122bc2083d03fa304b423c8b2927cec64c1cf96756...,3.0,Single,0.00000,1,False
1,1,0xd256088f4fbe1ba17d649d1dd8de32ab997d1e2a5a30...,15778433,2022-10-18T23:59:23.000Z,ERC721,0x83c8f28c26bf6aaca652df1dbbe0e1b56f8baba2,188.0,,0x8f54249ae4c8a73e92d44459e026c9197670f3fd,0xc6fb2f58b46c60ef9636f86d1700a9bd024926b1,863,0xbbdda2db06997f10d5cc9790e699401d4bb8be2d62d7...,93.0,Single,0.04298,1,True
2,1,0xd256088f4fbe1ba17d649d1dd8de32ab997d1e2a5a30...,15778433,2022-10-18T23:59:23.000Z,ERC721,0xb2aadf6bfc0a5213acb9c279394b46f50aea65a3,184.0,,0x83c8f28c26bf6aaca652df1dbbe0e1b56f8baba2,0xc6fb2f58b46c60ef9636f86d1700a9bd024926b1,863,0xbbdda2db06997f10d5cc9790e699401d4bb8be2d62d7...,93.0,Single,0.04298,1,True
3,1,0xd256088f4fbe1ba17d649d1dd8de32ab997d1e2a5a30...,15778433,2022-10-18T23:59:23.000Z,ERC721,0x83c8f28c26bf6aaca652df1dbbe0e1b56f8baba2,182.0,,0x8f54249ae4c8a73e92d44459e026c9197670f3fd,0xc6fb2f58b46c60ef9636f86d1700a9bd024926b1,865,0xbbdda2db06997f10d5cc9790e699401d4bb8be2d62d7...,93.0,Single,0.04298,1,True
4,1,0xd256088f4fbe1ba17d649d1dd8de32ab997d1e2a5a30...,15778433,2022-10-18T23:59:23.000Z,ERC721,0xb2aadf6bfc0a5213acb9c279394b46f50aea65a3,178.0,,0x83c8f28c26bf6aaca652df1dbbe0e1b56f8baba2,0xc6fb2f58b46c60ef9636f86d1700a9bd024926b1,865,0xbbdda2db06997f10d5cc9790e699401d4bb8be2d62d7...,93.0,Single,0.04298,1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2216018,1,0x27fd6b959bf8f2c88bb6e4838c3f1e29b232f3228edd...,15807085,2022-10-22T23:59:59.000Z,ERC721,0x17df6c26b461e510be9130382f00e6c68bef1fc4,114.0,,0x6be55c75d61ecaab7edf7791c087858269760383,0xf048cbaad26c1a35e7a04e126fdeb9c8045e676b,9149,0x4ba18a00d62f3b99e78bcedb201797bb0fc91fb44516...,34.0,Single,0.00000,1,True
2216019,1,0x27fd6b959bf8f2c88bb6e4838c3f1e29b232f3228edd...,15807085,2022-10-22T23:59:59.000Z,ERC721,0x17df6c26b461e510be9130382f00e6c68bef1fc4,113.0,,0x6be55c75d61ecaab7edf7791c087858269760383,0xf048cbaad26c1a35e7a04e126fdeb9c8045e676b,3023,0x4ba18a00d62f3b99e78bcedb201797bb0fc91fb44516...,34.0,Single,0.00000,1,True
2216020,1,0x27fd6b959bf8f2c88bb6e4838c3f1e29b232f3228edd...,15807085,2022-10-22T23:59:59.000Z,ERC721,0x17df6c26b461e510be9130382f00e6c68bef1fc4,112.0,,0x6be55c75d61ecaab7edf7791c087858269760383,0xf048cbaad26c1a35e7a04e126fdeb9c8045e676b,4713,0x4ba18a00d62f3b99e78bcedb201797bb0fc91fb44516...,34.0,Single,0.00000,1,True
2216021,1,0x27fd6b959bf8f2c88bb6e4838c3f1e29b232f3228edd...,15807085,2022-10-22T23:59:59.000Z,ERC721,0x0000000000000000000000000000000000000000,86.0,,0x01655b5165e274b2e2a14704c6d8d5190574d853,0xe6a9826e3b6638d01de95b55690bd4ee7eff9441,14751,0xf4c0c1a77ff448e7b6abec6f6b5eaaaa777e2410e547...,23.0,Single,0.00000,1,True


In [8]:
# Add a calendar-day column derived from block_timestamp (modifies `transfers` in place).
# `.dt.date` yields plain `datetime.date` objects, so the column has object dtype and
# later filters on it should compare against `datetime.date` values.
transfers['date'] = pd.to_datetime(transfers['block_timestamp']).dt.date

In [9]:
# Badge wallet lists live next to the transfer CSVs; each file is read without a
# header row and its column is named `wallet_address`.
def _read_wallets(csv_name):
    return pd.read_csv(f'{local_path}/{csv_name}', header=None, names=['wallet_address'])

profitable_flippers = _read_wallets('profitable_nft_flippers.csv')
whales = _read_wallets('whales.csv')
collectors = _read_wallets('nft_collectors.csv')
early_adopters = _read_wallets('early_adopters.csv')

In [16]:
# Baseline per day: number of transfer rows and number of distinct token addresses.
all_by_day = transfers.groupby('date')
print(all_by_day.size())
print(all_by_day.token_address.nunique())

date
2022-10-17        12
2022-10-18    455085
2022-10-19    377732
2022-10-20    282297
2022-10-21    391612
2022-10-22    390595
2022-10-23    318690
dtype: int64
date
2022-10-17       3
2022-10-18    6648
2022-10-19    7154
2022-10-20    7104
2022-10-21    6943
2022-10-22    6846
2022-10-23    7138
Name: token_address, dtype: int64


In [27]:
# Whale badge: transfers whose recipient is a whale wallet, per day, in absolute
# numbers and as a share of all transfers / all distinct token addresses that day.
reco = transfers[transfers.to_address.isin(whales.wallet_address)]
reco_by_day = reco.groupby('date')
all_by_day = transfers.groupby('date')
reco_rows = reco_by_day.size()
reco_tokens = reco_by_day.token_address.nunique()
print(reco_rows)
print(reco_tokens)
print(reco_rows / all_by_day.size())
print(reco_tokens / all_by_day.token_address.nunique())

date
2022-10-18    3688
2022-10-19    2497
2022-10-20    1873
2022-10-21    2267
2022-10-22    2434
2022-10-23    1558
dtype: int64
date
2022-10-18    392
2022-10-19    403
2022-10-20    237
2022-10-21    407
2022-10-22    309
2022-10-23    238
Name: token_address, dtype: int64
date
2022-10-17         NaN
2022-10-18    0.008104
2022-10-19    0.006611
2022-10-20    0.006635
2022-10-21    0.005789
2022-10-22    0.006232
2022-10-23    0.004889
dtype: float64
date
2022-10-17         NaN
2022-10-18    0.058965
2022-10-19    0.056332
2022-10-20    0.033361
2022-10-21    0.058620
2022-10-22    0.045136
2022-10-23    0.033343
Name: token_address, dtype: float64


In [28]:
# Profitable-flipper badge: transfers received by flipper wallets, per day, in
# absolute numbers and as a share of all transfers / all distinct token addresses.
reco = transfers[transfers.to_address.isin(profitable_flippers.wallet_address)]
reco_by_day = reco.groupby('date')
all_by_day = transfers.groupby('date')
reco_rows = reco_by_day.size()
reco_tokens = reco_by_day.token_address.nunique()
print(reco_rows)
print(reco_tokens)
print(reco_rows / all_by_day.size())
print(reco_tokens / all_by_day.token_address.nunique())

date
2022-10-18    3333
2022-10-19    2907
2022-10-20    1896
2022-10-21    2576
2022-10-22    2223
2022-10-23    1578
dtype: int64
date
2022-10-18    172
2022-10-19    195
2022-10-20    122
2022-10-21    145
2022-10-22    164
2022-10-23    101
Name: token_address, dtype: int64
date
2022-10-17         NaN
2022-10-18    0.007324
2022-10-19    0.007696
2022-10-20    0.006716
2022-10-21    0.006578
2022-10-22    0.005691
2022-10-23    0.004952
dtype: float64
date
2022-10-17         NaN
2022-10-18    0.025872
2022-10-19    0.027257
2022-10-20    0.017173
2022-10-21    0.020884
2022-10-22    0.023956
2022-10-23    0.014150
Name: token_address, dtype: float64


In [29]:
# Collector badge: transfers received by collector wallets, per day, in absolute
# numbers and as a share of all transfers / all distinct token addresses.
reco = transfers[transfers.to_address.isin(collectors.wallet_address)]
reco_by_day = reco.groupby('date')
all_by_day = transfers.groupby('date')
reco_rows = reco_by_day.size()
reco_tokens = reco_by_day.token_address.nunique()
print(reco_rows)
print(reco_tokens)
print(reco_rows / all_by_day.size())
print(reco_tokens / all_by_day.token_address.nunique())

date
2022-10-18    25436
2022-10-19    20628
2022-10-20    14471
2022-10-21    17849
2022-10-22    19113
2022-10-23    14796
dtype: int64
date
2022-10-18    833
2022-10-19    808
2022-10-20    647
2022-10-21    795
2022-10-22    726
2022-10-23    779
Name: token_address, dtype: int64
date
2022-10-17         NaN
2022-10-18    0.055893
2022-10-19    0.054610
2022-10-20    0.051262
2022-10-21    0.045578
2022-10-22    0.048933
2022-10-23    0.046428
dtype: float64
date
2022-10-17         NaN
2022-10-18    0.125301
2022-10-19    0.112944
2022-10-20    0.091075
2022-10-21    0.114504
2022-10-22    0.106047
2022-10-23    0.109134
Name: token_address, dtype: float64


In [30]:
# Early-adopter badge: transfers received by early-adopter wallets, per day, in
# absolute numbers and as a share of all transfers / all distinct token addresses.
reco = transfers[transfers.to_address.isin(early_adopters.wallet_address)]
reco_by_day = reco.groupby('date')
all_by_day = transfers.groupby('date')
reco_rows = reco_by_day.size()
reco_tokens = reco_by_day.token_address.nunique()
print(reco_rows)
print(reco_tokens)
print(reco_rows / all_by_day.size())
print(reco_tokens / all_by_day.token_address.nunique())

date
2022-10-18    20456
2022-10-19    17192
2022-10-20    15293
2022-10-21    16068
2022-10-22    17126
2022-10-23    12958
dtype: int64
date
2022-10-18    753
2022-10-19    770
2022-10-20    802
2022-10-21    814
2022-10-22    865
2022-10-23    756
Name: token_address, dtype: int64
date
2022-10-17         NaN
2022-10-18    0.044950
2022-10-19    0.045514
2022-10-20    0.054173
2022-10-21    0.041030
2022-10-22    0.043846
2022-10-23    0.040660
dtype: float64
date
2022-10-17         NaN
2022-10-18    0.113267
2022-10-19    0.107632
2022-10-20    0.112894
2022-10-21    0.117240
2022-10-22    0.126351
2022-10-23    0.105912
Name: token_address, dtype: float64


In [31]:
# Union of the four badge lists: keep transfers received by a wallet on any of them.
badge_wallets = pd.concat([
    whales.wallet_address,
    profitable_flippers.wallet_address,
    collectors.wallet_address,
    early_adopters.wallet_address,
])
reco = transfers[transfers.to_address.isin(badge_wallets)]

In [32]:
# Daily counts for the current `reco` selection, then the same counts as a share
# of all transfers / all distinct token addresses on that day.
reco_by_day = reco.groupby('date')
all_by_day = transfers.groupby('date')
reco_rows = reco_by_day.size()
reco_tokens = reco_by_day.token_address.nunique()
print(reco_rows)
print(reco_tokens)
print(reco_rows / all_by_day.size())
print(reco_tokens / all_by_day.token_address.nunique())

date
2022-10-18    42972
2022-10-19    35763
2022-10-20    28294
2022-10-21    32424
2022-10-22    34132
2022-10-23    25882
dtype: int64
date
2022-10-18    1223
2022-10-19    1200
2022-10-20    1126
2022-10-21    1294
2022-10-22    1231
2022-10-23    1237
Name: token_address, dtype: int64
date
2022-10-17         NaN
2022-10-18    0.094426
2022-10-19    0.094678
2022-10-20    0.100228
2022-10-21    0.082796
2022-10-22    0.087385
2022-10-23    0.081214
dtype: float64
date
2022-10-17         NaN
2022-10-18    0.183965
2022-10-19    0.167738
2022-10-20    0.158502
2022-10-21    0.186375
2022-10-22    0.179813
2022-10-23    0.173298
Name: token_address, dtype: float64


### Compare combined vs. separate recommendations

In [33]:
# Day (as an ISO string) used for the single-day comparison below.
# NOTE(review): this rebinds `date`, shadowing the `datetime.date` class imported at the
# top; re-running the cell that calls `date(2022,10,18)` after this one raises a TypeError.
date = '2022-10-23'

In [56]:
# `transfers.date` holds plain `datetime.date` objects (it was built with `.dt.date`),
# so the filter value must be a `datetime.date` too. Comparing against a `pd.Timestamp`
# is never equal on pandas 2.x and would silently leave `one_day` empty.
one_day = transfers[transfers.date == pd.Timestamp(date).date()]
# Drop rows flagged as part of a batch before summarising.
one_day = one_day[~one_day.is_batch]

# For each badge: per token address, the summed transfer value (tx_vol), the number
# of transfer rows (tx_cnt) and the number of distinct receiving wallets (n_buyer).
whale_reco = one_day[one_day.to_address.isin(whales.wallet_address)]
whale_reco_tx_vol = whale_reco.groupby('token_address').value.sum().rename('tx_vol')
whale_reco_tx_cnt = whale_reco.groupby('token_address').size().rename('tx_cnt')
whale_reco_n_buyer = whale_reco.groupby('token_address').to_address.nunique().rename('n_buyer')
whale_reco = pd.concat([whale_reco_tx_vol, whale_reco_tx_cnt, whale_reco_n_buyer], axis=1)

flipper_reco = one_day[one_day.to_address.isin(profitable_flippers.wallet_address)]
flipper_reco_tx_vol = flipper_reco.groupby('token_address').value.sum().rename('tx_vol')
flipper_reco_tx_cnt = flipper_reco.groupby('token_address').size().rename('tx_cnt')
flipper_reco_n_buyer = flipper_reco.groupby('token_address').to_address.nunique().rename('n_buyer')
flipper_reco = pd.concat([flipper_reco_tx_vol, flipper_reco_tx_cnt, flipper_reco_n_buyer], axis=1)

collector_reco = one_day[one_day.to_address.isin(collectors.wallet_address)]
collector_reco_tx_vol = collector_reco.groupby('token_address').value.sum().rename('tx_vol')
collector_reco_tx_cnt = collector_reco.groupby('token_address').size().rename('tx_cnt')
collector_reco_n_buyer = collector_reco.groupby('token_address').to_address.nunique().rename('n_buyer')
collector_reco = pd.concat([collector_reco_tx_vol, collector_reco_tx_cnt, collector_reco_n_buyer], axis=1)

In [57]:
# Distribution of tx_vol / tx_cnt / n_buyer across token addresses received by whale wallets.
whale_reco.describe()

Unnamed: 0,tx_vol,tx_cnt,n_buyer
count,175.0,175.0,175.0
mean,3.104656,2.6,1.457143
std,17.680062,5.003447,1.649501
min,0.0,1.0,1.0
25%,0.0,1.0,1.0
50%,0.01,1.0,1.0
75%,0.4395,2.0,1.0
max,211.19,54.0,17.0


In [58]:
# Distribution of tx_vol / tx_cnt / n_buyer across token addresses received by flipper wallets.
flipper_reco.describe()

Unnamed: 0,tx_vol,tx_cnt,n_buyer
count,79.0,79.0,79.0
mean,0.637009,3.759494,1.734177
std,2.246129,8.118063,2.146607
min,0.0,1.0,1.0
25%,0.0,1.0,1.0
50%,0.0,1.0,1.0
75%,0.1417,2.5,1.5
max,11.658241,58.0,16.0


In [59]:
# Distribution of tx_vol / tx_cnt / n_buyer across token addresses received by collector wallets.
collector_reco.describe()

Unnamed: 0,tx_vol,tx_cnt,n_buyer
count,519.0,519.0,519.0
mean,0.510418,6.1079,3.109827
std,2.861072,22.497789,8.455509
min,0.0,1.0,1.0
25%,0.0,1.0,1.0
50%,0.009,1.0,1.0
75%,0.115,3.0,2.0
max,48.234819,379.0,100.0


In [51]:
def count_intersection_2(r1, r2):
    '''Print "<shared> / <total>" for the index labels of r1 and r2.

    <shared> is the number of labels present in both indexes and <total>
    the number of labels present in at least one of them.

    Args:
        r1: object exposing `.index.to_list()` (e.g. a pandas Series/DataFrame)
        r2: object exposing `.index.to_list()`
    '''
    first_labels = set(r1.index.to_list())
    second_labels = set(r2.index.to_list())
    shared = first_labels & second_labels
    combined = first_labels | second_labels
    print(f'{len(shared)} / {len(combined)}')
    
def count_intersection_3(r1, r2, r3):
    '''Print how many index labels are shared by all three of r1, r2 and r3.'''
    label_sets = [set(ranking.index.to_list()) for ranking in (r1, r2, r3)]
    common = set.intersection(*label_sets)
    print(len(common))
    
def count_intersection_4(r1, r2, r3, r4):
    '''Print how many index labels are shared by all four of r1, r2, r3 and r4.'''
    label_sets = [set(ranking.index.to_list()) for ranking in (r1, r2, r3, r4)]
    common = set.intersection(*label_sets)
    print(len(common))

In [52]:
# Prints "<token addresses in both> / <token addresses in either>" for flippers vs whales.
count_intersection_2(flipper_reco, whale_reco)

35 / 219


In [60]:
# Prints "<token addresses in both> / <token addresses in either>" for collectors vs whales.
count_intersection_2(collector_reco, whale_reco)

109 / 585


In [61]:
# Prints "<token addresses in both> / <token addresses in either>" for collectors vs flippers.
count_intersection_2(collector_reco, flipper_reco)

73 / 525


In [62]:
def find_intersection(date, top_n=20):
    '''Print the pairwise overlap of each badge's top token addresses for one day.

    For the given day, non-batch transfers received by whale, flipper and
    collector wallets are each ranked by summed transfer value per token
    address; the top `top_n` of each ranking are compared pairwise with
    count_intersection_2. Reads the notebook-level `transfers`, `whales`,
    `profitable_flippers` and `collectors` frames.

    Args:
        date: the day to analyse; anything `pd.Timestamp` accepts (e.g. '2022-10-23')
        top_n: how many token addresses to keep per badge ranking
    '''
    print(date)
    # `transfers.date` holds plain `datetime.date` objects, so compare against a
    # `datetime.date`. A `pd.Timestamp` is never equal to a date on pandas 2.x,
    # which would silently select no rows.
    target_day = pd.Timestamp(date).date()
    one_day = transfers[transfers.date == target_day]
    one_day = one_day[~one_day.is_batch]

    def _top_by_volume(wallets):
        # Top token addresses by summed value among transfers received by `wallets`.
        received = one_day[one_day.to_address.isin(wallets.wallet_address)]
        volume = received.groupby('token_address').value.sum()
        return volume.sort_values(ascending=False).head(top_n)

    whale_reco = _top_by_volume(whales)
    flipper_reco = _top_by_volume(profitable_flippers)
    collector_reco = _top_by_volume(collectors)

    print('Flipper and Whale')
    count_intersection_2(flipper_reco, whale_reco)
    print('Collector and Whale')
    count_intersection_2(collector_reco, whale_reco)
    print('Collector and Flipper')
    count_intersection_2(collector_reco, flipper_reco)

In [64]:
# Repeat the top-ranked overlap check for October 18 through 23, 2022.
for day in range(18, 24):
    find_intersection("2022-10-{}".format(day))

2022-10-18
Flipper and Whale
3 / 37
Collector and Whale
8 / 32
Collector and Flipper
10 / 30
2022-10-19
Flipper and Whale
7 / 33
Collector and Whale
10 / 30
Collector and Flipper
13 / 27
2022-10-20
Flipper and Whale
4 / 36
Collector and Whale
14 / 26
Collector and Flipper
7 / 33
2022-10-21
Flipper and Whale
6 / 34
Collector and Whale
7 / 33
Collector and Flipper
10 / 30
2022-10-22
Flipper and Whale
3 / 37
Collector and Whale
9 / 31
Collector and Flipper
6 / 34
2022-10-23
Flipper and Whale
5 / 35
Collector and Whale
10 / 30
Collector and Flipper
8 / 32


### Ranking Logic