### Imports

In [1]:
from collections.abc import Iterator
from datetime import date, timedelta
from web3 import Web3
 
import boto3
import botocore
import logging
import numpy as np
import os
import pandas as pd
import seaborn as sns
import string

In [5]:
user_local_path = "../data"
os.system('mkdir -p ../data/nft-token-transfers')

0

In [6]:
def iterate_dates(start_date: date, end_date: date) -> Iterator:
    '''Yields date in range of [start_date, end_date]

    Args:
        start_date: inclusive start date
        end_date: inclusive end date
    '''
    for n in range(int((end_date - start_date).days)+1):
        yield start_date + timedelta(n)

        
def download_nft_transfers(start_date: date, end_date: date, local_path: string) -> None:
    '''Downloads nft token transfer data from start_date to end_date

    Args:
        start_date: inclusive start date
        end_date: inclusive end date
        local_path: download path

    Raises:
        FileNotFoundError: if local_path does not exist
    '''
    
    if not os.path.exists(local_path):
        raise FileNotFoundError(f"The {local_path} does not exist")

    s3_client = boto3.client('s3')
    bucket_name = "nimble-data-warehouse-dev"
    for download_date in iterate_dates(start_date, end_date):
        str_date = download_date.strftime("%Y-%m-%d")
        remote_path = f"etl/ethereum/nft_token_transfers/date={str_date}/nft_token_transfers.csv"
        fname = os.path.join(local_path, f"nft_token_transfers={str_date}.csv")
        if not os.path.exists(fname):
            try:
                s3_client.download_file(bucket_name, remote_path, fname)
                logging.info(f"{remote_path} downloaded")
            except botocore.exceptions.ClientError as error:
                logging.error(f'{remote_path} not found')
            except botocore.exceptions.ParamValidationError as error:
                logging.error('The parameters you provided are incorrect: {}'.format(error))

                
def convert_value_from_wei(transfers: pd.DataFrame, unit: string) -> pd.DataFrame:
    '''Convert value in WEI to unit for transfers
        where possible units are 'kwei', 'mwei', 'gwei', 'microether', 'milliether', 'ether'

    Args:
        transfers: dataframe you want to convert the value of
        unit: unit to convert to

    Returns:
        a new dataframe with converted value

    Raises:
        ValueError if unit is not valid
    '''
    if unit not in ['kwei', 'mwei', 'gwei', 'microether', 'milliether', 'ether']:
        raise ValueError("Invalid unit")
    values_in_ether = transfers['value'].apply(lambda x: float(Web3.from_wei(int(x), unit)))
    transfers['value'] = values_in_ether
    return transfers


def load_transfer_data(start_date: date, end_date: date, local_path: string) -> pd.DataFrame:
    """Loads NFT token transfer data with value in ether from start_date to end_date

    Args:
        start_date: an inclusive start date for nft token transfers
        end_date: an inclusive end date for nft token transfers
        local_path: a local path where the nft toke transfer date is located
    
    Returns:
        concatenated transfers dataframe for [start_date, end_date]

    Raises:
        ValueError: if start_date is greater than end_date
        FileNotFoundError: if local_path does not exist or if no csv file is found in the local_path
    """
    if not os.path.exists(local_path):
        raise FileNotFoundError("The local_path does not exist")
    if start_date > end_date:
        raise ValueError("The 'end_date' should be equal to or greather than 'start_date'")

    transfers_list = []
    for date_ in iterate_dates(start_date, end_date):
        fname = os.path.join(local_path, "nft_token_transfers={}.csv".format(date_.strftime("%Y-%m-%d")))
        if os.path.exists(fname):
            transfers = pd.read_csv(fname, low_memory=False)
            transfers = convert_value_from_wei(transfers, 'ether')
            transfers_list.append(transfers)
        else:
            logging.error(f"{fname} does not exist")
    
    if transfers_list:
        return pd.concat(transfers_list, ignore_index=True)
    else:
        raise FileNotFoundError(f"No csv file was loaded from {local_path}")

### Load data

In [None]:
end_date = date(2022,10,25)
start_date = end_date - timedelta(days=30)
local_path = user_local_path + "/nft-token-transfers"
download_nft_transfers(start_date, end_date, local_path)

### Compare 2 loading methods

In [5]:
%%time
df = load_transfer_data(start_date, end_date, local_path)

CPU times: user 42.8 s, sys: 3.5 s, total: 46.3 s
Wall time: 46.4 s


In [6]:
start_date

datetime.date(2022, 9, 25)

In [7]:
end_date

datetime.date(2022, 10, 25)

In [8]:
df

Unnamed: 0,amount,block_hash,block_number,block_timestamp,contract_type,from_address,log_index,operator,to_address,token_address,token_id,transaction_hash,transaction_index,transaction_type,value,verified,is_batch
0,1,0x438a438a9dffff20e3c5c3e618bf21aaf19b1a0b7bf5...,15613826,2022-09-25T23:59:47.000Z,ERC721,0x0000000000000000000000000000000000000000,239.0,,0xc6bdfd67f722298332969c52f1c1215398f4ec22,0xd4621d73a6c63dd5616b97c9045dfc50a4cc3233,7095,0x37a0540d90b6d5b71998ec2cdc037cae68a3c0ace690...,69.0,Single,0.00000,1,True
1,1,0x438a438a9dffff20e3c5c3e618bf21aaf19b1a0b7bf5...,15613826,2022-09-25T23:59:47.000Z,ERC721,0x0000000000000000000000000000000000000000,238.0,,0xc6bdfd67f722298332969c52f1c1215398f4ec22,0xd4621d73a6c63dd5616b97c9045dfc50a4cc3233,7094,0x37a0540d90b6d5b71998ec2cdc037cae68a3c0ace690...,69.0,Single,0.00000,1,True
2,1,0x438a438a9dffff20e3c5c3e618bf21aaf19b1a0b7bf5...,15613826,2022-09-25T23:59:47.000Z,ERC721,0x0eee3a65abd51492ae9ac5c43046cfb9799cc66a,193.0,,0x1240b283c420621015f69f292d8fdb523a52772e,0xd1169e5349d1cb9941f3dcba135c8a4b9eacfdde,171000100996,0xacfbbfe5ef26c3e70114709bf06e4a58c7eda6a3e384...,55.0,Single,0.58999,1,False
3,1,0x438a438a9dffff20e3c5c3e618bf21aaf19b1a0b7bf5...,15613826,2022-09-25T23:59:47.000Z,ERC721,0x28701e73d358aa0ab40c2078ece3bf7d56948618,178.0,,0xff452cc8ff7deb713b6cf5ac5aa6085c0a606786,0x79471bb4cbb351a17fa1eadb5b77aa76795c423a,1810,0xa9b2d9ae04c6703044ed192253299b5fc532b3beab72...,50.0,Single,0.01870,1,False
4,1,0x438a438a9dffff20e3c5c3e618bf21aaf19b1a0b7bf5...,15613826,2022-09-25T23:59:47.000Z,ERC721,0x0000000000000000000000000000000000000000,175.0,,0x685fbcf87d20ce3668f8b85ae61fbe54cce32114,0x7bdfec34c329b47542038114ae82d3f89aad5c13,425,0xae7d438982513b2044fc7ca670df81c1894ac114eb36...,49.0,Single,0.00000,1,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12469142,1,0x75ac773ba895f9f137f9e6ff6dd72306ed19448c83e5...,15821368,2022-10-24T23:59:59.000Z,ERC721,0x3a7a1cfa5ae8ef3e0f7ab5689c4804ec34edd52f,102.0,,0xb1a5e908cbaabfea9f0baada6614583d03b74b08,0xd88b8b8c45a0531229dbbf93b8c503bfbe17e86a,1286,0x01f94a9ac8ba5410e2dde6dbec6fb883b0075ef0b3e1...,35.0,Single,0.03800,1,False
12469143,1,0x75ac773ba895f9f137f9e6ff6dd72306ed19448c83e5...,15821368,2022-10-24T23:59:59.000Z,ERC721,0x8ce003fa2767fcbb35bf801f7eeb6a4b9eb0ec9e,98.0,,0xa5c90a02f955e02c21b29060d5f8a9725b50f2b2,0x1c3ec75c53eca5acf20b0a64bab2c6d163bb08cf,647,0xd2ff47427a019fa74a6374b10b7ce150df05e0182ab0...,29.0,Single,0.00740,1,False
12469144,1,0x75ac773ba895f9f137f9e6ff6dd72306ed19448c83e5...,15821368,2022-10-24T23:59:59.000Z,ERC721,0xcda72070e455bb31c7690a170224ce43623d0b6f,93.0,,0xee432788ab11935ab66e5a71b8ecc333694957ac,0xf8e37640d57b5719f14452c8afc29e50606e36c1,55,0x43fe01954da65a133d9487ed9b75e553ebe66951f33c...,27.0,Single,0.01000,1,False
12469145,1,0x75ac773ba895f9f137f9e6ff6dd72306ed19448c83e5...,15821368,2022-10-24T23:59:59.000Z,ERC721,0x0000000000000000000000000000000000000000,85.0,,0xd43a279416b96702bb8be822cc5c570d5e4467a3,0xb4fedc003053c22ac8b808bb424f3e1787f30cf2,16640,0x37b97b0e222e268cc25235f74221aab6f1af2acc5736...,22.0,Single,0.00000,1,False


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12469147 entries, 0 to 12469146
Data columns (total 17 columns):
 #   Column             Dtype  
---  ------             -----  
 0   amount             object 
 1   block_hash         object 
 2   block_number       int64  
 3   block_timestamp    object 
 4   contract_type      object 
 5   from_address       object 
 6   log_index          float64
 7   operator           object 
 8   to_address         object 
 9   token_address      object 
 10  token_id           object 
 11  transaction_hash   object 
 12  transaction_index  float64
 13  transaction_type   object 
 14  value              float64
 15  verified           int64  
 16  is_batch           bool   
dtypes: bool(1), float64(3), int64(2), object(11)
memory usage: 1.5+ GB


In [10]:
df.isna().sum()

amount                      0
block_hash                  0
block_number                0
block_timestamp             0
contract_type             670
from_address                0
log_index                6254
operator             10565202
to_address                  0
token_address               0
token_id                   77
transaction_hash            0
transaction_index       13520
transaction_type            0
value                       0
verified                    0
is_batch                    0
dtype: int64

In [11]:
%%time
df = df[~df.token_id.isna()]
df = df[~df.contract_type.isin(['ERC1155', 'ERC165'])]
df['nft_id'] = df['token_address'].str.cat(df['token_id'], sep="/")
df['block_timestamp'] = pd.to_datetime(df.block_timestamp)
df = df[~df.is_batch]
df = df[df.value > 0]

CPU times: user 7.04 s, sys: 1.57 s, total: 8.61 s
Wall time: 8.69 s


In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1205442 entries, 2 to 12469144
Data columns (total 18 columns):
 #   Column             Non-Null Count    Dtype              
---  ------             --------------    -----              
 0   amount             1205442 non-null  object             
 1   block_hash         1205442 non-null  object             
 2   block_number       1205442 non-null  int64              
 3   block_timestamp    1205442 non-null  datetime64[ns, UTC]
 4   contract_type      1205299 non-null  object             
 5   from_address       1205442 non-null  object             
 6   log_index          1204383 non-null  float64            
 7   operator           0 non-null        object             
 8   to_address         1205442 non-null  object             
 9   token_address      1205442 non-null  object             
 10  token_id           1205442 non-null  object             
 11  transaction_hash   1205442 non-null  object             
 12  transaction_i

In [13]:
def load_non_batch_sales_data(start_date: date, end_date: date, local_path: string) -> pd.DataFrame:
    """Loads non batch sales from NFT token transfer data 
        with value in ether from start_date to end_date

    Args:
        start_date: an inclusive start date for nft token transfers
        end_date: an inclusive end date for nft token transfers
        local_path: a local path where the nft toke transfer date is located
    
    Returns:
        concatenated transfers dataframe for [start_date, end_date]

    Raises:
        ValueError: if start_date is greater than end_date
        FileNotFoundError: if local_path does not exist or if no csv file is found in the local_path
    """
    if not os.path.exists(local_path):
        raise FileNotFoundError("The local_path does not exist")
    if start_date > end_date:
        raise ValueError("The 'end_date' should be equal to or greather than 'start_date'")

    transfers_list = []
    for date_ in iterate_dates(start_date, end_date):
        fname = os.path.join(local_path, "nft_token_transfers={}.csv".format(date_.strftime("%Y-%m-%d")))
        if os.path.exists(fname):
            transfers = pd.read_csv(fname, low_memory=False)
            transfers = convert_value_from_wei(transfers, 'ether')
            transfers = transfers[~transfers.is_batch]
            transfers = transfers[transfers.value > 0]
            transfers_list.append(transfers)
        else:
            logging.error(f"{fname} does not exist")
    
    if transfers_list:
        return pd.concat(transfers_list, ignore_index=True)
    else:
        raise FileNotFoundError(f"No csv file was loaded from {local_path}")

In [14]:
%%time
df = load_non_batch_sales_data(start_date, end_date, local_path)

CPU times: user 42.8 s, sys: 2.65 s, total: 45.5 s
Wall time: 45.5 s


In [15]:
%%time
df = df[~df.token_id.isna()]
df = df[~df.contract_type.isin(['ERC1155', 'ERC165'])]
df['nft_id'] = df['token_address'].str.cat(df['token_id'], sep="/")
df['block_timestamp'] = pd.to_datetime(df.block_timestamp)

CPU times: user 895 ms, sys: 58.5 ms, total: 953 ms
Wall time: 952 ms


In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1205442 entries, 0 to 1313798
Data columns (total 18 columns):
 #   Column             Non-Null Count    Dtype              
---  ------             --------------    -----              
 0   amount             1205442 non-null  object             
 1   block_hash         1205442 non-null  object             
 2   block_number       1205442 non-null  int64              
 3   block_timestamp    1205442 non-null  datetime64[ns, UTC]
 4   contract_type      1205299 non-null  object             
 5   from_address       1205442 non-null  object             
 6   log_index          1204383 non-null  float64            
 7   operator           0 non-null        object             
 8   to_address         1205442 non-null  object             
 9   token_address      1205442 non-null  object             
 10  token_id           1205442 non-null  object             
 11  transaction_hash   1205442 non-null  object             
 12  transaction_in

### Get purchases and sales

In [17]:
genesis_addresses = [
    "0x0000000000000000000000000000000000000002",
    "0x0000000000000000000000000000000000000003",
    "0x0000000000000000000000000000000000000004",
    "0x0000000000000000000000000000000000000005",
    "0x0000000000000000000000000000000000000006",
    "0x0000000000000000000000000000000000000007",
    "0x0000000000000000000000000000000000000008",
    "0x0000000000000000000000000000000000000009",
    "0x0000000000000000000000000000000000000000",
    "0x3333333333333333333333333333333333333333",
    "0x4444444444444444444444444444444444444444",
    "0x8888888888888888888888888888888888888888",
]
burn_addresses = [
    "0x000000000000000000000000000000000000cdad",
    "0x000000000000000000000000000000000000dead",
    "0x0000000000000000000000000000000000000001",
    "0x0000000000000000000000000000000000000002",
    "0x0000000000000000000000000000000000000003",
    "0x0000000000000000000000000000000000000004",
    "0x0000000000000000000000000000000000000005",
    "0x0000000000000000000000000000000000000006",
    "0x0000000000000000000000000000000000000007",
    "0x0000000000000000000000000000000000000008",
    "0x0000000000000000000000000000000000000009",
    "0x0000000000000000000000000000000000000000",
    "0x00000000000000000000045261d4ee77acdb3286",
    "0x0123456789012345678901234567890123456789",
    "0x1111111111111111111111111111111111111111",
    "0x1234567890123456789012345678901234567890",
    "0x2222222222222222222222222222222222222222",
    "0x3333333333333333333333333333333333333333",
    "0x4444444444444444444444444444444444444444",
    "0x6666666666666666666666666666666666666666",
    "0x8888888888888888888888888888888888888888",
    "0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
    "0xbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
    "0xeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee",
    "0xffffffffffffffffffffffffffffffffffffffff",
    "0xdead000000000000000042069420694206942069",
]
null_addresses = list(set(genesis_addresses + burn_addresses))

In [18]:
%%time
nft_purchases = df[~df.to_address.isin(null_addresses)]\
                    [['to_address', 'block_timestamp', 'token_address', 'value', 'nft_id']]\
                    .rename(columns={'to_address': 'wallet_address',
                                     'block_timestamp': 'purchase_time', 
                                     'token_address': 'collection_id', 
                                     'value': 'purchase_value'})
nft_purchases = nft_purchases[~nft_purchases.index.duplicated(keep='first')]

CPU times: user 510 ms, sys: 39.2 ms, total: 549 ms
Wall time: 549 ms


In [19]:
%%time
nft_sales = df[~df.from_address.isin(null_addresses)]\
                    [['from_address', 'block_timestamp', 'value', 'nft_id']]\
                    .rename(columns={'from_address': 'wallet_address',
                                     'block_timestamp': 'sale_time',
                                     'value': 'sale_value'})
nft_sales = nft_sales[~nft_sales.index.duplicated(keep='first')]

CPU times: user 260 ms, sys: 27 ms, total: 287 ms
Wall time: 286 ms


In [20]:
%%time
nft_pns = nft_purchases.merge(nft_sales, how='outer', on=['wallet_address','nft_id'])

CPU times: user 899 ms, sys: 78.8 ms, total: 978 ms
Wall time: 987 ms


In [21]:
nft_pns.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1954643 entries, 0 to 1954642
Data columns (total 7 columns):
 #   Column          Dtype              
---  ------          -----              
 0   wallet_address  object             
 1   purchase_time   datetime64[ns, UTC]
 2   collection_id   object             
 3   purchase_value  float64            
 4   nft_id          object             
 5   sale_time       datetime64[ns, UTC]
 6   sale_value      float64            
dtypes: datetime64[ns, UTC](2), float64(2), object(3)
memory usage: 119.3+ MB


In [22]:
%%time
nft_pns['hold_time'] = nft_pns['sale_time'] - nft_pns['purchase_time']
nft_pns = nft_pns[(nft_pns.hold_time.isna()) | (nft_pns.hold_time > pd.Timedelta(0,'s'))]

CPU times: user 190 ms, sys: 36.6 ms, total: 226 ms
Wall time: 226 ms


In [23]:
%%time
is_owned = nft_pns.sale_time.isna().rename('is_owned')
nft_pns = nft_pns.join(is_owned)

CPU times: user 96 ms, sys: 17.4 ms, total: 113 ms
Wall time: 112 ms


### Whales and NFT Collectors

In [24]:
candidate_pns = nft_pns[nft_pns.is_owned].copy()

In [25]:
candidate_pns

Unnamed: 0,wallet_address,purchase_time,collection_id,purchase_value,nft_id,sale_time,sale_value,hold_time,is_owned
0,0x1240b283c420621015f69f292d8fdb523a52772e,2022-09-25 23:59:47+00:00,0xd1169e5349d1cb9941f3dcba135c8a4b9eacfdde,0.58999,0xd1169e5349d1cb9941f3dcba135c8a4b9eacfdde/171...,NaT,,NaT,True
2,0xbaaf84e8e2467f4c71506cd8ea54d367f53d9ee9,2022-09-25 23:59:47+00:00,0xc101916cd9ddeac5a6f915eed033b1b6e4a637cb,0.41000,0xc101916cd9ddeac5a6f915eed033b1b6e4a637cb/7021,NaT,,NaT,True
3,0x7103964efae2acd944ce03ea0e3c3f4ad2c4a2ae,2022-09-25 23:59:47+00:00,0xc36442b4a4522e871399cd717abdd847ab11fe88,0.01500,0xc36442b4a4522e871399cd717abdd847ab11fe88/323988,NaT,,NaT,True
4,0x560745f9a1cec04879c4a85299859db336da903c,2022-09-25 23:59:47+00:00,0xb9b2267d3c35d1fb0bbf752b6205eea91a82cd84,0.00400,0xb9b2267d3c35d1fb0bbf752b6205eea91a82cd84/4213,NaT,,NaT,True
6,0xf77bb93d483b991a3bcb72e8d17f619774582944,2022-09-25 23:59:35+00:00,0xfd4b8f7522c9398c0ad38ad63548cd8b226feb93,0.01200,0xfd4b8f7522c9398c0ad38ad63548cd8b226feb93/291,NaT,,NaT,True
...,...,...,...,...,...,...,...,...,...
1215359,0x092cff73c77a9de794d25b0088ded0e430733dbb,2022-10-24 23:59:59+00:00,0x67421c8622f8e38fe9868b4636b8dc855347d570,0.10990,0x67421c8622f8e38fe9868b4636b8dc855347d570/2629,NaT,,NaT,True
1215360,0xa3dbde2b97f164771c39ac5f8cbdc9a7e158f483,2022-10-24 23:59:59+00:00,0x3c9075f574edc2f630f918ebac5ccf1095d82cc2,0.01000,0x3c9075f574edc2f630f918ebac5ccf1095d82cc2/2558,NaT,,NaT,True
1215361,0xb1a5e908cbaabfea9f0baada6614583d03b74b08,2022-10-24 23:59:59+00:00,0xd88b8b8c45a0531229dbbf93b8c503bfbe17e86a,0.03800,0xd88b8b8c45a0531229dbbf93b8c503bfbe17e86a/1286,NaT,,NaT,True
1215362,0xa5c90a02f955e02c21b29060d5f8a9725b50f2b2,2022-10-24 23:59:59+00:00,0x1c3ec75c53eca5acf20b0a64bab2c6d163bb08cf,0.00740,0x1c3ec75c53eca5acf20b0a64bab2c6d163bb08cf/647,NaT,,NaT,True


In [26]:
candidate_pns['is_high_value'] = candidate_pns.purchase_value > candidate_pns.purchase_value.quantile(.975)

In [27]:
candidates = candidate_pns.groupby('wallet_address').is_high_value.sum().rename('num_high_value').to_frame()

In [28]:
candidates['num_collections'] = candidate_pns.groupby('wallet_address').collection_id.nunique()

In [29]:
candidates['num_owned'] = candidate_pns.groupby('wallet_address').is_owned.sum()

In [30]:
hv_cut = candidates.num_high_value.quantile(.95)

In [31]:
nc_cut = candidates.num_collections.quantile(.9)

In [32]:
nc_cut

5.0

In [33]:
candidates

Unnamed: 0_level_0,num_high_value,num_collections,num_owned
wallet_address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0x0000000000006e543164be036824fcf832e67e47,0,1,1
0x000000000000d34c44564053af35e4fe271d0caa,12,1,12
0x00000000000a01757661bd51345cd95c40320480,0,2,2
0x00000000008c1ed3b68ae8bd7e74e595bd11b066,0,2,2
0x0000000000a0136c855efb4596d1da5f5b703262,0,1,1
...,...,...,...
0xfffed1a6b1e22c7ddb1ee12be2958dfd32083e36,0,2,10
0xffff41988852d624b0e640e895eb4d18f7da077e,0,1,1
0xffff431202204ea74698b0fb1ebfe6a841206bcb,0,1,1
0xffffe388e1e4cfaab94f0b883d28b8a424cb45a1,0,2,2


In [34]:
whales = candidates[(candidates['num_high_value'] > hv_cut) & (candidates['num_collections'] > nc_cut)]

In [35]:
whales.describe()

Unnamed: 0,num_high_value,num_collections,num_owned
count,679.0,679.0,679.0
mean,4.521355,13.369661,33.322533
std,7.775371,16.705157,65.140492
min,2.0,6.0,6.0
25%,2.0,7.0,12.0
50%,3.0,9.0,19.0
75%,4.0,13.0,34.5
max,149.0,305.0,1280.0


In [36]:
whales

Unnamed: 0_level_0,num_high_value,num_collections,num_owned
wallet_address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0x003ef9575e9535636651c0c15ea376266daa09f8,2,7,28
0x0064f02799ea7748a9b51b5e78bcd274d9e7d0a1,5,6,6
0x00e484da1156202e9dd341ad7ea9c908bb919e96,7,13,74
0x013598ea3fd28608c23ebac199c5b4218e2ade3a,12,16,38
0x016099f9c894285b4bcf82424a4dbde44195ebbc,5,8,131
...,...,...,...
0xff1777854bdc3507d581e3a762b0f85832302611,6,6,13
0xff567d26a66a556afe5b9183db01370aa78d7bda,7,12,25
0xffb6d97bd1e7b7bd08595096d15037401a1f416b,5,6,10
0xffba913bb056544b75e57312ec3eae2528c285e1,2,6,8


In [37]:
whales.to_csv(user_local_path+'/whales.csv')

In [38]:
no_cut = candidates['num_owned'].quantile(.98)

In [39]:
collectors = candidates[(candidates['num_collections'] > nc_cut) & (candidates['num_owned'] > no_cut)]

In [40]:
collectors

Unnamed: 0_level_0,num_high_value,num_collections,num_owned
wallet_address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0x0013b99e50372d609f1e1a8af83544878e99fcf7,0,19,50
0x0013fadb2294a00005b334f40f767a3eb4f6bbc4,0,12,30
0x001a181ab8c41045e26dd2245ffcc12818ea742f,0,30,73
0x002c43d42c425e20052f0db04fd01dc66d43c8f5,0,12,32
0x00385f60f4b5234e96989805af7328f7afd742b1,0,30,39
...,...,...,...
0xff67ddc700a58450edcfd7055028e190b1b31e45,0,10,28
0xffc226d4c72e637582166ae68b7c638d46176585,0,15,49
0xffd19e673ca7d6780640c9c659dacb07578557e8,0,20,27
0xffdef5e9eeaceac0800a2f784105105dbaf1d884,1,107,175


In [41]:
collectors.to_csv(user_local_path+'/collectors.csv')

In [42]:
collectors.describe()

Unnamed: 0,num_high_value,num_collections,num_owned
count,4058.0,4058.0,4058.0
mean,0.463036,21.439133,51.011582
std,3.42823,14.42813,44.889678
min,0.0,6.0,26.0
25%,0.0,12.0,30.0
50%,0.0,19.0,38.0
75%,0.0,26.0,55.0
max,149.0,305.0,1280.0


In [43]:
len(set(collectors.index).intersection(set(whales.index)))

243

### NFT Flippers

In [44]:
candidate_pns = nft_pns[~nft_pns['hold_time'].isna()].copy()

In [45]:
candidate_pns.hold_time.quantile(.3)

Timedelta('0 days 02:04:12')

In [46]:
candidate_pns['is_flipped'] = candidate_pns.hold_time < candidate_pns.hold_time.quantile(.3)

In [47]:
candidate_pns['profit'] = candidate_pns['sale_value'] - candidate_pns['purchase_value']

In [48]:
candidate_pns['is_profit'] = candidate_pns['profit'] > 0

In [49]:
candidate_pns

Unnamed: 0,wallet_address,purchase_time,collection_id,purchase_value,nft_id,sale_time,sale_value,hold_time,is_owned,is_flipped,profit,is_profit
1,0xff452cc8ff7deb713b6cf5ac5aa6085c0a606786,2022-09-25 23:59:47+00:00,0x79471bb4cbb351a17fa1eadb5b77aa76795c423a,0.01870,0x79471bb4cbb351a17fa1eadb5b77aa76795c423a/1810,2022-10-01 09:04:47+00:00,0.0125,5 days 09:05:00,False,False,-0.00620,False
5,0x92237f6261db6081421da708f1c0c58fd784e6f3,2022-09-25 23:59:35+00:00,0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85,0.00080,0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85/389...,2022-10-16 23:49:23+00:00,0.0008,20 days 23:49:48,False,False,0.00000,False
9,0x3a1309b56e851e4ce35f06bf8a59026508b1bb66,2022-09-25 23:59:35+00:00,0x9121c7f5976692bc7f97cd380d19a7a7e1d0e0f5,0.00100,0x9121c7f5976692bc7f97cd380d19a7a7e1d0e0f5/8725,2022-10-08 11:18:11+00:00,0.0030,12 days 11:18:36,False,False,0.00200,True
17,0xcb37c667383e37cf11a0c1f30bed85db84ee7c80,2022-09-25 23:59:11+00:00,0xf13f29330dca76be26a6c7e268da836aef978e11,0.19300,0xf13f29330dca76be26a6c7e268da836aef978e11/280,2022-09-26 14:24:35+00:00,0.2155,0 days 14:25:24,False,False,0.02250,True
25,0x81147564788a81bb4aa78b7ecb35ce849fcb9aa9,2022-09-25 23:58:59+00:00,0xf13f29330dca76be26a6c7e268da836aef978e11,0.20000,0xf13f29330dca76be26a6c7e268da836aef978e11/649,2022-09-26 00:40:59+00:00,0.3330,0 days 00:42:00,False,True,0.13300,True
...,...,...,...,...,...,...,...,...,...,...,...,...
1214954,0x763d39179b63fe0ad612a7e7c963a6e69b2c9c95,2022-10-25 00:24:47+00:00,0x248139afb8d3a2e16154fbe4fb528a3a214fd8e7,0.13000,0x248139afb8d3a2e16154fbe4fb528a3a214fd8e7/1079,2022-10-25 12:37:11+00:00,0.1390,0 days 12:12:24,False,False,0.00900,True
1214980,0x8e49d11acd326f242fa48509fd3b40f07c6f9e6d,2022-10-25 00:23:11+00:00,0xeb4e856f69158052ac0aaf7dc26f63dcb1ee067f,0.75000,0xeb4e856f69158052ac0aaf7dc26f63dcb1ee067f/1339,2022-10-25 03:16:59+00:00,0.7520,0 days 02:53:48,False,False,0.00200,True
1215153,0x08e1e8fee2d1862c3e45fc246488b1af179502e1,2022-10-25 00:10:35+00:00,0x386358700ecd5e1b157494c4124dc545d9654e59,0.00089,0x386358700ecd5e1b157494c4124dc545d9654e59/1831,2022-10-25 10:01:11+00:00,0.0010,0 days 09:50:36,False,False,0.00011,True
1215233,0xe58299ab678228081d9c682bc77d5dd6b1f3d6a5,2022-10-25 00:07:47+00:00,0xbbaec9eac5c2cb4464be1f09158d9d61ac429a20,0.00660,0xbbaec9eac5c2cb4464be1f09158d9d61ac429a20/262,2022-10-25 00:16:11+00:00,0.0400,0 days 00:08:24,False,True,0.03340,True


In [50]:
candidates = candidate_pns.groupby('wallet_address').is_flipped.sum().rename('num_flipped').to_frame()

In [51]:
candidates['num_collection'] = candidate_pns.groupby('wallet_address').collection_id.nunique()

In [52]:
candidates['num_profit'] = candidate_pns.groupby('wallet_address').is_profit.sum()

In [53]:
candidates['avg_profit'] = candidate_pns.groupby('wallet_address').profit.mean()
candidates['tot_profit'] = candidate_pns.groupby('wallet_address').profit.sum()

In [54]:
candidates.describe()

Unnamed: 0,num_flipped,num_collection,num_profit,avg_profit,tot_profit
count,57848.0,57848.0,57848.0,57848.0,57848.0
mean,1.000899,2.306873,2.212989,0.052212,0.246014
std,3.540099,3.642366,10.726882,0.58479,96.862618
min,0.0,1.0,0.0,-68.886936,-11965.6
25%,0.0,1.0,1.0,7.9e-05,0.0001
50%,0.0,1.0,1.0,0.015951,0.0256
75%,1.0,2.0,2.0,0.046,0.1
max,259.0,139.0,1660.0,22.367075,13319.778256


In [55]:
nc_cut = candidates.num_collection.quantile(.90)

In [56]:
nc_cut

5.0

In [57]:
nf_cut = candidates.num_flipped.quantile(.98)
nf_cut

7.0

In [58]:
nft_flippers = candidates[(candidates.num_collection > nc_cut) & (candidates.num_flipped > nf_cut)]

In [59]:
nft_flippers.to_csv(user_local_path+'/flippers.csv')

In [60]:
nft_flippers.describe()

Unnamed: 0,num_flipped,num_collection,num_profit,avg_profit,tot_profit
count,946.0,946.0,946.0,946.0,946.0
mean,17.978858,20.32241,23.403805,0.021635,0.75208
std,17.650324,13.104653,22.073464,0.091373,4.723097
min,8.0,6.0,2.0,-0.860067,-50.743963
25%,9.0,11.0,11.0,0.004635,0.103274
50%,12.0,17.0,17.0,0.01177,0.305311
75%,20.0,25.0,27.0,0.023722,0.755104
max,259.0,139.0,298.0,2.019387,129.24075


In [61]:
nft_flippers

Unnamed: 0_level_0,num_flipped,num_collection,num_profit,avg_profit,tot_profit
wallet_address,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0x0000a83deaa073245cfbde660fd8daf09d78de00,9,13,22,0.000052,0.002405
0x00385f60f4b5234e96989805af7328f7afd742b1,9,25,12,0.000619,0.022300
0x004eb19e583c7913e8c30e413ab11b3140b5a3ba,11,10,9,0.000799,0.010390
0x00794b7b900165f452e2b02cc191e6b8b6f8f869,9,18,14,0.000496,0.010410
0x01660cc34a2ae458dc040589f65d3b31cb08b5fb,17,18,31,0.012573,0.502900
...,...,...,...,...,...
0xff169015209d26c867edb261e702df4446674507,8,6,8,0.000500,0.008506
0xff274763062ab364fe17d17cc1cc10fa9e41e040,24,24,25,0.013865,0.388217
0xffdef5e9eeaceac0800a2f784105105dbaf1d884,31,45,70,0.029537,2.481069
0xffe973085371376e187f937abe7e2175876483e2,8,28,20,0.007299,0.248162


In [62]:
nf_cut = nft_flippers.num_profit.quantile(.5)
ap_cut = nft_flippers.avg_profit.quantile(.5)

In [63]:
profitable_nft_flippers = nft_flippers[(nft_flippers.num_flipped > nf_cut) & (nft_flippers.avg_profit > ap_cut)]

In [64]:
profitable_nft_flippers.to_csv(user_local_path+'/profitable_flippers.csv')

In [65]:
profitable_nft_flippers.describe()

Unnamed: 0,num_flipped,num_collection,num_profit,avg_profit,tot_profit
count,152.0,152.0,152.0,152.0,152.0
mean,39.197368,32.559211,49.335526,0.042923,2.68891
std,29.635543,17.328052,36.208913,0.16371,10.46972
min,18.0,9.0,16.0,0.012186,0.383121
25%,22.0,21.0,28.75,0.016277,0.75488
50%,28.0,27.5,40.0,0.022108,1.246962
75%,45.0,39.0,57.25,0.031568,2.417686
max,259.0,139.0,298.0,2.019387,129.24075
