In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests

In [3]:
from dotenv import load_dotenv
import os

# Load key=value pairs from a local .env file into the process environment so
# the os.getenv lookups for the API keys can find them.
load_dotenv()

True

In [4]:
# API credentials are read from the environment; a missing variable yields None.
OPENSEA_API_KEY = os.environ.get('OPENSEA_API_KEY')
DUNE_API_KEY = os.environ.get('DUNE_API_KEY')

# Trades data
The trade history comes from a custom SQL query I wrote on Dune Analytics (an open database of on-chain data): https://dune.com/queries/3124232

In [2]:
from dune_client.types import QueryParameter
from dune_client.client import DuneClient
from dune_client.query import QueryBase

# Parameters substituted into the saved Dune query: which NFT contract to pull
# trades for, and the end date passed to the query.
dune_params = [
    QueryParameter.text_type(
        name="nft_contract_address",
        value="0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d",
    ),
    QueryParameter.text_type(name="end_date", value="2023-09-30"),
]
query = QueryBase(query_id=3124232, params=dune_params)

# The client is built from environment variables; the query result is
# returned as a DataFrame.
dune = DuneClient.from_env()
df = dune.run_query_dataframe(query)

2023-10-24 19:14:56,631 INFO numexpr.utils NumExpr defaulting to 8 threads.
2023-10-24 19:14:57,712 INFO dune_client.api.base executing 3124232 on medium cluster
2023-10-24 19:14:58,819 INFO dune_client.api.base waiting for query execution 01HDJ2YFSQ3ZY6QY87W2KFBDQW to complete: ExecutionState.EXECUTING
2023-10-24 19:15:00,461 INFO dune_client.api.base waiting for query execution 01HDJ2YFSQ3ZY6QY87W2KFBDQW to complete: ExecutionState.EXECUTING
2023-10-24 19:15:01,996 INFO dune_client.api.base waiting for query execution 01HDJ2YFSQ3ZY6QY87W2KFBDQW to complete: ExecutionState.EXECUTING
2023-10-24 19:15:03,433 INFO dune_client.api.base waiting for query execution 01HDJ2YFSQ3ZY6QY87W2KFBDQW to complete: ExecutionState.EXECUTING
2023-10-24 19:15:04,931 INFO dune_client.api.base waiting for query execution 01HDJ2YFSQ3ZY6QY87W2KFBDQW to complete: ExecutionState.EXECUTING
2023-10-24 19:15:06,399 INFO dune_client.api.base waiting for query execution 01HDJ2YFSQ3ZY6QY87W2KFBDQW to complete: Execu

In [5]:
# save data as csv
# Create the output folder first: to_csv raises if "data/" does not exist,
# which would discard the query result on a fresh checkout.
os.makedirs("data", exist_ok=True)
df.to_csv("data/bayc.csv")

In [19]:
# re-read data (if needed)
# index_col=0 restores the index that to_csv wrote as the first CSV column,
# so no extra unnamed column appears after the round trip.
df = pd.read_csv("data/bayc.csv", index_col=0)

In [20]:
# clean columns
# Written as one chain that returns a new frame: no inplace mutation and no
# column assignment on a filtered slice (which triggers SettingWithCopyWarning).
df = (
    df
    .rename(columns={'amount_original': 'trade_price'})
    # rows whose price is the literal string '<nil>' cannot be cast to float
    .loc[lambda d: d['trade_price'] != '<nil>']
    .assign(
        trade_price=lambda d: d['trade_price'].astype(float),
        block_time=lambda d: pd.to_datetime(d['block_time']),
        block_date=lambda d: pd.to_datetime(d['block_date']),
        block_month=lambda d: pd.to_datetime(d['block_month']),
    )
)
# drop observation if trade_price above max_price or below min_price
# (.copy() makes the result an independent frame, so later cells can add
# columns to it without a chained-assignment warning)
df = df[
    (df['trade_price'] <= df['price_max_eth'])
    & (df['trade_price'] >= df['price_min_eth'])
].copy()

In [21]:
# add variables: last_trade_price, last_trade_time, last_trade_timediff
# note: relies on the dataset being pre-sorted by token_id and block_time,
# so shifting by one row within a token gives that token's previous sale
prev_sale = df.groupby('token_id')[['trade_price', 'block_time']].shift(1)
df['last_trade_price'] = prev_sale['trade_price']
df['last_trade_time'] = prev_sale['block_time']
# time since the previous sale, converted from seconds to days
since_last_sale = df['block_time'] - df['last_trade_time']
df['last_trade_timediff'] = since_last_sale.dt.total_seconds() / 86400

In [22]:
# Check the column dtypes after cleaning: trade_price should be float64 and
# the block_* / last_trade_time columns datetime64.
df.dtypes

project                             object
version                             object
block_date                  datetime64[ns]
block_month                 datetime64[ns]
block_time             datetime64[ns, UTC]
token_id                             int64
collection                          object
amount_usd                          object
token_standard                      object
trade_type                          object
number_of_items                     object
trade_category                      object
evt_type                            object
trade_price                        float64
currency_symbol                     object
tx_hash                             object
volume_eth                         float64
price_p5_eth                       float64
price_min_eth                      float64
price_max_eth                      float64
last_trade_price                   float64
last_trade_time        datetime64[ns, UTC]
last_trade_timediff                float64
dtype: obje

# Traits/rarity data
Fetched per token from the OpenSea API ("Get NFT" endpoint): https://docs.opensea.io/reference/get_nft

In [5]:
# Contract address of the collection, passed to get_traits_data; same literal
# as the nft_contract_address parameter of the Dune query.
BAYC_ADDRESS = "0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d"

In [32]:
# Default headers for OpenSea requests: ask for JSON and send the API key.
hdrs = {"accept": "application/json", "x-api-key": OPENSEA_API_KEY}

def get_traits_data(nft_address, token_id, headers=hdrs, timeout=30):
    """Fetch the rarity rank and traits of one NFT from the OpenSea v2 API.

    Parameters
    ----------
    nft_address : str
        Contract address of the collection (the chain is fixed to
        ``ethereum`` in the request URL).
    token_id : int
        Identifier of the token within the collection.
    headers : dict, optional
        HTTP headers sent with the request; defaults to the module-level
        ``hdrs``.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook.

    Returns
    -------
    dict or None
        On success, a flat dict with ``token_id``, ``rarity_rank`` and, for
        every trait, ``<trait_type>_value`` and ``<trait_type>_count``.
        On any failure (network error, non-success status such as a 429
        rate limit, or a payload without the expected fields) a diagnostic
        is printed and ``None`` is returned so the caller can retry.
    """
    url = f"https://api.opensea.io/api/v2/chain/ethereum/contract/{nft_address}/nfts/{token_id}"

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # No response object exists in this branch, so report the exception
        # itself instead of touching `response`.
        print(f"An error occurred: {exc}")
        return None

    if not response.ok:
        print(f"An error occurred: {response.status_code}")
        print(response.text)
        return None

    try:
        nft = response.json()['nft']
        traits = {
            "token_id": token_id,
            "rarity_rank": nft['rarity']['rank'],
        }
        for trait_dict in nft['traits']:
            trait_type = trait_dict['trait_type']
            traits[trait_type + "_value"] = trait_dict['value']
            traits[trait_type + "_count"] = trait_dict['trait_count']
    except (KeyError, TypeError, ValueError):
        # ValueError: body is not valid JSON; KeyError/TypeError: a field is
        # missing or null. Anything else (e.g. KeyboardInterrupt) propagates.
        print(f"An error occurred: {response.status_code}")
        print(response.text)
        return None

    return traits

In [7]:
# Spot-check the fetcher on a single token before looping over every token.
get_traits_data(BAYC_ADDRESS, 5734)

{'token_id': 5734,
 'rarity_rank': 5411,
 'Eyes_value': 'Bloodshot',
 'Eyes_count': 846,
 'Hat_value': 'Short Mohawk',
 'Hat_count': 318,
 'Background_value': 'Yellow',
 'Background_count': 1283,
 'Fur_value': 'Brown',
 'Fur_count': 1370,
 'Clothes_value': 'Sleeveless Logo T',
 'Clothes_count': 144,
 'Mouth_value': 'Phoneme Vuh',
 'Mouth_count': 333}

In [33]:
import time
from tqdm import tqdm

delay = 0.5        # initial pause (seconds) after a failed request
max_delay = 30     # upper bound for the exponential backoff
max_attempts = 8   # give up on a token after this many failed requests
rows = []
failed_ids = []    # tokens for which no traits could be retrieved

# sorted() makes the request order (and the CSV row order) deterministic
for token_id in tqdm(sorted(set(df['token_id'].values))):
    wait = delay
    # sleep & retry if hit rate limit, but never loop forever on a token
    # that keeps failing
    for _ in range(max_attempts):
        traits = get_traits_data(BAYC_ADDRESS, token_id)
        if traits is not None:
            rows.append(traits)
            break
        time.sleep(wait)
        wait = min(wait * 2, max_delay)
    else:
        failed_ids.append(token_id)

traits_df = pd.DataFrame(rows)
# make sure the output folder exists so the collected rows are not lost
os.makedirs("data", exist_ok=True)
traits_df.to_csv("data/bayc_traits.csv")

if failed_ids:
    print(f"No traits retrieved for {len(failed_ids)} token(s): {failed_ids}")

  5%|▌         | 494/9045 [00:57<15:39,  9.10it/s]

An error occurred: 429
{"detail":"Request was throttled. Expected available in 0 seconds."}


  7%|▋         | 672/9045 [01:19<16:06,  8.66it/s]

An error occurred: 429
{"detail":"Request was throttled. Expected available in 0 seconds."}


  8%|▊         | 694/9045 [01:22<17:47,  7.82it/s]

An error occurred: 429
{"detail":"Request was throttled. Expected available in 0 seconds."}


 34%|███▎      | 3035/9045 [06:17<13:02,  7.68it/s]

An error occurred: 429
{"detail":"Request was throttled. Expected available in 0 seconds."}


 36%|███▌      | 3226/9045 [06:43<11:45,  8.25it/s]

An error occurred: 429
{"detail":"Request was throttled. Expected available in 0 seconds."}


 36%|███▌      | 3240/9045 [06:45<12:23,  7.81it/s]

An error occurred: 429
{"detail":"Request was throttled. Expected available in 0 seconds."}


100%|██████████| 9045/9045 [19:08<00:00,  7.87it/s]
