In [1]:
import pandas as pd
import numpy as np
import random
import os
import sys
import requests
import time
import datetime as dt

from dotenv import load_dotenv
from flipside import Flipside
from prophet import Prophet

from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split, GridSearchCV

# from utils import flipside_api_results, set_random_seed
# from sql_queries.sql_scripts import three_dns_sales

Importing plotly failed. Interactive plots will not work.


In [2]:
def flipside_api_results(query, api_key):

  flipside_api_key = api_key
  flipside = Flipside(flipside_api_key, "https://api-v2.flipsidecrypto.xyz")

  query_result_set = flipside.query(query)
  # what page are we starting on?
  current_page_number = 1

  # How many records do we want to return in the page?
  page_size = 1000

  # set total pages to 1 higher than the `current_page_number` until
  # we receive the total pages from `get_query_results` given the
  # provided `page_size` (total_pages is dynamically determined by the API
  # based on the `page_size` you provide)

  total_pages = 2


  # we'll store all the page results in `all_rows`
  all_rows = []

  while current_page_number <= total_pages:
    results = flipside.get_query_results(
      query_result_set.query_id,
      page_number=current_page_number,
      page_size=page_size
    )

    total_pages = results.page.totalPages
    if results.records:
        all_rows = all_rows + results.records

    current_page_number += 1

  return pd.DataFrame(all_rows)

def set_random_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    # torch.manual_seed(seed)
    # if torch.cuda.is_available():
    #     torch.cuda.manual_seed(seed)
    #     torch.cuda.manual_seed_all

In [3]:
pd.options.display.float_format = '{:,.2f}'.format
three_dns_sales = """

  SELECT
    DATE_TRUNC('HOUR', BLOCK_TIMESTAMP) AS day, tokenid, price
  FROM
    optimism.nft.ez_nft_sales
  WHERE
    NFT_ADDRESS = LOWER('0xBB7B805B257d7C76CA9435B3ffe780355E4C4B17')
    AND event_type = 'sale'
"""

In [4]:
current_directory = os.getcwd()
current_directory

'e:\\Projects\\liquid_domains'

In [5]:
load_dotenv()

True

In [6]:
seed = 20
set_random_seed(seed)

In [7]:
flipside_api_key = os.getenv('FLIPSIDE_API_KEY')
alchemy_api_key = os.getenv('ALCHEMY_API_KEY')
opensea_api_key = os.getenv('OPENSEA_API_KEY')

print(alchemy_api_key)

6AUlaGmWe505S7gRPZXVh4YEFgJdYHy5


In [8]:
def alchemy_metadata_api(api_key, network, contract_address):
    if network == 'optimism':
        network = 'opt'
    elif network == 'ethereum':
        network = 'eth'
    elif network == 'base':
        network = 'base'
    # Replace with your actual API key
    base_url = f"https://{network}-mainnet.g.alchemy.com/nft/v3/{api_key}/getNFTsForContract"
    print(f'Base URL: {base_url}')
    headers = {"accept": "application/json"}

    # Pagination parameters
    page_key = None  # Initial key for pagination
    limit = 100  # Set the limit for the number of NFTs per request
    api_data = []  # To store all NFTs

    while True:
        params = {
            "contractAddress": contract_address,
            "withMetadata": "true",
            "limit": limit
        }

        if page_key:
            params["pageKey"] = page_key

        response = requests.get(base_url, headers=headers, params=params)
        data = response.json()

        if "nfts" in data:
            api_data.extend(data["nfts"])
            # print(data["nfts"])

        # Check if there's a next page key for pagination
        page_key = data.get("pageKey", None)

        if page_key is None:
            break

        print(f'Number added: {len(data["nfts"])} | Total number of NFTs: {len(api_data)}, Next page key: {page_key}')

    # Now `api_data` contains all the NFTs retrieved from the paginated API calls
    print(f"Total NFTs retrieved: {len(api_data)}")

    # Function to get metadata from tokenUri
    def fetch_metadata(token_uri):
        try:
            response = requests.get(token_uri)
            metadata = response.json()
            return metadata
        except:
            return {'name': 'No name available'}

    # Extract tokenId, name, and tokenUri from each NFT
    nft_info = []
    for nft in api_data:
        token_id = nft.get('tokenId', 'No token ID available')
        token_name = nft.get('name', 'No token ID available')


        nft_info.append({'tokenId': token_id, 'name': token_name})

    # Create DataFrame
    df = pd.DataFrame(nft_info)

    return df

optimism_name_service_metadata = alchemy_metadata_api(alchemy_api_key, 'optimism', '0xC16aCAdf99E4540E6f4E6Da816fd6D2A2C6E1d4F')

Three_DNS_metadata = alchemy_metadata_api(alchemy_api_key, 'optimism', '0xBB7B805B257d7C76CA9435B3ffe780355E4C4B17')

optimistic_domains = alchemy_metadata_api(alchemy_api_key, 'optimism', '0xC16aCAdf99E4540E6f4E6Da816fd6D2A2C6E1d4F')

In [9]:
Optimistic_domains_path = 'data/optimistic_domains_metadata.json'
# optimistic_domains.to_json(Optimistic_domains_path, orient='records')
optimistic_domains = pd.read_json(Optimistic_domains_path, orient='records')
# optimistic_domains.drop(columns=['tokenUri'], inplace=True)
optimistic_domains

Unnamed: 0,tokenId,name
0,0,vitalik.op
1,1,cardenas.op
2,2,davidcardenas.op
3,3,bitcoin.op
4,4,daffy.op
...,...,...
1321,1321,agency.op
1322,1322,trump.op
1323,1323,send.op
1324,1324,mint.op


In [10]:
domain_path = 'data/domain-name-sales.tsv'
domain_data = pd.read_csv(domain_path, delimiter='\t')

In [11]:
domain_data.set_index('date', inplace=True)
domain_data = domain_data.drop(columns=['venue'])
domain_data.sort_index(inplace=True)
domain_data

Unnamed: 0_level_0,domain,price
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1999-04-01,altavista.com,3250000
1999-04-01,bingo.com,1100000
1999-11-01,fly.com,1500000
1999-12-01,autos.com,2200000
1999-12-01,england.com,2000000
...,...,...
2021-01-01,yes.movie,253
2021-01-01,yopal.com,405
2021-01-01,yougraph.com,1161
2021-01-01,zenvie.com,349


In [12]:
def fetch_event_type(api_key, collection, event_type, all_events, params, headers):
    base_url = f"https://api.opensea.io/api/v2/events/collection/{collection}"
    params['event_type'] = event_type

    # Load the last timestamp/identifier

    page_count = 0
    while True:
        response = requests.get(base_url, headers=headers, params=params)
        if response.status_code == 200:
            data = response.json()
            fetched_events = data.get("asset_events", [])
            all_events.extend(fetched_events)

            if fetched_events:
                # Update the last timestamp/identifier to the latest one fetched
                last_event_time = fetched_events[-1].get("created_date")

            page_count += 1
            next_cursor = data.get("next")
            print(f"Fetching {event_type}: Page {page_count}, Events Fetched: {len(fetched_events)}, Total Events: {len(all_events)}, next cursor: {next_cursor}")

            if next_cursor:
                params['next'] = next_cursor
            else:
                break  # No more pages to fetch

            time.sleep(1)  # Delay between requests
        else:
            print(f"Failed to fetch {event_type} data: HTTP {response.status_code}, Response: {response.text}")
            break

def clean_data(domain_df):
    domain_df['nft_identifier'] = domain_df['nft'].apply(lambda x: x.get('identifier', 'No identifier available') if x else 'No identifier available')
    domain_df['nft_name'] = domain_df['nft'].apply(lambda x: x.get('name', 'No name available') if x else 'No name available')
    domain_df['token_amt_raw'] = domain_df['payment'].apply(lambda x: x.get('quantity', 'No name available') if x else 'No name available')
    domain_df['token_symbol'] = domain_df['payment'].apply(lambda x: x.get('symbol', 'No name available') if x else 'No name available')
    domain_df['token_decimals'] = domain_df['payment'].apply(lambda x: x.get('decimals', 'No name available') if x else 'No name available')
    domain_df['dt'] = pd.to_datetime(domain_df['event_timestamp'], unit='s')

    def wei_to_ether(quantity, decimals):
        try:
            return int(quantity) / (10 ** decimals)
        except ValueError:
            return None

    domain_df['token_amt_clean'] = domain_df.apply(lambda row: wei_to_ether(row['token_amt_raw'], row['token_decimals']) if row['token_amt_raw'] != 'No name available' and row['token_decimals'] != 'No name available' else None, axis=1)
    domain_df.dropna(inplace=True)
    return domain_df

# Display the updated DataFrame




def fetch_all_events(api_key, collection):
    headers = {
        "accept": "application/json",
        "x-api-key": api_key
    }
    params = {
        "limit": 50  # Adjust the limit as needed
    }

    all_events = []

    # Fetch listings
    # fetch_event_type(api_key, collection, "listing", all_events, params.copy(), headers)

    # Fetch sales
    fetch_event_type(api_key, collection, "sale", all_events, params.copy(), headers)

    # Save the fetched events to a DataFrame
    print(f"Total events fetched: {len(all_events)}")
    df = pd.DataFrame(all_events)
    clean_df = clean_data(df)
    return clean_df





optimism_name_service_data = fetch_all_events(api_key=opensea_api_key,collection='optimism-name-service')


In [13]:
optimism_name_service_path = 'data/optimism_name_service_metadata.json'
# optimism_name_service_data.to_json(optimism_name_service_path, orient='records')
optimism_name_service_data = pd.read_json(optimism_name_service_path, orient='records')
optimism_name_service_data = optimism_name_service_data[['dt','token_symbol','token_amt_clean','nft_identifier','nft_name']]
optimism_name_service_data


Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name
0,1717118919000,ETH,0.00,"98,605,561,369,100,498,602,024,245,469,838,703,...",zainal.op
1,1715204651000,ETH,0.00,"5,499,786,103,543,466,330,119,275,902,716,700,8...",oksie.op
2,1711976895000,ETH,0.00,"39,754,865,402,727,223,184,320,167,547,016,687,...",ahamad.op
3,1710975355000,ETH,0.00,"5,487,829,687,511,992,584,552,359,460,892,473,9...",squanchy.op
4,1710017397000,ETH,0.00,"52,516,629,700,842,134,031,927,131,776,612,298,...",azukii.op
...,...,...,...,...,...
728,1677932343000,ETH,0.00,"12,021,927,298,880,368,312,276,012,569,694,239,...",888666.op
729,1677932205000,ETH,0.05,"21,212,897,083,645,615,095,268,562,890,835,548,...",highstreet.op
730,1677932205000,ETH,0.05,"24,844,438,455,992,396,859,894,623,792,547,027,...",bitget.op
731,1677932205000,ETH,0.05,"54,901,502,244,357,081,727,363,511,107,096,227,...",bybit.op


Three_DNS_data = fetch_all_events(api_key=opensea_api_key,collection='3dns-powered-domains')


In [14]:
three_dns_path = 'data/3dns_metadata.json'
# Three_DNS_data.to_json(three_dns_path, orient='records')
Three_DNS_data = pd.read_json(three_dns_path, orient='records')
# Three_DNS_data.dropna(inplace=True)
Three_DNS_data = Three_DNS_data[['dt','token_symbol','token_amt_clean','nft_identifier','nft_name']]
Three_DNS_data

Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name
0,1722706445000,WETH,0.12,"103,630,415,007,535,905,091,168,346,033,468,857...",03.box
1,1722705143000,ETH,0.00,"63,907,456,025,918,784,550,431,386,390,280,815,...",flon.chain.box
2,1722705143000,ETH,0.00,"70,494,507,210,742,404,178,780,831,369,986,847,...",eua.chain.box
3,1722705143000,ETH,0.00,"102,076,623,466,713,504,937,515,277,532,453,510...",investecriptos.chain.box
4,1722705143000,ETH,0.00,"11,812,242,816,192,290,227,614,680,555,896,161,...",drivenspyder.chain.box
...,...,...,...,...,...
153,1706970257000,WETH,0.00,"26,092,378,574,867,619,527,232,167,678,805,405,...",696.box
154,1706653465000,WETH,0.11,"56,647,991,108,577,850,953,984,972,492,538,466,...",opensea.box
155,1706203283000,ETH,0.01,"36,470,608,646,898,707,880,068,503,761,534,760,...",08000.xyz
156,1704080589000,ETH,0.15,"99,234,087,868,363,590,418,701,991,799,513,400,...",2024.finance


ens_sales_data = fetch_all_events(api_key=opensea_api_key,collection='ens')


In [15]:
ens_sales_path = 'data/ens_metadata.json'
# ens_sales_data.to_json('data/ens_metadata.json', orient='records', date_format='iso')
ens_data = pd.read_json(ens_sales_path, orient='records')

In [16]:
ens_data = ens_data[['dt','token_symbol','token_amt_clean','nft_identifier','nft_name']]

unstoppable_sales_data = fetch_all_events(api_key=opensea_api_key,collection='unstoppable-domains')


In [17]:
unstoppable_sales_path = 'data/unstoppable_metadata.json'
# unstoppable_sales_data.to_json(unstoppable_sales_path, orient='records', date_format='iso')
unstoppable_sales_data = pd.read_json(unstoppable_sales_path, orient='records')
unstoppable_sales_data = unstoppable_sales_data[['dt','token_symbol','token_amt_clean','nft_identifier','nft_name']]
unstoppable_sales_data

Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name
0,2024-07-05T17:01:23.000,ETH,0.00,"73,260,974,434,666,309,016,726,815,552,216,794,...",gaydream.crypto
1,2024-06-17T09:52:47.000,ETH,0.00,"79,928,895,083,408,148,097,614,756,880,763,030,...",officialdaimler.crypto
2,2024-06-17T08:47:35.000,ETH,0.00,"72,014,141,608,816,767,035,628,981,280,910,821,...",officialaudi.crypto
3,2024-06-17T08:35:23.000,ETH,0.00,"55,935,305,068,024,247,420,483,647,586,810,650,...",officialbillgates.crypto
4,2024-06-16T15:07:23.000,ETH,0.00,"91,189,952,913,906,813,838,813,506,742,274,520,...",scaryterry.crypto
...,...,...,...,...,...
3896,2019-12-24T09:10:32.000,WETH,0.10,"86,089,905,505,469,488,148,445,830,396,242,062,...",bittiez.crypto
3897,2019-12-24T09:09:42.000,WETH,0.10,"94,056,971,344,405,822,053,661,343,907,032,761,...",bitties.crypto
3898,2019-12-21T03:55:27.000,WETH,0.04,"44,616,317,507,143,517,844,109,711,444,778,473,...",impeached.crypto
3899,2019-12-19T15:13:15.000,ETH,0.07,"17,619,217,182,973,076,162,112,808,839,711,553,...",ethstaker.crypto


In [18]:
# unstoppable_sales_data['nft_identifier'] = unstoppable_sales_data['nft'].apply(lambda x: x.get('identifier', 'No identifier available') if x else 'No identifier available')
# unstoppable_sales_data['nft_name'] = unstoppable_sales_data['nft'].apply(lambda x: x.get('name', 'No name available') if x else 'No name available')
# unstoppable_sales_data.dropna(inplace=True)
# # Now you can view the DataFrame with the new columns
# print(unstoppable_sales_data[['event_type', 'closing_date', 'nft_identifier', 'nft_name']])
# unstoppable_sales_data = unstoppable_sales_data[['nft_identifier', 'nft_name']]

base_domains_metadata = fetch_all_events(api_key=opensea_api_key,collection='basedomainnames')

In [19]:
base_domains_path = 'data/base_metadata.json'
# base_domains_metadata.to_json(base_domains_path, orient='records')
base_domains_metadata_pd = pd.read_json(base_domains_path, orient='records')
base_domains_metadata_pd = base_domains_metadata_pd[['dt','token_symbol','token_amt_clean','nft_identifier','nft_name']]
base_domains_metadata_pd

Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name
0,1719530881000,ETH,0.00,6625,Vizzycrypto.base
1,1719530857000,ETH,0.00,6707,Cryptox.base
2,1719112413000,ETH,0.00,4863,38888.base
3,1719110431000,ETH,0.00,19245,venice.base
4,1719108841000,ETH,0.00,9711,13333.base
...,...,...,...,...,...
91,1691618587000,ETH,0.01,3505,rug.base
92,1691616539000,ETH,0.01,2704,200.base
93,1691613043000,ETH,0.00,4695,manga.base
94,1691607621000,ETH,0.00,3204,bullish.base



# Now you can view the DataFrame with the new columns
print(base_domains_metadata_pd[['dt','event_type', 'closing_date', 'nft_identifier', 'nft_name','token_amt_clean','token_symbol']])
base_domains_metadata_pd = base_domains_metadata_pd[['nft_identifier', 'nft_name','token_amt_clean','token_symbol']]

In [20]:
domain_data

Unnamed: 0_level_0,domain,price
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1999-04-01,altavista.com,3250000
1999-04-01,bingo.com,1100000
1999-11-01,fly.com,1500000
1999-12-01,autos.com,2200000
1999-12-01,england.com,2000000
...,...,...
2021-01-01,yes.movie,253
2021-01-01,yopal.com,405
2021-01-01,yougraph.com,1161
2021-01-01,zenvie.com,349


combined_metadata = pd.concat([
    base_domains_metadata_pd.dropna(),
    unstoppable_sales_data.dropna(),
    ens_data.dropna(),
    Optimistic_domains_metadata_pd.dropna(),
    Three_DNS_metadata_pd.dropna(),
    optimism_name_service_metadata_pd.dropna()
], ignore_index=True)

combined_metadata.rename(columns={"tokenId":"TOKENID"}, inplace=True)

combined_metadata['TOKENID'].describe()

# Sales

In [21]:
ens_sales = pd.read_csv('data/ens_domain_sales.csv')
optimistic_domains_sales = pd.read_csv('data/optimistic_domains_sales.csv')
optimism_domain_service_sales = pd.read_csv('data/optimism_name_service_sales.csv')
base_domains_sales = pd.read_csv('data/base_domain_names_sales.csv')
unstoppable_domains_sales = pd.read_csv('data/unstoppable_domains_sales.csv')
three_dns_sales_data = pd.read_csv('data/three_dns_sales.csv')
prices_data = pd.read_csv('data/prices.csv')

In [22]:
prices_data = prices_data.dropna()
prices_data['SYMBOL'] = prices_data['SYMBOL'].replace('WETH', 'ETH')


prices_data = prices_data.pivot(index='DT',columns='SYMBOL',values='PRICE')
prices_data = prices_data.reset_index()
prices_data

SYMBOL,DT,ETH,MATIC
0,2018-06-16 05:00:00.000,489.49,
1,2018-06-16 06:00:00.000,490.08,
2,2018-06-16 07:00:00.000,492.07,
3,2018-06-16 08:00:00.000,494.07,
4,2018-06-16 09:00:00.000,491.77,
...,...,...,...
53769,2024-08-03 14:00:00.000,3004.78,0.47
53770,2024-08-03 15:00:00.000,3003.12,0.47
53771,2024-08-03 16:00:00.000,3004.18,0.47
53772,2024-08-03 17:00:00.000,2952.17,0.46


In [23]:
combined_sales = pd.concat([
    ens_sales.dropna(),
    optimistic_domains_sales.dropna(),
    optimism_domain_service_sales.dropna(),
    base_domains_sales.dropna(),
    unstoppable_domains_sales.dropna(),
    three_dns_sales_data.dropna()
], ignore_index=True)

In [24]:
combined_sales = combined_sales.drop_duplicates()
combined_sales['DAY'] = pd.to_datetime(combined_sales['DAY'], errors='coerce')
combined_sales = combined_sales.sort_values(by='DAY')
combined_sales = combined_sales.reset_index(drop=True)
combined_sales


Unnamed: 0,DAY,TOKENID,PRICE,PRICE_USD
0,2021-12-08 10:00:00,"63,269,229,040,554,243,218,247,578,336,102,436,...",5.00,11.91
1,2022-02-14 12:00:00,"8,866,164,278,850,060,768,875,618,350,011,813,0...",5.00,8.07
2,2022-02-17 07:00:00,"12,504,755,158,386,406,737,321,687,699,430,893,...",2.50,4.52
3,2022-02-23 00:00:00,"22,085,100,995,573,792,229,696,066,667,279,430,...",3.00,4.32
4,2022-04-12 19:00:00,"12,037,330,189,325,881,504,068,036,797,548,579,...",1.99,2.80
...,...,...,...,...
106869,2024-08-02 20:00:00,"112,767,247,515,584,111,262,417,632,421,743,782...",0.00,1.36
106870,2024-08-03 03:00:00,"30,174,675,626,436,545,596,565,157,316,348,620,...",0.10,294.93
106871,2024-08-03 07:00:00,"85,316,889,377,092,979,974,953,207,492,676,631,...",0.00,0.83
106872,2024-08-03 08:00:00,"101,676,255,068,068,529,849,226,685,665,802,942...",0.01,29.84


# Full Data Set and Feature Engineering

In [25]:
optimistic_domains_sales

Unnamed: 0,DAY,TOKENID,PRICE,PRICE_USD
0,2022-07-28 16:00:00.000,850.0,0.01,8.27
1,2022-07-13 06:00:00.000,902.0,0.02,21.05
2,2022-07-13 06:00:00.000,915.0,0.03,26.31
3,2022-08-04 20:00:00.000,933.0,0.01,22.76
4,2022-08-04 20:00:00.000,932.0,0.01,21.43
5,2022-06-19 00:00:00.000,787.0,0.02,19.68
6,2022-10-28 09:00:00.000,1069.0,0.02,29.98
7,2022-12-29 07:00:00.000,759.0,0.01,13.15
8,2022-06-03 04:00:00.000,482.0,0.01,27.47
9,2022-06-04 02:00:00.000,550.0,0.01,26.64


In [26]:
optimistic_domains_sales = optimistic_domains_sales.dropna(subset=['TOKENID'])
optimistic_domains_sales['TOKENID']

0      850.00
1      902.00
2      915.00
3      933.00
4      932.00
5      787.00
6    1,069.00
7      759.00
8      482.00
9      550.00
10     549.00
11     347.00
12     731.00
13     413.00
14   1,076.00
15     576.00
16     413.00
Name: TOKENID, dtype: float64

In [27]:
optimistic_domains_sales['TOKENID'] = optimistic_domains_sales['TOKENID'].astype(int)
optimistic_domains_sales.rename(columns={"TOKENID":"tokenId"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  optimistic_domains_sales['TOKENID'] = optimistic_domains_sales['TOKENID'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  optimistic_domains_sales.rename(columns={"TOKENID":"tokenId"}, inplace=True)


In [28]:
optimistic_domains_sales['tokenId']

0      850
1      902
2      915
3      933
4      932
5      787
6     1069
7      759
8      482
9      550
10     549
11     347
12     731
13     413
14    1076
15     576
16     413
Name: tokenId, dtype: int64

In [29]:
optimistic_domains['tokenId']


0          0
1          1
2          2
3          3
4          4
        ... 
1321    1321
1322    1322
1323    1323
1324    1324
1325    1325
Name: tokenId, Length: 1326, dtype: int64

In [30]:
optimistic_data = pd.merge(optimistic_domains_sales, optimistic_domains, on='tokenId', how='left')
optimistic_data.rename(columns={"tokenId":"nft_identifier","name":"nft_name", "day":"dt"}, inplace=True)

In [31]:
prices_data

SYMBOL,DT,ETH,MATIC
0,2018-06-16 05:00:00.000,489.49,
1,2018-06-16 06:00:00.000,490.08,
2,2018-06-16 07:00:00.000,492.07,
3,2018-06-16 08:00:00.000,494.07,
4,2018-06-16 09:00:00.000,491.77,
...,...,...,...
53769,2024-08-03 14:00:00.000,3004.78,0.47
53770,2024-08-03 15:00:00.000,3003.12,0.47
53771,2024-08-03 16:00:00.000,3004.18,0.47
53772,2024-08-03 17:00:00.000,2952.17,0.46


In [32]:
optimism_name_service_data['dt'] = pd.to_datetime(optimism_name_service_data['dt'], unit='ms')
Three_DNS_data['dt'] = pd.to_datetime(Three_DNS_data['dt'], unit='ms')
ens_data['dt'] = pd.to_datetime(ens_data['dt'])
unstoppable_sales_data['dt'] = pd.to_datetime(unstoppable_sales_data['dt'])
base_domains_metadata_pd['dt'] = pd.to_datetime(base_domains_metadata_pd['dt'], unit='ms')


optimism_name_service_data

Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name
0,2024-05-31 01:28:39,ETH,0.00,"98,605,561,369,100,498,602,024,245,469,838,703,...",zainal.op
1,2024-05-08 21:44:11,ETH,0.00,"5,499,786,103,543,466,330,119,275,902,716,700,8...",oksie.op
2,2024-04-01 13:08:15,ETH,0.00,"39,754,865,402,727,223,184,320,167,547,016,687,...",ahamad.op
3,2024-03-20 22:55:55,ETH,0.00,"5,487,829,687,511,992,584,552,359,460,892,473,9...",squanchy.op
4,2024-03-09 20:49:57,ETH,0.00,"52,516,629,700,842,134,031,927,131,776,612,298,...",azukii.op
...,...,...,...,...,...
728,2023-03-04 12:19:03,ETH,0.00,"12,021,927,298,880,368,312,276,012,569,694,239,...",888666.op
729,2023-03-04 12:16:45,ETH,0.05,"21,212,897,083,645,615,095,268,562,890,835,548,...",highstreet.op
730,2023-03-04 12:16:45,ETH,0.05,"24,844,438,455,992,396,859,894,623,792,547,027,...",bitget.op
731,2023-03-04 12:16:45,ETH,0.05,"54,901,502,244,357,081,727,363,511,107,096,227,...",bybit.op


In [33]:
def hourly(df):
    df['dt'] = df['dt'].dt.strftime('%Y-%m-%d %H-00-00')
    df['dt'] = pd.to_datetime(df['dt'])
    return df


In [34]:
Three_DNS_data = hourly(Three_DNS_data)
optimism_name_service_data = hourly(optimism_name_service_data)
ens_data = hourly(ens_data)
unstoppable_sales_data = hourly(unstoppable_sales_data)
base_domains_metadata_pd = hourly(base_domains_metadata_pd)

Three_DNS_data

  df['dt'] = pd.to_datetime(df['dt'])
  df['dt'] = pd.to_datetime(df['dt'])
  df['dt'] = pd.to_datetime(df['dt'])
  df['dt'] = pd.to_datetime(df['dt'])
  df['dt'] = pd.to_datetime(df['dt'])


Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name
0,2024-08-03 17:00:00+00:00,WETH,0.12,"103,630,415,007,535,905,091,168,346,033,468,857...",03.box
1,2024-08-03 17:00:00+00:00,ETH,0.00,"63,907,456,025,918,784,550,431,386,390,280,815,...",flon.chain.box
2,2024-08-03 17:00:00+00:00,ETH,0.00,"70,494,507,210,742,404,178,780,831,369,986,847,...",eua.chain.box
3,2024-08-03 17:00:00+00:00,ETH,0.00,"102,076,623,466,713,504,937,515,277,532,453,510...",investecriptos.chain.box
4,2024-08-03 17:00:00+00:00,ETH,0.00,"11,812,242,816,192,290,227,614,680,555,896,161,...",drivenspyder.chain.box
...,...,...,...,...,...
153,2024-02-03 14:00:00+00:00,WETH,0.00,"26,092,378,574,867,619,527,232,167,678,805,405,...",696.box
154,2024-01-30 22:00:00+00:00,WETH,0.11,"56,647,991,108,577,850,953,984,972,492,538,466,...",opensea.box
155,2024-01-25 17:00:00+00:00,ETH,0.01,"36,470,608,646,898,707,880,068,503,761,534,760,...",08000.xyz
156,2024-01-01 03:00:00+00:00,ETH,0.15,"99,234,087,868,363,590,418,701,991,799,513,400,...",2024.finance


In [35]:
Three_DNS_data['dt']

0     2024-08-03 17:00:00+00:00
1     2024-08-03 17:00:00+00:00
2     2024-08-03 17:00:00+00:00
3     2024-08-03 17:00:00+00:00
4     2024-08-03 17:00:00+00:00
                 ...           
153   2024-02-03 14:00:00+00:00
154   2024-01-30 22:00:00+00:00
155   2024-01-25 17:00:00+00:00
156   2024-01-01 03:00:00+00:00
157   2023-12-26 21:00:00+00:00
Name: dt, Length: 158, dtype: datetime64[ns, UTC]

In [36]:
prices_data['DT'] = pd.to_datetime(prices_data['DT'])
prices_data.rename(columns={'DT':'dt'}, inplace=True)


In [37]:
prices_data['dt'] = prices_data['dt'].dt.tz_localize('UTC')
prices_data

SYMBOL,dt,ETH,MATIC
0,2018-06-16 05:00:00+00:00,489.49,
1,2018-06-16 06:00:00+00:00,490.08,
2,2018-06-16 07:00:00+00:00,492.07,
3,2018-06-16 08:00:00+00:00,494.07,
4,2018-06-16 09:00:00+00:00,491.77,
...,...,...,...
53769,2024-08-03 14:00:00+00:00,3004.78,0.47
53770,2024-08-03 15:00:00+00:00,3003.12,0.47
53771,2024-08-03 16:00:00+00:00,3004.18,0.47
53772,2024-08-03 17:00:00+00:00,2952.17,0.46


In [38]:
Three_DNS_data = Three_DNS_data.merge(prices_data, how='left', on='dt')
Three_DNS_data['price_usd'] = Three_DNS_data['token_amt_clean'] * Three_DNS_data['ETH']
Three_DNS_data

Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name,ETH,MATIC,price_usd
0,2024-08-03 17:00:00+00:00,WETH,0.12,"103,630,415,007,535,905,091,168,346,033,468,857...",03.box,2952.17,0.46,354.26
1,2024-08-03 17:00:00+00:00,ETH,0.00,"63,907,456,025,918,784,550,431,386,390,280,815,...",flon.chain.box,2952.17,0.46,0.83
2,2024-08-03 17:00:00+00:00,ETH,0.00,"70,494,507,210,742,404,178,780,831,369,986,847,...",eua.chain.box,2952.17,0.46,0.86
3,2024-08-03 17:00:00+00:00,ETH,0.00,"102,076,623,466,713,504,937,515,277,532,453,510...",investecriptos.chain.box,2952.17,0.46,0.89
4,2024-08-03 17:00:00+00:00,ETH,0.00,"11,812,242,816,192,290,227,614,680,555,896,161,...",drivenspyder.chain.box,2952.17,0.46,1.33
...,...,...,...,...,...,...,...,...
153,2024-02-03 14:00:00+00:00,WETH,0.00,"26,092,378,574,867,619,527,232,167,678,805,405,...",696.box,2306.16,0.79,2.31
154,2024-01-30 22:00:00+00:00,WETH,0.11,"56,647,991,108,577,850,953,984,972,492,538,466,...",opensea.box,2372.07,0.81,260.93
155,2024-01-25 17:00:00+00:00,ETH,0.01,"36,470,608,646,898,707,880,068,503,761,534,760,...",08000.xyz,2187.11,0.72,21.87
156,2024-01-01 03:00:00+00:00,ETH,0.15,"99,234,087,868,363,590,418,701,991,799,513,400,...",2024.finance,2298.24,0.99,344.74


In [39]:
optimism_name_service_data = optimism_name_service_data.merge(prices_data, how='left', on='dt')
optimism_name_service_data['price_usd'] = optimism_name_service_data['token_amt_clean'] * optimism_name_service_data['ETH']
optimism_name_service_data


Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name,ETH,MATIC,price_usd
0,2024-05-31 01:00:00+00:00,ETH,0.00,"98,605,561,369,100,498,602,024,245,469,838,703,...",zainal.op,3747.70,0.70,2.62
1,2024-05-08 21:00:00+00:00,ETH,0.00,"5,499,786,103,543,466,330,119,275,902,716,700,8...",oksie.op,2996.86,0.69,1.80
2,2024-04-01 13:00:00+00:00,ETH,0.00,"39,754,865,402,727,223,184,320,167,547,016,687,...",ahamad.op,3540.43,0.96,2.48
3,2024-03-20 22:00:00+00:00,ETH,0.00,"5,487,829,687,511,992,584,552,359,460,892,473,9...",squanchy.op,3465.30,1.01,2.43
4,2024-03-09 20:00:00+00:00,ETH,0.00,"52,516,629,700,842,134,031,927,131,776,612,298,...",azukii.op,3894.49,1.13,2.73
...,...,...,...,...,...,...,...,...
728,2023-03-04 12:00:00+00:00,ETH,0.00,"12,021,927,298,880,368,312,276,012,569,694,239,...",888666.op,1570.38,1.15,3.93
729,2023-03-04 12:00:00+00:00,ETH,0.05,"21,212,897,083,645,615,095,268,562,890,835,548,...",highstreet.op,1570.38,1.15,78.52
730,2023-03-04 12:00:00+00:00,ETH,0.05,"24,844,438,455,992,396,859,894,623,792,547,027,...",bitget.op,1570.38,1.15,78.52
731,2023-03-04 12:00:00+00:00,ETH,0.05,"54,901,502,244,357,081,727,363,511,107,096,227,...",bybit.op,1570.38,1.15,78.52


In [40]:
ens_data = ens_data.merge(prices_data, how='left', on='dt')
ens_data['price_usd'] = ens_data['token_amt_clean'] * ens_data['ETH']
ens_data

Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name,ETH,MATIC,price_usd
0,2024-08-03 15:00:00+00:00,ETH,0.00,"86,349,095,476,171,416,204,496,000,643,597,425,...",ghaut.eth,3003.12,0.47,7.37
1,2024-08-03 14:00:00+00:00,ETH,0.01,"71,552,007,753,732,387,614,282,500,469,513,396,...",ltaliano.eth,3004.78,0.47,21.03
2,2024-08-03 13:00:00+00:00,ETH,0.00,"36,910,400,166,190,457,351,422,022,548,683,801,...",9797th.eth,2995.20,0.47,5.99
3,2024-08-03 09:00:00+00:00,WETH,0.55,"80,665,790,946,324,601,651,199,810,530,833,638,...",joule.eth,2982.29,0.47,1640.26
4,2024-08-03 08:00:00+00:00,ETH,0.01,"101,676,255,068,068,542,704,731,039,737,725,146...",domainbank.eth,2984.32,0.47,29.84
...,...,...,...,...,...,...,...,...
41151,2023-06-12 05:00:00+00:00,ETH,0.01,"26,433,520,626,094,364,145,687,966,688,793,167,...",82337.eth,1735.29,0.62,16.49
41152,2023-06-12 05:00:00+00:00,ETH,0.01,"69,249,023,450,192,320,286,030,058,028,418,970,...",60983.eth,1735.29,0.62,16.49
41153,2023-06-12 05:00:00+00:00,ETH,0.01,"90,590,888,562,707,863,584,023,800,423,822,006,...",22967.eth,1735.29,0.62,16.49
41154,2023-06-12 05:00:00+00:00,ETH,0.01,"96,531,297,334,325,189,994,337,204,354,020,381,...",55287.eth,1735.29,0.62,16.49


In [41]:
unstoppable_sales_data = unstoppable_sales_data.merge(prices_data, how='left', on='dt')
unstoppable_sales_data['price_usd'] = unstoppable_sales_data['token_amt_clean'] * unstoppable_sales_data['ETH']
unstoppable_sales_data


Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name,ETH,MATIC,price_usd
0,2024-07-05 17:00:00+00:00,ETH,0.00,"73,260,974,434,666,309,016,726,815,552,216,794,...",gaydream.crypto,2985.49,0.46,0.15
1,2024-06-17 09:00:00+00:00,ETH,0.00,"79,928,895,083,408,148,097,614,756,880,763,030,...",officialdaimler.crypto,3553.73,0.61,3.52
2,2024-06-17 08:00:00+00:00,ETH,0.00,"72,014,141,608,816,767,035,628,981,280,910,821,...",officialaudi.crypto,3560.49,0.61,3.52
3,2024-06-17 08:00:00+00:00,ETH,0.00,"55,935,305,068,024,247,420,483,647,586,810,650,...",officialbillgates.crypto,3560.49,0.61,3.52
4,2024-06-16 15:00:00+00:00,ETH,0.00,"91,189,952,913,906,813,838,813,506,742,274,520,...",scaryterry.crypto,3579.49,0.62,3.19
...,...,...,...,...,...,...,...,...
3896,2019-12-24 09:00:00+00:00,WETH,0.10,"86,089,905,505,469,488,148,445,830,396,242,062,...",bittiez.crypto,127.24,0.01,12.72
3897,2019-12-24 09:00:00+00:00,WETH,0.10,"94,056,971,344,405,822,053,661,343,907,032,761,...",bitties.crypto,127.24,0.01,12.72
3898,2019-12-21 03:00:00+00:00,WETH,0.04,"44,616,317,507,143,517,844,109,711,444,778,473,...",impeached.crypto,127.47,0.02,5.74
3899,2019-12-19 15:00:00+00:00,ETH,0.07,"17,619,217,182,973,076,162,112,808,839,711,553,...",ethstaker.crypto,126.46,0.01,8.85


In [42]:
base_domains_metadata_pd = base_domains_metadata_pd.merge(prices_data, how='left', on='dt')
base_domains_metadata_pd['price_usd'] = base_domains_metadata_pd['token_amt_clean'] * base_domains_metadata_pd['ETH']
base_domains_metadata_pd


Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name,ETH,MATIC,price_usd
0,2024-06-27 23:00:00+00:00,ETH,0.00,6625,Vizzycrypto.base,3443.16,0.57,0.03
1,2024-06-27 23:00:00+00:00,ETH,0.00,6707,Cryptox.base,3443.16,0.57,0.03
2,2024-06-23 03:00:00+00:00,ETH,0.00,4863,38888.base,3505.75,0.58,1.91
3,2024-06-23 02:00:00+00:00,ETH,0.00,19245,venice.base,3506.13,0.57,1.91
4,2024-06-23 02:00:00+00:00,ETH,0.00,9711,13333.base,3506.13,0.57,1.71
...,...,...,...,...,...,...,...,...
91,2023-08-09 22:00:00+00:00,ETH,0.01,3505,rug.base,1851.53,0.69,20.37
92,2023-08-09 21:00:00+00:00,ETH,0.01,2704,200.base,1851.52,0.68,20.37
93,2023-08-09 20:00:00+00:00,ETH,0.00,4695,manga.base,1852.54,0.69,4.63
94,2023-08-09 19:00:00+00:00,ETH,0.00,3204,bullish.base,1847.67,0.68,6.47


In [43]:
optimistic_data.rename(columns={'DAY':'dt','PRICE_USD':'price_usd','PRICE':'token_amt_clean'}, inplace=True)

In [44]:
optimistic_data['dt'] = pd.to_datetime(optimistic_data['dt'])
optimistic_data['dt'] = optimistic_data['dt'].dt.tz_localize('UTC')
optimistic_data['dt'] = pd.to_datetime(optimistic_data['dt'])


In [45]:
base_domains_metadata_pd

Unnamed: 0,dt,token_symbol,token_amt_clean,nft_identifier,nft_name,ETH,MATIC,price_usd
0,2024-06-27 23:00:00+00:00,ETH,0.00,6625,Vizzycrypto.base,3443.16,0.57,0.03
1,2024-06-27 23:00:00+00:00,ETH,0.00,6707,Cryptox.base,3443.16,0.57,0.03
2,2024-06-23 03:00:00+00:00,ETH,0.00,4863,38888.base,3505.75,0.58,1.91
3,2024-06-23 02:00:00+00:00,ETH,0.00,19245,venice.base,3506.13,0.57,1.91
4,2024-06-23 02:00:00+00:00,ETH,0.00,9711,13333.base,3506.13,0.57,1.71
...,...,...,...,...,...,...,...,...
91,2023-08-09 22:00:00+00:00,ETH,0.01,3505,rug.base,1851.53,0.69,20.37
92,2023-08-09 21:00:00+00:00,ETH,0.01,2704,200.base,1851.52,0.68,20.37
93,2023-08-09 20:00:00+00:00,ETH,0.00,4695,manga.base,1852.54,0.69,4.63
94,2023-08-09 19:00:00+00:00,ETH,0.00,3204,bullish.base,1847.67,0.68,6.47


In [46]:
combined_dataset = pd.concat([
    ens_data[['dt','nft_name','price_usd','token_amt_clean']].dropna(),
    optimistic_data[['dt','nft_name','price_usd','token_amt_clean']].dropna(),
    optimism_name_service_data[['dt','nft_name','price_usd','token_amt_clean']].dropna(),
    unstoppable_sales_data[['dt','nft_name','price_usd','token_amt_clean']].dropna(),
    base_domains_metadata_pd[['dt','nft_name','price_usd','token_amt_clean']].dropna(),
    Three_DNS_data[['dt','nft_name','price_usd','token_amt_clean']].dropna()
], ignore_index=True)

combined_dataset = combined_dataset.drop_duplicates()
combined_dataset['dt'] = pd.to_datetime(combined_dataset['dt'], errors='coerce')
combined_dataset = combined_dataset.sort_values(by='dt')
combined_dataset = combined_dataset.reset_index(drop=True)
combined_dataset


Unnamed: 0,dt,nft_name,price_usd,token_amt_clean
0,2019-12-14 08:00:00+00:00,cryptoq.crypto,10.05,0.07
1,2019-12-19 15:00:00+00:00,ethstaker.crypto,8.85,0.07
2,2019-12-21 03:00:00+00:00,impeached.crypto,5.74,0.04
3,2019-12-24 09:00:00+00:00,bitties.crypto,12.72,0.10
4,2019-12-24 09:00:00+00:00,bittiez.crypto,12.72,0.10
...,...,...,...,...
45911,2024-08-03 17:00:00+00:00,03.box,354.26,0.12
45912,2024-08-03 17:00:00+00:00,flon.chain.box,0.83,0.00
45913,2024-08-03 17:00:00+00:00,eua.chain.box,0.86,0.00
45914,2024-08-03 17:00:00+00:00,investecriptos.chain.box,0.89,0.00


In [47]:
domain_data = domain_data.reset_index()
domain_data = domain_data.rename(columns={"date":"dt","price":"price_usd"})
domain_data['dt'] = pd.to_datetime(domain_data['dt'])
domain_data['dt'] = domain_data['dt'].dt.tz_localize('UTC')
domain_data['dt'] = pd.to_datetime(domain_data['dt'])
domain_data

Unnamed: 0,dt,domain,price_usd
0,1999-04-01 00:00:00+00:00,altavista.com,3250000
1,1999-04-01 00:00:00+00:00,bingo.com,1100000
2,1999-11-01 00:00:00+00:00,fly.com,1500000
3,1999-12-01 00:00:00+00:00,autos.com,2200000
4,1999-12-01 00:00:00+00:00,england.com,2000000
...,...,...,...
348231,2021-01-01 00:00:00+00:00,yes.movie,253
348232,2021-01-01 00:00:00+00:00,yopal.com,405
348233,2021-01-01 00:00:00+00:00,yougraph.com,1161
348234,2021-01-01 00:00:00+00:00,zenvie.com,349


In [48]:
domain_data

Unnamed: 0,dt,domain,price_usd
0,1999-04-01 00:00:00+00:00,altavista.com,3250000
1,1999-04-01 00:00:00+00:00,bingo.com,1100000
2,1999-11-01 00:00:00+00:00,fly.com,1500000
3,1999-12-01 00:00:00+00:00,autos.com,2200000
4,1999-12-01 00:00:00+00:00,england.com,2000000
...,...,...,...
348231,2021-01-01 00:00:00+00:00,yes.movie,253
348232,2021-01-01 00:00:00+00:00,yopal.com,405
348233,2021-01-01 00:00:00+00:00,yougraph.com,1161
348234,2021-01-01 00:00:00+00:00,zenvie.com,349


In [49]:
combined_dataset = combined_dataset.rename(columns={'nft_name':'domain'})
combined_dataset = pd.concat([combined_dataset, domain_data], ignore_index=True)
combined_dataset = combined_dataset.drop_duplicates()
combined_dataset['dt'] = pd.to_datetime(combined_dataset['dt'], errors='coerce')
combined_dataset = combined_dataset.sort_values(by='dt')
combined_dataset = combined_dataset.reset_index(drop=True)
combined_dataset


Unnamed: 0,dt,domain,price_usd,token_amt_clean
0,1999-04-01 00:00:00+00:00,bingo.com,1100000.00,
1,1999-04-01 00:00:00+00:00,altavista.com,3250000.00,
2,1999-11-01 00:00:00+00:00,fly.com,1500000.00,
3,1999-12-01 00:00:00+00:00,tom.com,2500000.00,
4,1999-12-01 00:00:00+00:00,england.com,2000000.00,
...,...,...,...,...
394147,2024-08-03 17:00:00+00:00,03.box,354.26,0.12
394148,2024-08-03 17:00:00+00:00,flon.chain.box,0.83,0.00
394149,2024-08-03 17:00:00+00:00,eua.chain.box,0.86,0.00
394150,2024-08-03 17:00:00+00:00,investecriptos.chain.box,0.89,0.00


# Feature Engineering

In [50]:
## ETH Price

prices_data



SYMBOL,dt,ETH,MATIC
0,2018-06-16 05:00:00+00:00,489.49,
1,2018-06-16 06:00:00+00:00,490.08,
2,2018-06-16 07:00:00+00:00,492.07,
3,2018-06-16 08:00:00+00:00,494.07,
4,2018-06-16 09:00:00+00:00,491.77,
...,...,...,...
53769,2024-08-03 14:00:00+00:00,3004.78,0.47
53770,2024-08-03 15:00:00+00:00,3003.12,0.47
53771,2024-08-03 16:00:00+00:00,3004.18,0.47
53772,2024-08-03 17:00:00+00:00,2952.17,0.46


In [51]:
combined_dataset = combined_dataset.drop(columns=['token_amt_clean'])

In [52]:
combined_dataset

Unnamed: 0,dt,domain,price_usd
0,1999-04-01 00:00:00+00:00,bingo.com,1100000.00
1,1999-04-01 00:00:00+00:00,altavista.com,3250000.00
2,1999-11-01 00:00:00+00:00,fly.com,1500000.00
3,1999-12-01 00:00:00+00:00,tom.com,2500000.00
4,1999-12-01 00:00:00+00:00,england.com,2000000.00
...,...,...,...
394147,2024-08-03 17:00:00+00:00,03.box,354.26
394148,2024-08-03 17:00:00+00:00,flon.chain.box,0.83
394149,2024-08-03 17:00:00+00:00,eua.chain.box,0.86
394150,2024-08-03 17:00:00+00:00,investecriptos.chain.box,0.89


In [53]:
# Calculate 7-day and 30-day rolling average price and sales volume
combined_dataset['7d_rolling_avg_price'] = combined_dataset['price_usd'].rolling(window=7).mean().fillna(0)
combined_dataset['30d_rolling_avg_price'] = combined_dataset['price_usd'].rolling(window=30).mean().fillna(0)

combined_dataset['7d_sales_volume'] = combined_dataset['price_usd'].rolling(window=7).sum().fillna(0)
combined_dataset['30d_sales_volume'] = combined_dataset['price_usd'].rolling(window=30).sum().fillna(0)

combined_dataset['cumulative_rolling_avg_price'] = combined_dataset['price_usd'].expanding().mean()

combined_dataset['7d_domains_sold'] = combined_dataset['price_usd'].rolling(window=7).count().fillna(0)
combined_dataset['30d_domains_sold'] = combined_dataset['price_usd'].rolling(window=30).count().fillna(0)
combined_dataset['60d_domains_sold'] = combined_dataset['price_usd'].rolling(window=60).count().fillna(0)
combined_dataset['90d_domains_sold'] = combined_dataset['price_usd'].rolling(window=90).count().fillna(0)

combined_dataset['7d_rolling_std_dev'] = combined_dataset['price_usd'].rolling(window=7).std().fillna(0)
combined_dataset['30d_rolling_std_dev'] = combined_dataset['price_usd'].rolling(window=30).std().fillna(0)

combined_dataset['7d_rolling_median_price'] = combined_dataset['price_usd'].rolling(window=7).median().fillna(0)
combined_dataset['30d_rolling_median_price'] = combined_dataset['price_usd'].rolling(window=30).median().fillna(0)

combined_dataset['cumulative_sum_sales_volume'] = combined_dataset['price_usd'].expanding().sum().fillna(0)

# Print the resulting dataframe
print(combined_dataset[['dt', 'domain', 'price_usd', '7d_rolling_avg_price', '30d_rolling_avg_price', '7d_sales_volume', '30d_sales_volume','cumulative_rolling_avg_price']])

                              dt                    domain    price_usd  \
0      1999-04-01 00:00:00+00:00                 bingo.com 1,100,000.00   
1      1999-04-01 00:00:00+00:00             altavista.com 3,250,000.00   
2      1999-11-01 00:00:00+00:00                   fly.com 1,500,000.00   
3      1999-12-01 00:00:00+00:00                   tom.com 2,500,000.00   
4      1999-12-01 00:00:00+00:00               england.com 2,000,000.00   
...                          ...                       ...          ...   
394147 2024-08-03 17:00:00+00:00                    03.box       354.26   
394148 2024-08-03 17:00:00+00:00            flon.chain.box         0.83   
394149 2024-08-03 17:00:00+00:00             eua.chain.box         0.86   
394150 2024-08-03 17:00:00+00:00  investecriptos.chain.box         0.89   
394151 2024-08-03 17:00:00+00:00    drivenspyder.chain.box         1.33   

        7d_rolling_avg_price  30d_rolling_avg_price  7d_sales_volume  \
0                       0.0

In [54]:
combined_dataset

Unnamed: 0,dt,domain,price_usd,7d_rolling_avg_price,30d_rolling_avg_price,7d_sales_volume,30d_sales_volume,cumulative_rolling_avg_price,7d_domains_sold,30d_domains_sold,60d_domains_sold,90d_domains_sold,7d_rolling_std_dev,30d_rolling_std_dev,7d_rolling_median_price,30d_rolling_median_price,cumulative_sum_sales_volume
0,1999-04-01 00:00:00+00:00,bingo.com,1100000.00,0.00,0.00,0.00,0.00,1100000.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,1100000.00
1,1999-04-01 00:00:00+00:00,altavista.com,3250000.00,0.00,0.00,0.00,0.00,2175000.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,4350000.00
2,1999-11-01 00:00:00+00:00,fly.com,1500000.00,0.00,0.00,0.00,0.00,1950000.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,5850000.00
3,1999-12-01 00:00:00+00:00,tom.com,2500000.00,0.00,0.00,0.00,0.00,2087500.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,8350000.00
4,1999-12-01 00:00:00+00:00,england.com,2000000.00,0.00,0.00,0.00,0.00,2070000.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,10350000.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394147,2024-08-03 17:00:00+00:00,03.box,354.26,294.24,208.27,2059.66,6248.23,2995.95,7.00,30.00,60.00,90.00,607.09,614.73,21.03,9.25,1180848462.60
394148,2024-08-03 17:00:00+00:00,flon.chain.box,0.83,290.09,208.05,2030.64,6241.65,2995.94,7.00,30.00,60.00,90.00,609.29,614.80,7.37,9.25,1180848463.42
394149,2024-08-03 17:00:00+00:00,eua.chain.box,0.86,55.89,206.74,391.24,6202.26,2995.94,7.00,30.00,60.00,90.00,131.76,615.22,5.99,7.47,1180848464.28
394150,2024-08-03 17:00:00+00:00,investecriptos.chain.box,0.89,55.16,202.74,386.14,6082.12,2995.93,7.00,30.00,60.00,90.00,132.10,616.18,0.90,6.68,1180848465.17


In [55]:
combined_dataset['domain_length'] = combined_dataset['domain'].apply(len)
combined_dataset['num_vowels'] = combined_dataset['domain'].apply(lambda x: sum([1 for char in x if char in 'aeiou']))
combined_dataset['num_consonants'] = combined_dataset['domain'].apply(lambda x: sum([1 for char in x if char.isalpha() and char not in 'aeiou']))
combined_dataset['tld'] = combined_dataset['domain'].apply(lambda x: x.split('.')[-1])  # Extract TLD


tld_weights = {
    'com': 1000000000,
    'net': 8,
    'org': 7,
    'box': 3,
    'eth': 2
    # Add more TLDs and their weights as needed
}

default_tld_weight = 1

combined_dataset['tld_weight'] = combined_dataset['tld'].map(tld_weights).fillna(default_tld_weight)  # Default weight is 1 if tld is not in tld_weights


In [56]:
target = 'price_usd'
features = combined_dataset.drop(columns=target).columns
print(f'target:{target},\n features:{features}')

target:price_usd,
 features:Index(['dt', 'domain', '7d_rolling_avg_price', '30d_rolling_avg_price',
       '7d_sales_volume', '30d_sales_volume', 'cumulative_rolling_avg_price',
       '7d_domains_sold', '30d_domains_sold', '60d_domains_sold',
       '90d_domains_sold', '7d_rolling_std_dev', '30d_rolling_std_dev',
       '7d_rolling_median_price', '30d_rolling_median_price',
       'cumulative_sum_sales_volume', 'domain_length', 'num_vowels',
       'num_consonants', 'tld'],
      dtype='object')


In [57]:
numeric_data = combined_dataset.select_dtypes(include=[float, int])

# Calculate correlation of 'price_usd' with other numeric columns
correlation_with_target = numeric_data.corr()[target]

# Print the correlations
print(correlation_with_target.sort_values())

90d_domains_sold               -0.10
60d_domains_sold               -0.09
30d_domains_sold               -0.06
7d_domains_sold                -0.02
num_vowels                     -0.00
domain_length                  -0.00
num_consonants                 -0.00
cumulative_sum_sales_volume     0.00
cumulative_rolling_avg_price    0.07
30d_rolling_median_price        0.08
7d_rolling_median_price         0.11
30d_rolling_std_dev             0.19
30d_rolling_avg_price           0.20
30d_sales_volume                0.20
7d_rolling_std_dev              0.38
7d_rolling_avg_price            0.39
7d_sales_volume                 0.39
price_usd                       1.00
Name: price_usd, dtype: float64


In [58]:
columns_to_drop = ['90d_domains_sold', '60d_domains_sold', '30d_domains_sold', '7d_domains_sold', 'cumulative_sum_sales_volume',
                   'cumulative_rolling_avg_price','30d_rolling_median_price','7d_rolling_median_price',
                   '30d_rolling_std_dev']

prophet_columns_to_drop = ['dt','90d_domains_sold', '60d_domains_sold', '30d_domains_sold', '7d_domains_sold', 'cumulative_sum_sales_volume',
                   'cumulative_rolling_avg_price','30d_rolling_median_price','7d_rolling_median_price',
                   '30d_rolling_std_dev']

# Drop columns from Index
gen_features = features.difference(columns_to_drop)
gen_features

Index(['30d_rolling_avg_price', '30d_sales_volume', '7d_rolling_avg_price',
       '7d_rolling_std_dev', '7d_sales_volume', 'domain', 'domain_length',
       'dt', 'num_consonants', 'num_vowels', 'tld'],
      dtype='object')

In [59]:
prophet_features = features.difference(prophet_columns_to_drop)
prophet_features

Index(['30d_rolling_avg_price', '30d_sales_volume', '7d_rolling_avg_price',
       '7d_rolling_std_dev', '7d_sales_volume', 'domain', 'domain_length',
       'num_consonants', 'num_vowels', 'tld'],
      dtype='object')

In [60]:
X = combined_dataset[gen_features]
y = combined_dataset[target]

## Ridge Regression

# Preprocess categorical data (TLD) and handle missing values
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())
        ]), ['domain_length', 'num_vowels', 'num_consonants']),
        ('cat', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ]), ['tld'])
    ]
)

# Create a pipeline with Ridge regression
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', Ridge())
])

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'regressor__alpha': [0.1, 1.0, 10.0, 100.0, 1000.0]
}

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='neg_root_mean_squared_error')
grid_search.fit(X_train, y_train)

# Best model from grid search
best_model = grid_search.best_estimator_

# Predict and evaluate
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Best Alpha: {grid_search.best_params_["regressor__alpha"]}')
print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'R²: {r2}')

## Prophet

df_prophet = combined_dataset.copy()
df_prophet.rename(columns={"dt":"ds","price_usd":"y"}, inplace=True)    
df_prophet['ds'] = df_prophet['ds'].dt.tz_localize(None)

target = 'y'
features = df_prophet[features].select_dtypes(include=[np.number]).columns

train_df, test_df = train_test_split(df_prophet, test_size=0.2, shuffle=False, random_state=42)


model = Prophet()

for feature in features:
    model.add_regressor(feature)

model.fit(train_df)

future = test_df[['ds']].copy()
for feature in features:
    # Use historical values for features from the training set
    future[feature] = test_df[feature].values

forecast = model.predict(future)

y_true = test_df['y'].values
y_pred = forecast['yhat'].values

r2 = r2_score(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

print(f"R²: {r2}")
print(f"MAE: {mae}")
print(f"RMSE: {rmse}")

In [61]:
def train_ridge_model(X, y):

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='median')),
                ('scaler', StandardScaler())
            ]), ['domain_length', 'num_vowels', 'num_consonants']),
            ('cat', Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='most_frequent')),
                ('onehot', OneHotEncoder(handle_unknown='ignore'))
            ]), ['tld'])
        ]
    )

    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', Ridge(alpha=1000.0))  # Set the best alpha value from grid search
    ])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f'MAE: {mae}')
    print(f'MSE: {mse}')
    print(f'R²: {r2}')

    return pipeline, features



In [62]:
def train_randomforest_model(X, y):

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='median')),
                ('scaler', StandardScaler())
            ]), ['domain_length', 'num_vowels', 'num_consonants']),
            ('cat', Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='most_frequent')),
                ('onehot', OneHotEncoder(handle_unknown='ignore'))
            ]), ['tld'])
        ]
    )

    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(n_estimators=200, max_depth=20, min_samples_split=5, random_state=seed))  # Set the best alpha value from grid search
    ])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f'MAE: {mae}')
    print(f'MSE: {mse}')
    print(f'R²: {r2}')

    return pipeline, features

In [63]:
def train_prophet_model(features):
    df_prophet = combined_dataset.copy()
    print(df_prophet.columns)
    df_prophet.rename(columns={"dt": "ds", "price_usd": "y"}, inplace=True)
    df_prophet['ds'] = df_prophet['ds'].dt.tz_localize(None)

    target = 'y'
    features = df_prophet[features].select_dtypes(include=[np.number]).columns

    train_df, test_df = train_test_split(df_prophet, test_size=0.2, shuffle=False, random_state=seed)

    model = Prophet()

    for feature in features:
        model.add_regressor(feature)

    model.fit(train_df)

    future = test_df[['ds']].copy()
    for feature in features:
        future[feature] = test_df[feature].values

    forecast = model.predict(future)

    y_true = test_df['y'].values
    y_pred = forecast['yhat'].values

    r2 = r2_score(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    print(f"R²: {r2}")
    print(f"MAE: {mae}")
    print(f"RMSE: {rmse}")

    return model, features

# Valuation Model

In [64]:
from sklearn.ensemble import RandomForestRegressor

In [65]:
class Domain_Valuator():
    def __init__(self, domain, X, y, features, features_data, seed):
        self.domain = domain
        self.features = features
        self.features_data = features_data
        self.model = None
        self.data = None
        self.X = X
        self.y = y
        self.seed = seed

    def model_prep(self):
        # Prepare the domain DataFrame
        domain_df = pd.DataFrame({'domain': [self.domain]})
        domain_df['domain_length'] = domain_df['domain'].apply(len)
        domain_df['num_vowels'] = domain_df['domain'].apply(lambda x: sum([1 for char in x if char in 'aeiou']))
        domain_df['num_consonants'] = domain_df['domain'].apply(lambda x: sum([1 for char in x if char.isalpha() and char not in 'aeiou']))
        domain_df['tld'] = domain_df['domain'].apply(lambda x: x.split('.')[-1])
        # domain_df['tld_weight'] = domain_df['tld'].map(self.tld_weights).fillna(self.default_tld_weight)

        # Include today’s date
        today = pd.Timestamp.now().normalize()
        print(f'Domain DataFrame Columns: {domain_df.columns}')
        print(f'Feature Data (latest entry): {self.features_data.iloc[-1]}')

        # Prepare features with missing ones from features_data
        missing_features = [feature for feature in self.features if feature not in domain_df.columns]
        print(f'domain df before adding features {domain_df}')
        print(f'domain df cols {domain_df.columns}')
        print(f'missing features {missing_features}')
        if missing_features:
            for feature in missing_features:
                if feature in self.features_data.columns:
                    domain_df[feature] = self.features_data[feature].iloc[-1]
                else:
                    raise ValueError(f"Feature {feature} is missing from features_data")

        # Ensure all features are present
        all_features = [feature for feature in self.features if feature != 'ds']
        domain_features = domain_df[all_features].iloc[0]
        print(f'domain features {domain_features}')

        self.data = pd.DataFrame({
            'ds': [today],
            **domain_features.to_dict()
        })

        print(f"Prepared data for prediction: {self.data}")
        print(f"Prepared col for prediction: {self.data.columns}")

    def value_domain(self, model):
        self.model = model
        print(f'data: {self.data}')
        domain_x = self.data[self.features]
        value = self.model.predict(domain_x)
        print(f'domain: {self.domain} \npredicted value: {value[0]}')
        return value[0]

In [66]:
class Prophet_Domain_Valuator():
    def __init__(self, domain, features, features_data):
        self.domain = domain
        self.features = features
        self.features_data = features_data
        self.data = None

    def model_prep(self):
        # Prepare the domain DataFrame
        domain_df = pd.DataFrame({'domain': [self.domain]})
        domain_df['domain_length'] = domain_df['domain'].apply(len)
        domain_df['num_vowels'] = domain_df['domain'].apply(lambda x: sum([1 for char in x if char in 'aeiou']))
        domain_df['num_consonants'] = domain_df['domain'].apply(lambda x: sum([1 for char in x if char.isalpha() and char not in 'aeiou']))
        domain_df['tld'] = domain_df['domain'].apply(lambda x: x.split('.')[-1])

        # Include today’s date
        today = pd.Timestamp.now().normalize()
        print(f'Domain DataFrame Columns: {domain_df.columns}')
        print(f'Feature Data (latest entry): {self.features_data.iloc[-1]}')

        # Prepare features with missing ones from features_data
        missing_features = [feature for feature in self.features if feature not in domain_df.columns]
        if missing_features:
            for feature in missing_features:
                if feature in self.features_data.columns:
                    domain_df[feature] = self.features_data[feature].iloc[-1]
                else:
                    raise ValueError(f"Feature {feature} is missing from features_data")

        # Ensure all features are present
        all_features = [feature for feature in self.features if feature != 'ds']
        domain_features = domain_df[all_features].iloc[0]

        self.data = pd.DataFrame({
            'ds': [today],
            **domain_features.to_dict()
        })

        print(f"Prepared data for prediction: {self.data}")

    def value_domain(self, model):
        self.model = model
        # Ensure the feature data includes the latest information
        future = self.data.copy()
        for feature in self.features:
            if feature not in future.columns:
                if feature in self.features_data.columns:
                    future[feature] = self.features_data[feature].iloc[-1]
                else:
                    raise ValueError(f"Feature {feature} is missing from features_data")

        # Predict using the fitted model
        forecast = self.model.predict(future)
        value = forecast['yhat'].values[0]
        print(f'Domain: {self.domain} \nPredicted value: {value}')
        return value

In [67]:
prophet_model, prophet_features = train_prophet_model(prophet_features)

Index(['dt', 'domain', 'price_usd', '7d_rolling_avg_price',
       '30d_rolling_avg_price', '7d_sales_volume', '30d_sales_volume',
       'cumulative_rolling_avg_price', '7d_domains_sold', '30d_domains_sold',
       '60d_domains_sold', '90d_domains_sold', '7d_rolling_std_dev',
       '30d_rolling_std_dev', '7d_rolling_median_price',
       '30d_rolling_median_price', 'cumulative_sum_sales_volume',
       'domain_length', 'num_vowels', 'num_consonants', 'tld'],
      dtype='object')


09:56:58 - cmdstanpy - INFO - Chain [1] start processing
09:59:15 - cmdstanpy - INFO - Chain [1] done processing


R²: 0.14339222499722082
MAE: 19664.848925905786
RMSE: 838587.039188687


In [68]:
ridge_model, ridge_features = train_ridge_model(X, y)

MAE: 5199.8114161640615
MSE: 13443190679.518866
R²: -0.00011991461091431788


In [69]:
randomforest_model, random_forest_features = train_randomforest_model(X, y)

MAE: 5822.157495115425
MSE: 136586595190.82391
R²: -9.16149939220615


In [70]:
from sklearn.ensemble import VotingRegressor

class EnsemblePredictor:
    def __init__(self, prophet_model, rf_model, ridge_model, features):
        self.prophet_model = prophet_model
        self.rf_model = rf_model
        self.ridge_model = ridge_model
        self.features = features

    def predict(self, X, df_prophet):
        # Prepare the input for Prophet
        future = X[['ds']].copy()
        for feature in self.features:
            future[feature] = X[feature].values

        forecast = self.prophet_model.predict(future)
        prophet_preds = forecast['yhat'].values

        # Prepare the input for RandomForest and Ridge
        X_rf = X.drop(columns=['ds'])  # Drop 'ds' for RF and Ridge
        rf_preds = self.rf_model.predict(X_rf)
        ridge_preds = self.ridge_model.predict(X_rf)

        # Create a DataFrame to store predictions
        predictions = pd.DataFrame({
            'prophet': prophet_preds,
            'rf': rf_preds,
            'ridge': ridge_preds
        })

        # Aggregate predictions (mean of predictions)
        predictions['ensemble'] = predictions.mean(axis=1)

        # Return the latest prediction
        latest_prediction = predictions.iloc[-1]['ensemble']
        predictions = predictions['ensemble']
        return latest_prediction, predictions

In [71]:
domain = 'env.eth'

In [72]:
# Ensure features_data is up to date and correctly formatted
prophet_features_data = combined_dataset.copy()
prophet_features_data.rename(columns={"dt": "ds", "price_usd": "y"}, inplace=True)
# prophet_features_data['ds'] = pd.to_datetime(features_data['ds']).dt.tz_localize(None)  # Remove timezone if present

# Initialize the Domain_Valuator
prophet_valuator = Prophet_Domain_Valuator(domain, prophet_features, prophet_features_data)

# Prepare the model and get the domain value
prophet_valuator.model_prep()
prophet_domain_value = prophet_valuator.value_domain(prophet_model)

Domain DataFrame Columns: Index(['domain', 'domain_length', 'num_vowels', 'num_consonants', 'tld'], dtype='object')
Feature Data (latest entry): ds                              2024-08-03 17:00:00+00:00
domain                             drivenspyder.chain.box
y                                                    1.33
7d_rolling_avg_price                                55.22
30d_rolling_avg_price                              202.53
7d_sales_volume                                    386.56
30d_sales_volume                                 6,075.89
cumulative_rolling_avg_price                     2,995.92
7d_domains_sold                                      7.00
30d_domains_sold                                    30.00
60d_domains_sold                                    60.00
90d_domains_sold                                    90.00
7d_rolling_std_dev                                 132.07
30d_rolling_std_dev                                616.25
7d_rolling_median_price                    

In [73]:
# Right now doesnt work w/ .com
features_data = combined_dataset.copy()
features_data['dt'] = features_data['dt'].dt.tz_localize(None)
features_data = features_data[features]
ridge_valuator = Domain_Valuator(domain, X, y, gen_features, features_data, seed)

# Prepare the model and get the domain value
ridge_valuator.model_prep()
ridge_domain_value = ridge_valuator.value_domain(ridge_model)

Domain DataFrame Columns: Index(['domain', 'domain_length', 'num_vowels', 'num_consonants', 'tld'], dtype='object')
Feature Data (latest entry): dt                                 2024-08-03 17:00:00
domain                          drivenspyder.chain.box
7d_rolling_avg_price                             55.22
30d_rolling_avg_price                           202.53
7d_sales_volume                                 386.56
30d_sales_volume                              6,075.89
cumulative_rolling_avg_price                  2,995.92
7d_domains_sold                                   7.00
30d_domains_sold                                 30.00
60d_domains_sold                                 60.00
90d_domains_sold                                 90.00
7d_rolling_std_dev                              132.07
30d_rolling_std_dev                             616.25
7d_rolling_median_price                           1.33
30d_rolling_median_price                          5.98
cumulative_sum_sales_volume   

In [74]:
# Right now doesnt work w/ .com
features_data = combined_dataset.copy()
features_data['dt'] = features_data['dt'].dt.tz_localize(None)
features_data = features_data[features]
randomforest_valuator = Domain_Valuator(domain, X, y, gen_features, features_data, seed)

# Prepare the model and get the domain value
randomforest_valuator.model_prep()
randomforest_domain_value = randomforest_valuator.value_domain(randomforest_model)

Domain DataFrame Columns: Index(['domain', 'domain_length', 'num_vowels', 'num_consonants', 'tld'], dtype='object')
Feature Data (latest entry): dt                                 2024-08-03 17:00:00
domain                          drivenspyder.chain.box
7d_rolling_avg_price                             55.22
30d_rolling_avg_price                           202.53
7d_sales_volume                                 386.56
30d_sales_volume                              6,075.89
cumulative_rolling_avg_price                  2,995.92
7d_domains_sold                                   7.00
30d_domains_sold                                 30.00
60d_domains_sold                                 60.00
90d_domains_sold                                 90.00
7d_rolling_std_dev                              132.07
30d_rolling_std_dev                             616.25
7d_rolling_median_price                           1.33
30d_rolling_median_price                          5.98
cumulative_sum_sales_volume   

In [75]:
individual_predictions = [
    prophet_domain_value,
    ridge_domain_value,
    randomforest_domain_value
]

In [76]:
ensemble_domain_value = np.mean(individual_predictions)

print(f"Ensemble Domain Value: {ensemble_domain_value}")
print(prophet_domain_value)
print(ridge_domain_value)
print(randomforest_domain_value)

Ensemble Domain Value: 7613.723275549994
13098.136748628154
7706.23524447116
2036.7978335506675


In [77]:
X['dt'] = X['dt'].dt.tz_localize(None)
X = X.rename(columns={'dt':'ds'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['dt'] = X['dt'].dt.tz_localize(None)


In [78]:
ensemble_predictor = EnsemblePredictor(prophet_model, randomforest_model, ridge_model, prophet_features)
ensemble_domain_value, ensemble_predictions = ensemble_predictor.predict(X, combined_dataset)
print(ensemble_domain_value)

MemoryError: Unable to allocate 2.94 GiB for an array with shape (1000, 394152) and data type float64

In [None]:
print(ensemble_domain_value)
print(prophet_domain_value)
print(ridge_domain_value)
print(randomforest_domain_value)

3868.0653204993564
4912.311237478739
7764.412171233063
515.328128839416


In [None]:
# ensemble_predictor = EnsemblePredictor(prophet_model, randomforest_model, ridge_model, prophet_features)

# Prepare data for prediction (X should be in the correct format for Prophet)
X = combined_dataset.copy()  # Ensure this DataFrame is correctly formatted
X['dt'] = X['dt'].dt.tz_localize(None)
X.rename(columns={'dt':'ds'}, inplace=True)

# Predict using the ensemble
# test_ensemble_predictions = ensemble_predictor.predict(X, combined_dataset)

y_true = combined_dataset['price_usd'].values
r2 = r2_score(y_true, ensemble_predictions)
mae = mean_absolute_error(y_true, ensemble_predictions)
rmse = np.sqrt(mean_squared_error(y_true, ensemble_predictions))

print(f"Ensemble R²: {r2}")
print(f"Ensemble MAE: {mae}")
print(f"Ensemble RMSE: {rmse}")

Ensemble R²: 0.21678373713201482
Ensemble MAE: 4873.568683392907
Ensemble RMSE: 365309.8801858052
