In [1]:
import pandas as pd
import numpy as np
import random
import os
import sys
import requests
import time
import datetime as dt


from dotenv import load_dotenv
from flipside import Flipside
from prophet import Prophet

from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor

# from sql_queries.sql_scripts import eth_price

In [2]:
# SQL text selecting sale events for one NFT contract address on Optimism.
# The block timestamp is truncated to the HOUR even though the column is
# aliased `day`.
Optimistic_Domains_Sales_query = """
  SELECT
      DATE_TRUNC('HOUR', BLOCK_TIMESTAMP) AS day, tokenid, price
    FROM
      optimism.nft.ez_nft_sales
    WHERE
      NFT_ADDRESS = LOWER('0xC16aCAdf99E4540E6f4E6Da816fd6D2A2C6E1d4F')
      AND event_type = 'sale'

"""

# SQL text selecting hourly WETH and MATIC prices from 2022-06-01 onward,
# newest first. The date literal must not carry leading whitespace: it is
# compared against a truncated timestamp.
eth_price = """

  select
    hour as dt,
    symbol,
    price
  from
    ethereum.price.ez_prices_hourly
  where
    symbol in('WETH', 'MATIC')
    AND date_trunc('day', dt) >= '2022-06-01'
  order by
    dt DESC

"""

In [3]:
# Step up one directory and show where we landed.
# NOTE(review): presumably done so the `scripts` package imported in the next
# cell resolves — confirm. Re-running this cell moves up again (not idempotent).
os.chdir('..')
print(os.getcwd())

e:\Projects\liquid_domains


In [4]:
from scripts.utils import flipside_api_results, set_random_seed
from scripts.pull_data import pull_data
from scripts.data_processing import process_data

In [5]:
# Switch into the Notebooks directory; the relative '../data' and '../pkl'
# paths used further down are written relative to it.
os.chdir('Notebooks')

In [6]:
# Render floats with thousands separators and two decimals in displayed frames.
pd.options.display.float_format = '{:,.2f}'.format

In [7]:
# Capture the working directory and display it as the cell output.
current_directory = os.getcwd()
current_directory

'e:\\Projects\\liquid_domains\\Notebooks'

In [8]:
# Load variables from a .env file into the environment; the API keys below
# are then read with os.getenv.
load_dotenv()

True

In [9]:
# Single seed reused throughout the notebook (also passed to train_model and main).
# NOTE(review): set_random_seed is a project helper; presumably it seeds the
# random number generators — confirm which libraries it covers.
seed = 20
set_random_seed(seed)

In [10]:
# Credentials are read from environment variables; os.getenv returns None when
# a variable is unset. Avoid printing them: cell outputs are saved with the notebook.
flipside_api_key = os.getenv('FLIPSIDE_API_KEY')
alchemy_api_key = os.getenv('ALCHEMY_API_KEY')
opensea_api_key = os.getenv('OPENSEA_API_KEY')

# print(alchemy_api_key)

In [11]:
api = False
web2_data = True #Includes web2 data in training
threshold = None #Correlation value for correlation analysis
temporals = True #Tends to inflate estimation
fine_tuning_web3 = True #Train again on web3 data only
correlation_analysis = False
last_dataset = True

# To preview web2 domain data; Kaggle dataset and namebio dataset


def _load_web2_sales(path, date_col, venue_col, renames, **read_kwargs):
    """Read one web2 sales file and normalise it to the shared layout.

    Parameters
    ----------
    path : str
        Location of the delimited text file.
    date_col : str
        Name of the sale-date column in the raw file.
    venue_col : str
        Name of the venue column, which is dropped.
    renames : dict
        Raw-to-normalised column names; must map ``date_col`` to ``'dt'``.
    **read_kwargs
        Forwarded to ``pd.read_csv`` (e.g. ``delimiter``).

    Returns
    -------
    pandas.DataFrame
        Rows ordered by the raw date column, with ``dt`` as a UTC-localised
        datetime and a constant ``web3`` column set to False.
    """
    sales = (
        pd.read_csv(path, **read_kwargs)
        .set_index(date_col)
        .drop(columns=[venue_col])
        .sort_index()
        .reset_index()
        .rename(columns=renames)
    )
    # The parsed dates are naive, so attach UTC rather than converting to it.
    sales['dt'] = pd.to_datetime(sales['dt']).dt.tz_localize('UTC')
    sales['web3'] = False
    return sales


domain_path = '../data/domain-name-sales.tsv'
namebio_path = '../data/namebio_sales.csv'

tsv_sales = _load_web2_sales(
    domain_path,
    date_col='date',
    venue_col='venue',
    renames={"date": "dt", "price": "price_usd"},
    delimiter='\t',
)
namebio_data = _load_web2_sales(
    namebio_path,
    date_col='Date',
    venue_col='Venue',
    renames={"Date": "dt", "Price": "price_usd", "Domain": "domain"},
)

# Stack both sources into one frame with a fresh 0..n-1 index.
domain_data = pd.concat([tsv_sales, namebio_data], ignore_index=True)

In [None]:
# Build the modelling inputs from the flags set in the configuration cell.
(
    X,
    y,
    prophet_features,
    gen_features,
    target,
    combined_dataset,
    features,
    web3_data,
    X_web3,
    y_web3,
) = process_data(
    api=api,
    web2_data=web2_data,
    threshold=threshold,
    temporals=temporals,
    correlation_analysis=correlation_analysis,
)

starting process_data
[REDACTED: credential-like string printed during process_data]
Loading Existing Data...
--- ens ---
Head:
  event_type                                         order_hash     chain  \
0       sale  0x61648021e425cf8f4894b69f61a404939ce4b4f4baeb...  ethereum   
1       sale                                                     ethereum   
2       sale  0xfba3e32b0bbf57a27d1fe4a07fbd60a82e2e046b01d1...  ethereum   
3       sale                                                     ethereum   
4       sale                                                     ethereum   

                             protocol_address  closing_date  \
0  0x0000000000000068f116a894984e2db1123eb395    1728693023   
1                                                1728691007   
2  0x0000000000000068f116a894984e2db1123eb395    1728681671   
3                                                1728677843   
4                                                1728666623   

                                                 nft  quanti

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  optimism_name_service_data['dt'] = pd.to_datetime(optimism_name_service_data['dt'], unit='ms')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Three_DNS_data['dt'] = pd.to_datetime(Three_DNS_data['dt'], unit='ms')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ens_data['dt'] = pd.to_datetime(ens_dat

standardized time
prices data dt: 0       2022-06-01 00:00:00+00:00
1       2022-06-01 01:00:00+00:00
2       2022-06-01 02:00:00+00:00
3       2022-06-01 03:00:00+00:00
4       2022-06-01 04:00:00+00:00
                   ...           
20745   2024-10-12 09:00:00+00:00
20746   2024-10-12 10:00:00+00:00
20747   2024-10-12 11:00:00+00:00
20748   2024-10-12 12:00:00+00:00
20749   2024-10-12 13:00:00+00:00
Name: dt, Length: 20750, dtype: datetime64[ns, UTC]
optimistic_data:                          dt  nft_identifier  price  price_usd  __row_index  \
0   2022-06-03T04:00:00.000Z             482   0.01      27.47            0   
1   2022-06-04T02:00:00.000Z             549   0.05      88.79            1   
2   2022-06-04T02:00:00.000Z             550   0.01      26.64            2   
3   2022-06-06T14:00:00.000Z             576   0.05      95.46            3   
4   2022-06-19T00:00:00.000Z             787   0.02      19.68            4   
5   2022-07-13T06:00:00.000Z             902   0.0

In [1]:
# NOTE(review): the recorded output below is a NameError with execution count 1,
# i.e. this cell was last run on a fresh kernel before process_data had defined
# `combined_dataset`. Re-run the notebook top to bottom.
combined_dataset

NameError: name 'combined_dataset' is not defined

In [13]:
# Rows whose domain was counted as more than one word.
combined_dataset[combined_dataset['word_count']>1]


Unnamed: 0,dt,domain,price_usd,web3,domain_length,num_vowels,num_consonants,tld,word_count,has_numbers,tld_length,is_brandable,levenshtein_distance,is_subdomain,domain_entropy,rank,tld_weight
15,2020-01-11 18:00:00+00:00,reseller-test-paul00.crypto,0.15,True,27,7,15,crypto,3,True,6,0,12,0,3.63,unranked,1
167,2020-03-21 21:00:00+00:00,dark-market.crypto,13.19,True,18,4,12,crypto,2,False,6,0,3,0,3.57,unranked,1
168,2020-03-21 22:00:00+00:00,alt-coin.crypto,20.52,True,15,4,9,crypto,2,False,6,1,3,0,3.51,unranked,1
217,2020-03-31 20:00:00+00:00,satoshi-lottery.crypto,79.89,True,22,6,14,crypto,2,False,6,0,5,0,3.61,unranked,1
298,2020-04-08 06:00:00+00:00,awcdik-dik.crypto,0.86,True,17,4,11,crypto,2,False,6,0,6,0,3.62,unranked,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45373,2024-07-24 19:00:00+00:00,girls-on.chain.box,2.69,True,18,5,10,box,2,False,3,0,4,1,3.73,unranked,1
45399,2024-07-25 14:00:00+00:00,memecoins-on.chain.box,1.20,True,22,8,11,box,2,False,3,1,6,1,3.57,unranked,1
45474,2024-07-26 17:00:00+00:00,e-transaction.eth,2.61,True,17,6,9,eth,2,False,3,0,5,0,3.45,unranked,1
45729,2024-07-31 05:00:00+00:00,nintendo-pictures.eth,3.29,True,21,7,12,eth,2,False,3,0,9,0,3.62,unranked,1


In [14]:
# Rows with an actual rank; the recorded output is empty, so every row is 'unranked'.
combined_dataset[combined_dataset['rank']!='unranked']


Unnamed: 0,dt,domain,price_usd,web3,domain_length,num_vowels,num_consonants,tld,word_count,has_numbers,tld_length,is_brandable,levenshtein_distance,is_subdomain,domain_entropy,rank,tld_weight


In [15]:
# The ranking file has no header row, so name the columns explicitly. With the
# default header handling pandas consumes the first data row (rank 1) as column
# names, which previously had to be patched back in by hand.
domain_rankings = pd.read_csv(
    '../data/tranco_5863N.csv',
    header=None,
    names=['rank', 'domain'],
)

# Fail loudly if the file turns out to carry a header row after all.
assert domain_rankings['rank'].iloc[0] == 1, 'expected a headerless ranking list starting at rank 1'

domain_rankings['domain'].head(500).values

array(['google.com', 'amazonaws.com', 'microsoft.com', 'facebook.com',
       'akamai.net', 'apple.com', 'root-servers.net', 'a-msedge.net',
       'youtube.com', 'googleapis.com', 'azure.com', 'akamaiedge.net',
       'twitter.com', 'cloudflare.com', 'instagram.com', 'gstatic.com',
       'office.com', 'linkedin.com', 'live.com', 'gtld-servers.net',
       'tiktokcdn.com', 'googletagmanager.com', 'googlevideo.com',
       'akadns.net', 'windowsupdate.com', 'googleusercontent.com',
       'doubleclick.net', 'fbcdn.net', 'microsoftonline.com',
       'amazon.com', 'trafficmanager.net', 'wikipedia.org', 'bing.com',
       'l-msedge.net', 'apple-dns.net', 'fastly.net', 'office.net',
       'googlesyndication.com', 'mail.ru', 'icloud.com', 'wordpress.org',
       'sharepoint.com', 't-msedge.net', 'youtu.be', 'github.com',
       'netflix.com', 'aaplimg.com', 'whatsapp.net', 'domaincontrol.com',
       'pinterest.com', 'yahoo.com', 'digicert.com', 's-msedge.net',
       'appsflyersdk.com', 

# Sales

In [16]:
from models.model import train_model

# The ranking file has no header row, so name the columns explicitly instead of
# letting pandas consume the rank-1 row as column names and re-adding it by hand.
domain_rankings = pd.read_csv(
    '../data/tranco_5863N.csv',
    header=None,
    names=['rank', 'domain'],
)

# Train on the prepared inputs; `web3` is driven by the fine_tuning_web3 flag
# from the configuration cell.
results = train_model(
    X,
    y,
    prophet_features,
    gen_features,
    target,
    combined_dataset,
    features,
    web3_data,
    X_web3,
    y_web3,
    seed=seed,
    web3=fine_tuning_web3,
)

In [18]:
# Show the current working directory (the directory change is left disabled).
# os.chdir('..')
print(os.getcwd())

e:\Projects\liquid_domains\Notebooks


# NOTE(review): the other paths in this notebook ('../data', '../pkl') are
# written relative to the Notebooks directory; changing into 'data' here would
# shift them. This cell has no recorded execution — confirm it is still needed.
os.chdir('data')
print(os.getcwd())

# Valuation Model

In [19]:
from sklearn.ensemble import RandomForestRegressor
import joblib

In [20]:
# Load the three saved models from ../pkl.
# NOTE: joblib.load unpickles its input, which can execute arbitrary code;
# only load files from a trusted source.
prophet_model = joblib.load('../pkl/prophet_model.pkl')
ridge_model = joblib.load('../pkl/ridge_model.pkl')
randomforest_model = joblib.load('../pkl/randomforest_model.pkl')

In [21]:
from models.forecasters import Prophet_Domain_Valuator, Domain_Valuator

In [22]:
# Feature names handed to Prophet_Domain_Valuator inside main().
prophet_features

Index(['domain', 'domain_entropy', 'domain_length', 'has_numbers',
       'is_brandable', 'is_subdomain', 'levenshtein_distance',
       'num_consonants', 'num_vowels', 'rank', 'tld', 'tld_length',
       'tld_weight', 'web3', 'word_count'],
      dtype='object')

In [23]:
def main(domain, prophet_model, ridge_model, randomforest_model, combined_dataset, prophet_features, gen_features, features, X, y,seed=seed ):
    """Value `domain` with three models and return the median of their estimates.

    The Prophet valuator receives a copy of `combined_dataset` whose `dt` and
    `price_usd` columns are renamed to `ds` and `y`. The Ridge and random
    forest valuators each receive `combined_dataset` restricted to `features`,
    with the timezone stripped from `dt`.

    Returns the median of the three valuations, or 0 when that median is
    negative. Both the individual valuations and the median are printed.
    """
    set_random_seed(seed)

    # Prophet valuation on the ds / y renamed frame.
    prophet_frame = combined_dataset.rename(columns={"dt": "ds", "price_usd": "y"})
    prophet_valuator = Prophet_Domain_Valuator(domain, prophet_features, prophet_frame)
    prophet_valuator.model_prep()
    valuations = [prophet_valuator.value_domain(prophet_model)]

    # Shared input for the two regressors: timezone-naive dt, selected columns only.
    regression_frame = combined_dataset.copy()
    regression_frame['dt'] = regression_frame['dt'].dt.tz_localize(None)
    regression_frame = regression_frame[features]

    # Ridge first, then random forest, each through its own valuator instance.
    for fitted_model in (ridge_model, randomforest_model):
        valuator = Domain_Valuator(domain, X, y, gen_features, regression_frame, seed)
        valuator.model_prep()
        valuations.append(valuator.value_domain(fitted_model))

    median_value = np.median(valuations)
    print(f'individual valuations: {valuations}')
    print(f'ensamble value: {median_value}')

    # A negative price is meaningless, so floor the result at zero.
    if median_value < 0:
        print('Defaulting from Negative to $0')
        return 0

    return median_value

In [24]:
# Domain valued by the main() call below.
domain = 'google.com'

In [25]:
# Re-check for ranked rows before valuing; the recorded output is still empty.
combined_dataset[combined_dataset['rank']!='unranked']

Unnamed: 0,dt,domain,price_usd,web3,domain_length,num_vowels,num_consonants,tld,word_count,has_numbers,tld_length,is_brandable,levenshtein_distance,is_subdomain,domain_entropy,rank,tld_weight


In [26]:
# Ensemble valuation for `domain` using the models loaded above.
value = main(
    domain=domain,
    prophet_model=prophet_model,
    ridge_model=ridge_model,
    randomforest_model=randomforest_model,
    combined_dataset=combined_dataset,
    prophet_features=prophet_features,
    gen_features=gen_features,
    features=features,
    X=X,
    y=y,
)

Domain DataFrame Columns: Index(['domain', 'domain_length', 'num_vowels', 'num_consonants', 'tld'], dtype='object')
Feature Data (latest entry): ds                      2024-08-03 17:00:00+00:00
domain                     drivenspyder.chain.box
y                                            1.33
web3                                         True
domain_length                                  22
num_vowels                                      6
num_consonants                                 14
tld                                           box
word_count                                      1
has_numbers                                 False
tld_length                                      3
is_brandable                                    0
levenshtein_distance                            6
is_subdomain                                    1
domain_entropy                               3.91
rank                                     unranked
tld_weight                                      1
Name:

# NOTE(review): `model_pipeline` is not defined anywhere in the visible
# notebook and this cell has no recorded execution — confirm where the
# estimator should come from before running it.
from sklearn.model_selection import GridSearchCV

# Define TLD weight grid
# One dict per combination of the candidate weights: 3 * 3 * 3 * 3 = 81 entries.
param_grid = {
    'tld_weight': [{'com': w_com, 'net': w_net, 'org': w_org, 'eth': w_eth}
                   for w_com in [1.0, 1.5, 2.0]
                   for w_net in [0.8, 1.2, 1.5]
                   for w_org in [1.0, 1.5, 2.0]
                   for w_eth in [1.0, 2.0, 3.0]]
}

# 5-fold cross-validated search over the grid; best_params_ holds the winner.
grid_search = GridSearchCV(estimator=model_pipeline, param_grid=param_grid, cv=5)
grid_search.fit(X, y)
best_weights = grid_search.best_params_


In [27]:
# Run configuration plus the resulting valuation, collected so they can be
# saved together as a one-row DataFrame.
params = {
    "New API Data":api,
    "Used web2 Data":web2_data,
    "Used threshold for correlation value":threshold,
    "Used temporals":temporals,
    "Fine tuned on web3 data":fine_tuning_web3,
    "Domain":domain,
    "Ensamble Value":value
}

# Wrapping the dict in a list yields a single row with one column per key.
params_df = pd.DataFrame([params])

In [28]:
# Day-month-year-hour-minute-second stamp used to give each run its own file.
today = f'{dt.datetime.today():%d-%m-%y-%H-%M-%S}'

# Training results are not merged in for now; only the run parameters are saved.
# results_df = pd.DataFrame([results])
# combined_df = pd.concat([results_df, params_df], axis=1)
combined_df = params_df.copy()

results_path = f'../data/results_{today}.csv'
combined_df.to_csv(results_path)

print(combined_df)

   New API Data  Used web2 Data Used threshold for correlation value  \
0         False           False                                 None   

   Used temporals  Fine tuned on web3 data      Domain  Ensamble Value  
0           False                    False  google.com           63.60  


In [29]:
# NOTE: despite its name, `liquidity_discount` is the fraction of the value
# that is KEPT (0.3 keeps 30%); the discount reported to the reader is
# 1 - liquidity_discount.
liquidity_discount = 0.3 #If we want to discount 90%, we only take 10% of value

# Value remaining after the discount is applied.
discounted_value = value * liquidity_discount

# The discount itself, as a fraction (0.7 for a kept share of 0.3).
liquidity_discount_filtered = 1 - liquidity_discount

print(f'Domain: {domain} \nValue: ${value:,.2f} \nLiquidity Discount: {liquidity_discount_filtered*100:.2f}% \nDiscounted Price w/ Liquidity Discount: ${discounted_value:,.2f}')

Domain: google.com 
Value: $63.60 
Liquidity Discount: 70.00% 
Discounted Price w/ Liquidity Discount: $19.08


In [105]:
# Distinct domain names among the rows flagged as subdomains.
subdomain_values = combined_dataset[combined_dataset['is_subdomain']==1]['domain'].unique()

In [111]:
# enumerate() returns a lazy, one-shot iterator of (index, value) pairs; it is
# exhausted after a single pass.
subdomain_struct = enumerate(subdomain_values)


<enumerate at 0x25769238630>

# Print every unique subdomain on its own line.
for val in subdomain_values:
    print(f'val: {val}')

In [31]:
# Inspect the distinct TLD values; the recorded output includes empty and
# malformed entries alongside the expected web3 TLDs.
combined_dataset['tld'].unique()

array(['crypto', 'scottishcryptoloans', '888', 'x', 'coin', 'nft',
       'wallet', 'bitcoin', 'dao', 'op', 'blockchain', 'eth', 'eth ‚ö†Ô∏è',
       'Unknown ENS name', '',
       '12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220',
       'base', 'finance', 'xyz',
       'yoteprefierofueradefocoinalcanzableyoteprefieroirreversiblecasiintocabletusropascaenlentamentesoyunespiaunespectadoryelventiladordesgarrandotesequeteexcitapensarhastadondellegareesdificildecreercreoquenuncalopodresabersoloasiyotevereatravesdemipersianaamericanaesunacondenaa

In [81]:
def search(domain, tld, liquidity_discount=0.3):
    """Value `domain + tld` with main() and apply the liquidity factor.

    `tld` must end with one of the supported web3 suffixes; anything in front
    of that suffix is accepted, so subdomain forms such as '.super.eth' pass.
    `liquidity_discount` is the fraction of the valuation that is kept; the
    percentage printed as the discount is its complement.

    Returns a `(value, discounted_value)` tuple.
    Raises ValueError when `tld` does not end with a supported suffix.
    """
    supported_suffixes = (
        '.op', '.crypto', '.box', '.eth', '.base', '.nft', '.wallet',
        '.coin', '.finance', '.xyz', '.dao', '.bitcoin', '.blockchain',
    )

    print(f'domain: {domain}')
    print(f'tld: {tld}')

    # Only the ending is checked, which is what lets dotted subdomain TLDs through.
    if not tld.endswith(supported_suffixes):
        raise ValueError('Error: Cannot Value Non-Web3 Domain')

    full_domain = domain + tld
    value = main(
        domain=full_domain,
        prophet_model=prophet_model,
        ridge_model=ridge_model,
        randomforest_model=randomforest_model,
        combined_dataset=combined_dataset,
        prophet_features=prophet_features,
        gen_features=gen_features,
        features=features,
        X=X,
        y=y,
    )

    liquidity_discount_filtered = 1 - liquidity_discount
    discounted_value = value * liquidity_discount

    print(f'Domain: {full_domain} \nValue: ${value:,.2f} \nLiquidity Discount: {liquidity_discount_filtered*100:.2f}% \nDiscounted Price w/ Liquidity Discount: ${discounted_value:,.2f}')

    return value, discounted_value

    

In [102]:
# Example inputs for search(): a name plus a subdomain-style TLD ending in '.eth'.
domain='five'
tld='.super.eth'

In [103]:
# Value the example domain, keeping 5% of the valuation as the discounted price.
search(domain,tld,liquidity_discount=0.05)

domain: five
tld: .super.eth
Domain DataFrame Columns: Index(['domain', 'domain_length', 'num_vowels', 'num_consonants', 'tld'], dtype='object')
Feature Data (latest entry): ds                      2024-08-03 17:00:00+00:00
domain                     drivenspyder.chain.box
y                                            1.33
web3                                         True
domain_length                                  22
num_vowels                                      6
num_consonants                                 14
tld                                           box
word_count                                      1
has_numbers                                 False
tld_length                                      3
is_brandable                                    0
levenshtein_distance                            6
is_subdomain                                    1
domain_entropy                               3.91
rank                                     unranked
tld_weight                

(np.float64(20.31117575691378), np.float64(1.015558787845689))

dcc.Input(
            id='valuator-input',
            value='example',
            type='text',
            style={
                'padding': '10px',
                'borderRadius': 'var(--wcm-input-border-radius)',
                'border': '1px solid var(--color-border)',
                'marginRight': '10px'
            },
            pattern='[^.]*'  # Regex pattern to disallow '.' character

# Redefines the sales query from the top of the notebook, now also selecting
# price_usd and ordering by the hour-truncated timestamp (still aliased `day`).
Optimistic_Domains_Sales_query = """
  SELECT
  DATE_TRUNC('HOUR', BLOCK_TIMESTAMP) AS day,
  tokenid,
  price,
  price_usd
FROM
  optimism.nft.ez_nft_sales
WHERE
  NFT_ADDRESS = LOWER('0xC16aCAdf99E4540E6f4E6Da816fd6D2A2C6E1d4F')
  AND event_type = 'sale'
order by
  DATE_TRUNC('HOUR', BLOCK_TIMESTAMP) asc

"""

optimistic_domains_sales_path = '../data/optimistic_domains_sales.csv'

# NOTE(review): these calls are not guarded by the `api` flag, so running the
# cell always queries the services and overwrites the files — confirm intended.
optimistic_domains_sales = flipside_api_results(Optimistic_Domains_Sales_query,flipside_api_key)
optimistic_domains_sales.to_csv(optimistic_domains_sales_path, index=False)

from scripts.apis import alchemy_metadata_api

prices_path = '../data/prices.csv'
Optimistic_domains_path = '../data/optimistic_domains_metadata.json'

# Run the eth_price query and save the result as CSV.
prices = flipside_api_results(eth_price, flipside_api_key)
prices.to_csv(prices_path,index=False)

# Metadata for the same contract address used in the sales query, saved as a
# JSON array of records.
optimistic_domains = alchemy_metadata_api(alchemy_api_key, 'optimism', '0xC16aCAdf99E4540E6f4E6Da816fd6D2A2C6E1d4F')
optimistic_domains.to_json(Optimistic_domains_path, orient='records')