#### Imports

In [20]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline

from sklearn.svm import LinearSVR
from sklearn.svm import SVR

In [2]:
df = pd.read_csv('./data/model_data_v1.csv')
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values(by='date')
df.shape

(19784, 58)

In [3]:
df.head(3)

Unnamed: 0,date,token_id,payment,buyer,seller,price,priceUsd,1/1,4:20 watch,background,...,shirt_score,short_hair_score,smoke_score,type_score,rarity_score,1d_avg_price,7d_avg_price,30d_avg_price,average_price,floor_price
0,2021-11-30 21:27:04+00:00,367,ETH,0xa76ae53049e3c5f8f30954f804da503b15040cc1,0xb3d6dbc54ed91bcc84da7871fc9e146e7f13c496,1.0,4650.538658,0,sub lantern (green),graveyard,...,33.112583,0.0,2.677376,112.359551,536.950704,3.235565,4.122053,2.510842,1.147349,2.79
1,2021-11-30 21:29:41+00:00,5039,ETH,0xde06ffb76c61b51d11aa593b5fe175a59d43e78f,0x6a61925dcdf27d8b28c11ec76228b4195a978069,0.69,3208.871674,0,sub lantern (green),orange,...,18.14882,0.0,2.624672,1.690331,247.762786,3.235565,4.122053,2.510842,1.147349,2.79
2,2021-11-30 21:32:17+00:00,5172,ETH,0x11f8704d371012c691e786341f8f9ab20d63e629,0xd15b046342bc19ea520254b0c16c6a3fcaa372b1,0.46,2139.247783,0,0,blue,...,0.0,0.0,13.386881,1.690331,32.150504,3.235565,4.122053,2.510842,1.147349,2.79


In [11]:
df_lim = pd.read_csv('./data/model_single_txns.csv')
df_lim['date'] = pd.to_datetime(df_lim['date'])
df_lim = df_lim.sort_values(by='date')
df_lim.shape

(8983, 58)

In [5]:
df_lim.head(3)

Unnamed: 0,date,token_id,payment,buyer,seller,price,priceUsd,1/1,4:20 watch,background,...,shirt_score,short_hair_score,smoke_score,type_score,rarity_score,1d_avg_price,7d_avg_price,30d_avg_price,average_price,floor_price
0,2021-11-30 21:33:22+00:00,5150,ETH,0xcc1eab8d0fc6313780a86d9713396307ea6a0416,0xdd74a5f502167c1bdd88968d058834170175277b,0.399,1855.564924,0,sub black,orange,...,0.0,0.0,2.677376,2.526529,36.700389,3.235565,4.122053,2.510842,1.147349,2.79
1,2021-11-30 21:33:55+00:00,6533,ETH,0xba9f8f95481cd383868721a856c00b8a8f35a3d3,0x7c121489e50e6672bbb59cc4c3fc86eaa8fb8364,0.35,1627.68853,0,sub red,orange,...,0.0,0.0,2.624672,1.690331,42.985527,3.235565,4.122053,2.510842,1.147349,2.79
2,2021-11-30 21:34:02+00:00,6816,ETH,0xd6a4eb571e6812d72f86076e9d2f902fef288278,0x60cd5cd13e7be6f92cb1e13867fa2d4212531256,0.39,1813.710077,0,sub lantern (green),orange,...,0.0,0.0,2.677376,2.526529,44.464909,3.235565,4.122053,2.510842,1.147349,2.79


In [9]:
score_cols = [
    "1of1_score","watch_score","background_score",
    "beard_score","chain_score","eyes_score",
    "hat_over_headphones_score","hat_under_headphones_score","headphones_score",
    "long_hair_score","mouth_score","shirt_score",
    "short_hair_score","smoke_score","type_score","rarity_score"
]

rarity_cols = [
    "1of1_rarity","watch_rarity","background_rarity",
    "beard_rarity","chain_rarity","eyes_rarity",
    "hat_over_headphones_rarity","hat_under_headphones_rarity","headphones_rarity",
    "long_hair_rarity","mouth_rarity","shirt_rarity","short_hair_rarity","smoke_rarity"
]

feature_cols = [
    '1/1','4:20 watch','background','beard', 'chain', 'eyes','hat over headphones','hat under headphones','headphones','long hair','mouth','shirt','short hair','smoke','type'
]

non_cols = ['date', 'token_id', 'payment', 'buyer', 'seller', 'price', 'priceUsd']

In [10]:
#Parameter Tuning: GridSearchCV w/SVR model

X = df.drop(columns=non_cols+feature_cols+rarity_cols)
y = df['priceUsd']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

z=StandardScaler()
Z_train = z.fit_transform(X_train)
Z_test = z.transform(X_test)

params={
    "C":[0.01,0.1,0.5,1.0],
    'gamma':[1,0.1,0.01,0.001],
    'kernel':['linear','rbf', 'poly'],
}
gs=GridSearchCV(
    SVR(),
    params,
    verbose=2
)
gs.fit(Z_train, y_train)
gs.score(Z_train, y_train), gs.score(Z_test, y_test)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV] END .....................C=0.01, gamma=1, kernel=linear; total time=   8.8s
[CV] END .....................C=0.01, gamma=1, kernel=linear; total time=   8.8s
[CV] END .....................C=0.01, gamma=1, kernel=linear; total time=   8.8s
[CV] END .....................C=0.01, gamma=1, kernel=linear; total time=   8.5s
[CV] END .....................C=0.01, gamma=1, kernel=linear; total time=   8.5s
[CV] END ........................C=0.01, gamma=1, kernel=rbf; total time=  16.5s
[CV] END ........................C=0.01, gamma=1, kernel=rbf; total time=  16.5s
[CV] END ........................C=0.01, gamma=1, kernel=rbf; total time=  16.5s
[CV] END ........................C=0.01, gamma=1, kernel=rbf; total time=  16.3s
[CV] END ........................C=0.01, gamma=1, kernel=rbf; total time=  17.0s
[CV] END .......................C=0.01, gamma=1, kernel=poly; total time=   9.7s
[CV] END .......................C=0.01, gamma=1

(0.12576304522415271, -0.028618686485174294)

In [12]:
gs.best_estimator_

SVR(C=0.01, gamma=1, kernel='poly')

-----

In [15]:
dfv2 = pd.read_csv('./data/model_data_v2.csv')
dfv2.head()

Unnamed: 0,date,token_id,payment,buyer,seller,price,priceUsd,1/1,4:20 watch,background,...,30d_avg_price,average_price,floor_price,last_10_avg,last_10_max,last_10_min,last_25_avg,last_25_max,last_25_min,last_sale_price
0,2021-11-30 21:35:15+00:00,1848,ETH,0xdd6d3324c75a5ae1480ef9f3394702dc28adb65e,0x97da20dc2bd46174da79d71a385b7248527fbee6,0.349,1623.037992,0,sub rose,blue,...,2.510842,1.147349,2.79,1833.707393,2325.269329,1581.183144,2232.053932,4650.538658,1581.183144,0.34
1,2021-11-30 21:35:15+00:00,6531,ETH,0xc2690edde996e5c56c9b7fafcdbdf257a780446b,0x7c121489e50e6672bbb59cc4c3fc86eaa8fb8364,0.35,1627.68853,0,sub black,blue,...,2.510842,1.147349,2.79,1819.755777,2325.269329,1581.183144,2111.139927,4418.011725,1581.183144,0.349
2,2021-11-30 21:35:15+00:00,4147,ETH,0xa7856843b4298a55a23a902ced79cc63c880d0d7,0x9e9b7dc5a0cfa97e3cc984d836d969ac13746951,0.385,1790.457383,0,sub red,red,...,2.510842,1.147349,2.79,1792.317599,2325.269329,1581.183144,2054.403355,4418.011725,1581.183144,0.35
3,2021-11-30 21:35:15+00:00,6176,ETH,0xd6a4eb571e6812d72f86076e9d2f902fef288278,0xf50131d7d2b5239fe1e934658fe3f6131532a437,0.45,2092.742396,0,0,blue,...,2.510842,1.147349,2.79,1769.064905,2325.269329,1581.183144,2052.54314,4418.011725,1581.183144,0.385
4,2021-11-30 21:35:15+00:00,7061,ETH,0x2ab5cf2e5b79a96bf70302de6f799c80e2a4b7cf,0xdf3fb2a51568f84c2fbafea5f8ad039b8d931b12,0.4,1860.215463,0,0,green,...,2.510842,1.147349,2.79,1785.341791,2325.269329,1581.183144,1998.596891,4418.011725,1581.183144,0.45


In [17]:
#fitting SVR on v2 model data with higher performing features

X = dfv2.drop(columns=non_cols+feature_cols+rarity_cols)
y = dfv2['priceUsd']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

z=StandardScaler()
Z_train = z.fit_transform(X_train)
Z_test = z.transform(X_test)

svr = LinearSVR()
svr.fit(Z_train, y_train)
svr.score(Z_train, y_train), svr.score(Z_test, y_test)

(0.742385600207627, 0.6747116404603739)

In [21]:
pipe=Pipeline([
    ('ss', StandardScaler()),
    ('svr', LinearSVR())
])

cross_val_score(pipe, X, y, cv=5)

array([0.68274468, 0.58441122, 0.60738359, 0.49946569, 0.4325298 ])

In [22]:
#GridSearchCV w/SVR model on dfv2 data

X = dfv2.drop(columns=non_cols+feature_cols+rarity_cols)
y = dfv2['priceUsd']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

z=StandardScaler()
Z_train = z.fit_transform(X_train)
Z_test = z.transform(X_test)

params={
    "C":[0.001,0.01,0.1],
    'gamma':[1,0.1,0.01],
    'kernel':['linear','rbf', 'poly'],
}
gs=GridSearchCV(
    SVR(),
    params,
    verbose=2
)
gs.fit(Z_train, y_train)
gs.score(Z_train, y_train), gs.score(Z_test, y_test)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
[CV] END ....................C=0.001, gamma=1, kernel=linear; total time=   7.1s
[CV] END ....................C=0.001, gamma=1, kernel=linear; total time=   7.0s
[CV] END ....................C=0.001, gamma=1, kernel=linear; total time=   7.0s
[CV] END ....................C=0.001, gamma=1, kernel=linear; total time=   6.9s
[CV] END ....................C=0.001, gamma=1, kernel=linear; total time=   6.9s
[CV] END .......................C=0.001, gamma=1, kernel=rbf; total time=  12.5s
[CV] END .......................C=0.001, gamma=1, kernel=rbf; total time=  12.4s
[CV] END .......................C=0.001, gamma=1, kernel=rbf; total time=  12.4s
[CV] END .......................C=0.001, gamma=1, kernel=rbf; total time=  12.4s
[CV] END .......................C=0.001, gamma=1, kernel=rbf; total time=  12.5s
[CV] END ......................C=0.001, gamma=1, kernel=poly; total time=   8.3s
[CV] END ......................C=0.001, gamma=1

(0.8625250995842414, 0.8632503439566752)

In [24]:
gs.best_estimator_

SVR(C=0.01, gamma=1, kernel='poly')