### Cryptopunk Price Regressor

In [69]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import LabelBinarizer
from sklearn.feature_extraction.text import CountVectorizer

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout

from gazpacho import Soup

Load everything:

In [7]:
# Punk attribute table; a later cell joins it to the sales on "id".
adf = pd.read_csv("data/attributes.csv")
# Transaction log; parse_dates=[0] has pandas parse the first column as datetimes.
tdf = pd.read_csv("data/transactions.csv", parse_dates=[0])

Keep sales from the last 90 days and join them with each punk's attributes:

In [8]:
# Keep completed sales only.
is_sale = tdf["type"].eq("Sold")
sold = tdf.loc[is_sale]

# Restrict to sales at most 90 days old. The cutoff is relative to the current
# clock time, so the selected rows depend on when this cell is run.
age_in_days = (pd.Timestamp("now") - sold["date"]).dt.days
sold90 = sold.loc[age_in_days.le(90), ["id", "eth"]]

# Attach each punk's attributes (inner join on "id") and discard incomplete rows.
df = sold90.merge(adf, on="id").dropna()

Quick peek:

In [3]:
# Preview the first rows of the joined sales/attributes frame.
df.head()

Unnamed: 0,id,eth,attributes
0,285,29.99,Female + Dark Hair + Purple Lipstick
1,285,31.99,Female + Dark Hair + Purple Lipstick
2,329,22.95,Female + Bandana + Eye Patch
3,329,24.9,Female + Bandana + Eye Patch
4,486,39.0,Female + Straight Hair Blonde + Hot Lipstick


Split:

In [4]:
# The sale price in ETH is the regression target; every other column stays in X.
target = "eth"
X, y = df.drop(columns=[target]), df[target]

# Hold out 20% of the rows; the fixed seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

Transforms:

In [9]:
def split_attributes(text):
    """Split an attribute string such as "Female + Bandana + Eye Patch" into its parts.

    This is a named function rather than an inline lambda so that the fitted
    mapper can be pickled: pickle stores functions by qualified name and
    cannot serialize a lambda.
    """
    return text.split(" + ")


# Encode the "attributes" column: CountVectorizer tokenizes each string on
# " + " and builds one column per distinct attribute; LabelBinarizer is applied
# to the vectorizer's output. df_out=True makes the mapper return a DataFrame.
mapper = DataFrameMapper([
    ("attributes", [CountVectorizer(tokenizer=split_attributes), LabelBinarizer()])
], df_out=True)

In [10]:
# Fit on the training rows only, so the attribute vocabulary is learned without the test split.
mapper.fit(X_train)

DataFrameMapper(df_out=True,
                features=[('attributes',
                           [CountVectorizer(tokenizer=<function <lambda> at 0x7f8f5aee8700>),
                            LabelBinarizer()])])

In [15]:
# The fitted CountVectorizer is the first transformer of the first (only) feature.
vectorizer = mapper.features[0][1][0]

# get_feature_names() is deprecated and removed in newer scikit-learn releases;
# prefer get_feature_names_out() when available and fall back otherwise.
# .tolist() keeps attribute_order a plain list of str, as before.
if hasattr(vectorizer, "get_feature_names_out"):
    attribute_order = vectorizer.get_feature_names_out().tolist()
else:
    attribute_order = vectorizer.get_feature_names()
print(attribute_order)

['3d glasses', 'alien', 'ape', 'bandana', 'beanie', 'big beard', 'big shades', 'black lipstick', 'blonde bob', 'blonde short', 'blue eye shadow', 'buck teeth', 'cap', 'cap forward', 'chinstrap', 'choker', 'cigarette', 'classic shades', 'clown eyes blue', 'clown eyes green', 'clown hair green', 'clown nose', 'cowboy hat', 'crazy hair', 'dark hair', 'do-rag', 'earring', 'eye mask', 'eye patch', 'fedora', 'female', 'front beard', 'front beard dark', 'frown', 'frumpy hair', 'goat', 'gold chain', 'green eye shadow', 'half shaved', 'handlebars', 'headband', 'hoodie', 'horned rim glasses', 'hot lipstick', 'knitted cap', 'luxurious beard', 'male', 'medical mask', 'messy hair', 'mohawk', 'mohawk dark', 'mohawk thin', 'mole', 'mustache', 'muttonchops', 'nerd glasses', 'normal beard', 'normal beard black', 'orange side', 'peak spike', 'pigtails', 'pilot helmet', 'pink with hat', 'pipe', 'police cap', 'purple eye shadow', 'purple hair', 'purple lipstick', 'red mohawk', 'regular shades', 'rosy chee

In [12]:
# Fit the mapper on the training rows and encode them in one step.
Z_train = mapper.fit_transform(X_train)
# Encode the test rows with the training-fitted mapper (transform only, no refit).
Z_test = mapper.transform(X_test)

In [16]:
# Not strictly necessary: label the encoded columns with the readable attribute names.
for encoded in (Z_train, Z_test):
    encoded.columns = attribute_order

In [17]:
# Number of encoded feature columns; the model below uses it as its input width.
Z_train.shape[1]

92

Simple Sequential Model:

In [18]:
# Feed-forward regressor: two ReLU hidden layers, dropout, then one linear output.
n_features = Z_train.shape[1]
network_layers = [
    Input(shape=(n_features,)),
    Dense(50, activation="relu"),
    Dense(10, activation="relu"),
    Dropout(0.25),
    Dense(1, activation="linear"),
]
model = Sequential(network_layers)

In [19]:
# Adam with its default settings, minimizing mean absolute percentage error.
adam = tf.keras.optimizers.Adam()
mape = tf.keras.losses.MeanAbsolutePercentageError()
model.compile(optimizer=adam, loss=mape)

Checkpoint and early-stopping callbacks:

In [20]:
# Where the best model seen during training is written.
filepath = "./models/punk_regressor_live"

# Stop once the monitored quantity has gone 5 epochs without improving.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=5)

# Save the full model (not just weights) whenever val_loss reaches a new minimum.
best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
)

callbacks = [early_stopping, best_checkpoint]

In [None]:
# Train for at most 500 epochs with the callbacks defined for this run.
# NOTE(review): the test split is passed as validation data, so it also drives
# any callback that monitors validation loss.
model.fit(
    x=Z_train,
    y=y_train,
    batch_size=512,
    epochs=500,
    validation_data=(Z_test, y_test),
    callbacks=callbacks,
)

In [21]:
# Load a saved model from disk; this rebinds both `filepath` and `model`.
# Note that this path is not the "./models/punk_regressor_live" checkpoint path.
filepath = "./models/punk_regressor"
model = tf.keras.models.load_model(filepath)

In [22]:
# Score the loaded model on the held-out split.
# NOTE(review): the saved model's compile configuration is not visible here, so
# confirm what each returned value means.
model.evaluate(Z_test, y_test)



[24.066831588745117, 7.762690544128418]

In [28]:
# Put predicted and actual prices side by side, indexed like y_test.
predicted = model.predict(Z_test).flatten()
comparison = pd.DataFrame({"yhat": predicted, "y": y_test})

# Show 20 random rows (unseeded, so the selection changes between runs).
comparison.sample(20)

Unnamed: 0,yhat,y
2530,14.75148,22.22
922,19.964434,20.1
3182,21.140831,24.0
239,13.078527,24.88
2164,21.431362,18.3
527,22.653889,22.88
321,20.690598,26.5
2326,21.135101,14.0
2278,16.992584,17.5
1923,18.914083,21.99


In [74]:
def scrape(punk):
    """Fetch a punk's attributes from its Larva Labs detail page.

    Parameters
    ----------
    punk : int or str
        Punk identifier, interpolated into the detail-page URL.

    Returns
    -------
    pandas.DataFrame
        One-row frame (index 0) with a single "attributes" column holding the
        scraped attribute names joined by " + ".

    Warns
    -----
    RuntimeWarning
        If no attribute links are found on the page. The returned frame then
        holds an empty string, so a prediction made from it is not meaningful.
    """
    import warnings  # local import keeps this cell self-contained

    url = f"https://www.larvalabs.com/cryptopunks/details/{punk}"
    soup = Soup.get(url)
    # Attribute names are the texts of the anchors that link to the attribute search.
    links = soup.find("a", {"href": "/cryptopunks/search?query"}, mode="list")
    # Strip stray whitespace so each name joins and splits cleanly on " + ".
    attributes = [link.text.strip() for link in links]
    if not attributes:
        warnings.warn(
            f"No attributes found for punk {punk} at {url}",
            RuntimeWarning,
            stacklevel=2,
        )
    return pd.DataFrame({"attributes": " + ".join(attributes)}, index=[0])

In [76]:
# Scrape the attributes of punk 99 into a one-row frame.
X_new = scrape(99)

In [79]:
# Encode with the already-fitted mapper (transform only, no refit).
Z_new = mapper.transform(X_new)

In [81]:
# Index [0][0] takes the first output of the first (only) row as a scalar.
model.predict(Z_new)[0][0]

21.626862