#### Imports

In [1]:
import pandas as pd
import requests

In [2]:
# Load the cleaned per-token feature table from disk and preview its first rows.
features = pd.read_csv('./data/clean_features.csv')
features.head()

Unnamed: 0,token_id,1/1,4:20 watch,background,beard,chain,eyes,hat over headphones,hat under headphones,headphones,long hair,mouth,shirt,short hair,smoke,type
0,1,0,sub red,red,0,0,nerd glasses,0,bandana dark gray,white headphones,0,smile,collared shirt blue,0,0,charcoal mfer
1,2,0,sub bat (blue/black),orange,0,0,regular eyes,0,headband green/white,pink headphones,long hair black,smile,0,0,cig black,charcoal mfer
2,3,0,sub red,graveyard,full beard,0,zombie eyes,0,beanie,white headphones,0,smile,0,0,cig black,zombie mfer
3,4,0,sub lantern (green),green,0,0,regular eyes,0,0,white headphones,0,smile,0,mohawk blue,cig white,plain mfer
4,5,0,0,orange,0,0,shades,0,bandana dark gray,pink headphones,long hair black,smile,0,0,cig white,plain mfer


#### Scraping Data on Features of Collection

In [3]:
#setting up requests

collection_slug = 'mfers'
url = f'https://api.opensea.io/api/v1/collection/{collection_slug}'

# requests has no default timeout, so without one a stalled connection would
# block "Run All" forever; 30 seconds is a generous ceiling for one JSON call.
res = requests.get(url, timeout=30)
# Fail here, with the HTTP status in the error, rather than later with an
# opaque KeyError/JSON error when the response body is parsed.
res.raise_for_status()
res.status_code

200

In [4]:
# Keep the 'traits' entry of the response's 'collection' object; feature_to_rarity
# indexes it by feature column name and uses each entry as a value -> number lookup.
traits = res.json()['collection']['traits']

#### Creating Feature Rarity and Feature Rarity Scores DataFrames

In [5]:
#function that turns the features df into a rarity df (share of tokens having
#each feature value) and a rarity score df (reciprocal of that share)

def feature_to_rarity(df, trait_counts=None, collection_size=10_000):
    """Convert a token-feature table into rarity and rarity-score tables.

    The first column of ``df`` is carried through untouched; every remaining
    column is treated as a feature whose cell values are looked up in
    ``trait_counts[column_name]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table. Column 0 is left as-is; columns 1+ hold feature values.
        The frame is not modified.
    trait_counts : mapping, optional
        ``{column_name: {feature_value: count}}``. When omitted, the
        notebook-level ``traits`` variable is used (the original behaviour).
    collection_size : int or float, optional
        Divisor that turns a count into a share. Defaults to 10_000, the value
        that was previously hard-coded.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``rarity_df``: same columns as ``df``; each feature cell holds
        ``count / collection_size``, or 0 when the value has no entry in the
        lookup.
        ``score_df``: same columns plus ``rarity_score``; each feature cell
        holds ``1 / rarity`` (0 where the rarity is not positive) and
        ``rarity_score`` is the row-wise sum of the feature scores.

    Raises
    ------
    KeyError
        If a feature column has no entry in ``trait_counts``.
    ValueError
        If ``collection_size`` is not positive.
    """
    if trait_counts is None:
        # Backward-compatible default: the lookup scraped earlier in the notebook.
        trait_counts = traits
    if collection_size <= 0:
        raise ValueError("collection_size must be positive")

    feature_cols = list(df.columns[1:])

    # Rarity: share of the collection carrying each feature value.
    rarity_df = df.copy()
    for col in feature_cols:
        rarity_df[col] = rarity_df[col].map(trait_counts[col]).div(collection_size)
    # Values without a lookup entry map to NaN; a single fill after the loop
    # replaces the whole-frame fill that used to run on every iteration.
    rarity_df = rarity_df.fillna(0)

    # Score: reciprocal of the rarity, with non-positive rarities scoring 0.
    score_df = rarity_df.copy()
    for col in feature_cols:
        rarity = score_df[col]
        # Masking first avoids dividing by zero; masked cells become NaN -> 0.
        score_df[col] = (1 / rarity.where(rarity > 0)).fillna(0)

    #aggregating rarity scores as total rarity score column
    score_df['rarity_score'] = score_df[feature_cols].sum(axis=1)

    return rarity_df, score_df
        

In [6]:
# Derive the rarity table and the rarity-score table from the raw feature table.
rarities, scores = feature_to_rarity(features)

In [7]:
# Preview the first rows of the rarity table.
rarities.head()

Unnamed: 0,token_id,1/1,4:20 watch,background,beard,chain,eyes,hat over headphones,hat under headphones,headphones,long hair,mouth,shirt,short hair,smoke,type
0,1,0.0,0.1459,0.1846,0.0,0.0,0.0613,0.0,0.0319,0.2421,0.0,0.7936,0.0565,0.0,0.0,0.3958
1,2,0.0,0.01,0.2023,0.0,0.0,0.7254,0.0,0.02,0.104,0.1839,0.7936,0.0,0.0,0.381,0.3958
2,3,0.0,0.1459,0.0089,0.1036,0.0,0.0089,0.0,0.0041,0.2421,0.0,0.7936,0.0,0.0,0.381,0.0089
3,4,0.0,0.0555,0.1974,0.0,0.0,0.7254,0.0,0.0,0.2421,0.0,0.7936,0.0,0.0224,0.3735,0.5916
4,5,0.0,0.0,0.2023,0.0,0.0,0.0526,0.0,0.0319,0.104,0.1839,0.7936,0.0,0.0,0.3735,0.5916


In [8]:
# Preview the first rows of the score table (includes the summed rarity_score column).
scores.head()

Unnamed: 0,token_id,1/1,4:20 watch,background,beard,chain,eyes,hat over headphones,hat under headphones,headphones,long hair,mouth,shirt,short hair,smoke,type,rarity_score
0,1,0.0,6.85401,5.417118,0.0,0.0,16.313214,0.0,31.347962,4.130525,0.0,1.260081,17.699115,0.0,0.0,2.526529,85.548553
1,2,0.0,100.0,4.943154,0.0,0.0,1.37855,0.0,50.0,9.615385,5.437738,1.260081,0.0,0.0,2.624672,2.526529,177.786107
2,3,0.0,6.85401,112.359551,9.65251,0.0,112.359551,0.0,243.902439,4.130525,0.0,1.260081,0.0,0.0,2.624672,112.359551,605.502887
3,4,0.0,18.018018,5.065856,0.0,0.0,1.37855,0.0,0.0,4.130525,0.0,1.260081,0.0,44.642857,2.677376,1.690331,78.863594
4,5,0.0,0.0,4.943154,0.0,0.0,19.011407,0.0,31.347962,9.615385,5.437738,1.260081,0.0,0.0,2.677376,1.690331,75.983434


In [9]:
# Rename the rarity columns to snake_case names with a "_rarity" suffix.
# The mapping is positional: new_cols[i] replaces the i-th existing column.
old_cols = list(rarities.columns)
new_cols = [
    "token_id",
    "1of1_rarity",
    "watch_rarity",
    "background_rarity",
    "beard_rarity",
    "chain_rarity",
    "eyes_rarity",
    "hat_over_headphones_rarity",
    "hat_under_headphones_rarity",
    "headphones_rarity",
    "long_hair_rarity",
    "mouth_rarity",
    "shirt_rarity",
    "short_hair_rarity",
    "smoke_rarity",
    "type_rarity"
]
# zip() stops at the shorter sequence, so a length mismatch would silently
# leave trailing columns un-renamed; fail loudly instead.
if len(old_cols) != len(new_cols):
    raise ValueError(
        f"rarities has {len(old_cols)} columns but {len(new_cols)} new names were given"
    )
names_dict = dict(zip(old_cols, new_cols))
rarities = rarities.rename(columns=names_dict)

In [10]:
# Count the columns currently in the score table.
len(list(scores.columns))

17

In [11]:
# Rename the score columns to snake_case names with a "_score" suffix.
# The mapping is positional: new_cols[i] replaces the i-th existing column.
old_cols = list(scores.columns)
new_cols = [
    "token_id",
    "1of1_score",
    "watch_score",
    "background_score",
    "beard_score",
    "chain_score",
    "eyes_score",
    "hat_over_headphones_score",
    "hat_under_headphones_score",
    "headphones_score",
    "long_hair_score",
    "mouth_score",
    "shirt_score",
    "short_hair_score",
    "smoke_score",
    "type_score",
    'rarity_score'
]
# zip() stops at the shorter sequence, so a length mismatch would silently
# leave trailing columns un-renamed; fail loudly instead.
if len(old_cols) != len(new_cols):
    raise ValueError(
        f"scores has {len(old_cols)} columns but {len(new_cols)} new names were given"
    )
names_dict = dict(zip(old_cols, new_cols))
scores = scores.rename(columns=names_dict)

In [12]:
# Persist both derived tables as CSV; index=False keeps the row index out of the files.
rarities.to_csv('./data/feature_rarity.csv', index=False)
scores.to_csv('./data/feature_scores.csv', index=False)

-------

#### Scraping Collection Trading Stats as Potential Features

In [13]:
#setting up requests

collection_slug = 'mfers'
url = f'https://api.opensea.io/api/v1/collection/{collection_slug}'

# requests has no default timeout, so without one a stalled connection would
# block "Run All" forever; 30 seconds is a generous ceiling for one JSON call.
res = requests.get(url, timeout=30)
# Fail here, with the HTTP status in the error, rather than later with an
# opaque KeyError/JSON error when the response body is parsed.
res.raise_for_status()
res.status_code

200

In [14]:
# Keep the 'stats' entry of the response's 'collection' object.
stats_dict = res.json()['collection']['stats']

In [15]:
# Wrap the stats mapping in a Series (keys become the index labels) and display it.
stats = pd.Series(stats_dict)
stats

one_day_volume                262.080800
one_day_change                 -0.012042
one_day_sales                  81.000000
one_day_average_price           3.235565
seven_day_volume             4460.061519
seven_day_change               -0.436667
seven_day_sales              1082.000000
seven_day_average_price         4.122053
thirty_day_volume           19705.088064
thirty_day_change              15.286376
thirty_day_sales             7848.000000
thirty_day_average_price        2.510842
total_volume                23444.929392
total_sales                 20434.000000
total_supply                10020.000000
count                       10020.000000
num_owners                   5072.000000
average_price                   1.147349
num_reports                     7.000000
market_cap                  41302.972660
floor_price                     2.790000
dtype: float64

In [16]:
# Persist the stats Series; the index labels are written too, which is what lets
# the file be read back with index_col=0.
stats.to_csv('./data/stats.csv')

In [17]:
#to read csv in as a series:
# read_csv's `squeeze=True` keyword was deprecated in pandas 1.4 and removed in
# 2.0; squeezing the one-column frame afterwards gives the same Series on every version.
stats = pd.read_csv('./data/stats.csv', index_col=0).squeeze('columns')

In [18]:
# Spot-check that the reloaded object supports label lookup like the original Series.
stats['one_day_volume']

262.08079999999995

-------

#### Merging Data for Modeling

In [19]:
# Load the cleaned sales records and expose the 'nft' column as 'token_id',
# the key name used by the feature, rarity and score tables.
sales = (
    pd.read_csv('./data/clean_sales_data.csv')
    .rename(columns={'nft': 'token_id'})
)
sales.head(3)

Unnamed: 0,date,token_id,payment,buyer,seller,price,priceUsd
0,2022-02-19 05:46:20+00:00,37,ETH,0x6365398036d4ef045254fce2e525ac59760b186b,0xe6fe14a30631b22d3f78d9ec7869eb62d6e73daa,3.3,8370.462853
1,2022-02-24 02:15:15+00:00,37,ETH,0xcb4652bb0396880c3b2609d5a37e19df73180ede,0x6365398036d4ef045254fce2e525ac59760b186b,3.75,9653.748962
2,2022-02-07 06:38:47+00:00,69,ETH,0xc441eb1f41b06826db885e81486b086294a925fd,0xdb6cfff6c0955676bfa8ce795d5beffbab793298,6.9,17423.477522


In [20]:
# Preview three rows of the raw feature table before merging.
features.head(3)

Unnamed: 0,token_id,1/1,4:20 watch,background,beard,chain,eyes,hat over headphones,hat under headphones,headphones,long hair,mouth,shirt,short hair,smoke,type
0,1,0,sub red,red,0,0,nerd glasses,0,bandana dark gray,white headphones,0,smile,collared shirt blue,0,0,charcoal mfer
1,2,0,sub bat (blue/black),orange,0,0,regular eyes,0,headband green/white,pink headphones,long hair black,smile,0,0,cig black,charcoal mfer
2,3,0,sub red,graveyard,full beard,0,zombie eyes,0,beanie,white headphones,0,smile,0,0,cig black,zombie mfer


In [21]:
# Preview three rows of the renamed rarity table before merging.
rarities.head(3)

Unnamed: 0,token_id,1of1_rarity,watch_rarity,background_rarity,beard_rarity,chain_rarity,eyes_rarity,hat_over_headphones_rarity,hat_under_headphones_rarity,headphones_rarity,long_hair_rarity,mouth_rarity,shirt_rarity,short_hair_rarity,smoke_rarity,type_rarity
0,1,0.0,0.1459,0.1846,0.0,0.0,0.0613,0.0,0.0319,0.2421,0.0,0.7936,0.0565,0.0,0.0,0.3958
1,2,0.0,0.01,0.2023,0.0,0.0,0.7254,0.0,0.02,0.104,0.1839,0.7936,0.0,0.0,0.381,0.3958
2,3,0.0,0.1459,0.0089,0.1036,0.0,0.0089,0.0,0.0041,0.2421,0.0,0.7936,0.0,0.0,0.381,0.0089


In [22]:
# Preview three rows of the renamed score table before merging.
scores.head(3)

Unnamed: 0,token_id,1of1_score,watch_score,background_score,beard_score,chain_score,eyes_score,hat_over_headphones_score,hat_under_headphones_score,headphones_score,long_hair_score,mouth_score,shirt_score,short_hair_score,smoke_score,type_score,rarity_score
0,1,0.0,6.85401,5.417118,0.0,0.0,16.313214,0.0,31.347962,4.130525,0.0,1.260081,17.699115,0.0,0.0,2.526529,85.548553
1,2,0.0,100.0,4.943154,0.0,0.0,1.37855,0.0,50.0,9.615385,5.437738,1.260081,0.0,0.0,2.624672,2.526529,177.786107
2,3,0.0,6.85401,112.359551,9.65251,0.0,112.359551,0.0,243.902439,4.130525,0.0,1.260081,0.0,0.0,2.624672,112.359551,605.502887


In [23]:
#there's only transaction data available for 8983 of 10000 tokens
# number of distinct token ids that appear in the sales records
sales['token_id'].unique().size

8983

In [24]:
# (rows, columns) of the sales table; the row count is the baseline the merged frames are compared to.
sales.shape

(19784, 7)

In [25]:
# validate='many_to_one' makes pandas raise MergeError if a token_id appears
# more than once in a right-hand table; without it, duplicate keys would
# silently multiply sale rows and the merge would no longer be "clean".

#merging all numeric data on 'token_id'
df = (
    sales
    .merge(rarities, how='inner', on='token_id', validate='many_to_one')
    .merge(scores, how='inner', on='token_id', validate='many_to_one')
)

#merging all data on 'token_id'
df2 = (
    sales
    .merge(features, how='inner', on='token_id', validate='many_to_one')
    .merge(rarities, how='inner', on='token_id', validate='many_to_one')
    .merge(scores, how='inner', on='token_id', validate='many_to_one')
)

In [26]:
#checking for clean merge
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19784 entries, 0 to 19783
Data columns (total 38 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   date                         19784 non-null  object 
 1   token_id                     19784 non-null  int64  
 2   payment                      19784 non-null  object 
 3   buyer                        19784 non-null  object 
 4   seller                       19784 non-null  object 
 5   price                        19784 non-null  float64
 6   priceUsd                     19784 non-null  float64
 7   1of1_rarity                  19784 non-null  float64
 8   watch_rarity                 19784 non-null  float64
 9   background_rarity            19784 non-null  float64
 10  beard_rarity                 19784 non-null  float64
 11  chain_rarity                 19784 non-null  float64
 12  eyes_rarity                  19784 non-null  float64
 13  hat_over_headpho

In [27]:
# Same row-count / non-null check for the frame that also carries the raw feature columns.
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19784 entries, 0 to 19783
Data columns (total 53 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   date                         19784 non-null  object 
 1   token_id                     19784 non-null  int64  
 2   payment                      19784 non-null  object 
 3   buyer                        19784 non-null  object 
 4   seller                       19784 non-null  object 
 5   price                        19784 non-null  float64
 6   priceUsd                     19784 non-null  float64
 7   1/1                          19784 non-null  object 
 8   4:20 watch                   19784 non-null  object 
 9   background                   19784 non-null  object 
 10  beard                        19784 non-null  object 
 11  chain                        19784 non-null  object 
 12  eyes                         19784 non-null  object 
 13  hat over headpho

In [28]:
# Persist both merged frames as CSV; index=False keeps the row index out of the files.
df.to_csv('./data/nft_numeric.csv', index=False)
df2.to_csv('./data/nft_all.csv', index=False)