#### Imports

In [28]:
import pandas as pd
import pickle
import numpy as np
import requests

#### Creating a DataFrame of Features for Each Token in Collection

In [2]:
#opening pickled "attrs" list

with open ('attributes.pkl', 'rb') as handle:
    features_list = pickle.load(handle)

In [3]:
#checking for expected deserialization
features_list[0]

{'name': 'mfer #1',
 'description': 'mfers by sartoshi',
 'attributes': [{'key': 'background', 'value': 'red'},
  {'key': 'type', 'value': 'charcoal mfer'},
  {'key': 'eyes', 'value': 'nerd glasses'},
  {'key': 'mouth', 'value': 'smile'},
  {'key': 'headphones', 'value': 'white headphones'},
  {'key': '4:20 watch', 'value': 'sub red'},
  {'key': 'hat under headphones', 'value': 'bandana dark gray'},
  {'key': 'shirt', 'value': 'collared shirt blue'}],
 'content': [{'@type': 'IMAGE',
   'url': 'https://rarible.mypinata.cloud/ipfs/QmWmgfYhDWjzVheQyV2TnpVXYnKR25oLWCB2i9JeBxsJbz',
   'representation': 'ORIGINAL',
   'mimeType': 'image/png',
   'width': 1000,
   'height': 1000}],
 'restrictions': []}

In [4]:
#making a dataframe of 10_000 entries, column names being each feature category

column_names = ['token_id','1/1','4:20 watch','background','beard','chain','eyes','hat over headphones','hat under headphones','headphones','long hair','mouth','shirt','short hair','smoke','type']
features_df = pd.DataFrame(columns=column_names, index=range(10_000))

In [5]:
#iteratively populating dataframe

for n in range(len(features_list)):
    features_df.token_id[n] = features_list[n]['name'].split('#')[-1]
    for d in features_list[n]['attributes']:
        for col in features_df.columns:
            if d['key'] == col:
                features_df[col][n] = d['value']
            #else:
                #features_df[col][n] = 0

In [6]:
features_df.head(3)

Unnamed: 0,token_id,1/1,4:20 watch,background,beard,chain,eyes,hat over headphones,hat under headphones,headphones,long hair,mouth,shirt,short hair,smoke,type
0,1,,sub red,red,,,nerd glasses,,bandana dark gray,white headphones,,smile,collared shirt blue,,,charcoal mfer
1,2,,sub bat (blue/black),orange,,,regular eyes,,headband green/white,pink headphones,long hair black,smile,,,cig black,charcoal mfer
2,3,,sub red,graveyard,full beard,,zombie eyes,,beanie,white headphones,,smile,,,cig black,zombie mfer


In [7]:
features_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   token_id              10000 non-null  object
 1   1/1                   21 non-null     object
 2   4:20 watch            7612 non-null   object
 3   background            9966 non-null   object
 4   beard                 1032 non-null   object
 5   chain                 487 non-null    object
 6   eyes                  9966 non-null   object
 7   hat over headphones   464 non-null    object
 8   hat under headphones  2949 non-null   object
 9   headphones            9909 non-null   object
 10  long hair             2580 non-null   object
 11  mouth                 9966 non-null   object
 12  shirt                 4401 non-null   object
 13  short hair            1815 non-null   object
 14  smoke                 8262 non-null   object
 15  type                  9966 non-null  

In [8]:
features_df.loc[features_df['1/1']!=0][:20]

Unnamed: 0,token_id,1/1,4:20 watch,background,beard,chain,eyes,hat over headphones,hat under headphones,headphones,long hair,mouth,shirt,short hair,smoke,type
0,1,,sub red,red,,,nerd glasses,,bandana dark gray,white headphones,,smile,collared shirt blue,,,charcoal mfer
1,2,,sub bat (blue/black),orange,,,regular eyes,,headband green/white,pink headphones,long hair black,smile,,,cig black,charcoal mfer
2,3,,sub red,graveyard,full beard,,zombie eyes,,beanie,white headphones,,smile,,,cig black,zombie mfer
3,4,,sub lantern (green),green,,,regular eyes,,,white headphones,,smile,,mohawk blue,cig white,plain mfer
4,5,,,orange,,,shades,,bandana dark gray,pink headphones,long hair black,smile,,,cig white,plain mfer
5,6,,argo black,orange,,,vr,,,black headphones,,smile,hoodie down gray,messy red,cig white,plain mfer
6,7,,oyster silver,yellow,,,regular eyes,,,white headphones,,smile,,mohawk pink,cig black,charcoal mfer
7,8,,sub lantern (green),orange,,,regular eyes,,,pink headphones,long hair black,smile,,,cig black,plain mfer
8,9,,sub black,orange,,,regular eyes,,,black headphones,,smile,collared shirt white,mohawk green,cig white,charcoal mfer
9,10,,oyster silver,red,full beard,,regular eyes,,,white headphones,,smile,hoodie down gray,,,plain mfer


In [9]:
#manually inputting missing data
new_vals={
    'token_id':646,
    '1/1':0,
    '4:20 watch':'sub blue',
    'background':'orange',
    'beard':0,
    'chain':0,
    'eyes':'regular eyes',
    'hat over headphones':'pilot helmet',
    'hat under headphones':0,
    'headphones':0,
    'long hair':'long hair yellow',
    'mouth':'flat',
    'shirt':0,
    'short hair':0,
    'smoke':'cig black',
    'type':'plain mfer'
}
features_df.iloc[645] = features_df.iloc[645].fillna(value=new_vals)

In [10]:
new_vals={
    'token_id':749,
    '1/1':0,
    '4:20 watch':0,
    'background':'orange',
    'beard':'full beard',
    'chain':0,
    'eyes':'purple shades',
    'hat over headphones':0,
    'hat under headphones':0,
    'headphones':'gold headphones',
    'long hair':'long hair black',
    'mouth':'smile',
    'shirt':'hoodie down pink',
    'short hair':0,
    'smoke':'cig black',
    'type':'plain mfer'
}
features_df.iloc[748] = features_df.iloc[748].fillna(value=new_vals)

In [11]:
new_vals={
    'token_id':870,
    '1/1':0,
    '4:20 watch':0,
    'background':'red',
    'beard':'full beard',
    'chain':0,
    'eyes':'regular eyes',
    'hat over headphones':0,
    'hat under headphones':0,
    'headphones':'green headphones',
    'long hair':0,
    'mouth':'smile',
    'shirt':'hoodie down blue',
    'short hair':0,
    'smoke':'cig black',
    'type':'plain mfer'
}
features_df.iloc[869] = features_df.iloc[869].fillna(value=new_vals)

In [12]:
new_vals={
    'token_id':1087,
    '1/1':0,
    '4:20 watch':'sub blue',
    'background':'green',
    'beard':0,
    'chain':0,
    'eyes':'regular eyes',
    'hat over headphones':0,
    'hat under headphones':0,
    'headphones':'white headphones',
    'long hair':0,
    'mouth':'flat',
    'shirt':'collared shirt white',
    'short hair':'mohawk pink',
    'smoke':'cig white',
    'type':'plain mfer'
}
features_df.iloc[1086] = features_df.iloc[1086].fillna(value=new_vals)

In [13]:
new_vals={
    'token_id':1153,
    '1/1':0,
    '4:20 watch':'sub rose',
    'background':'green',
    'beard':0,
    'chain':0,
    'eyes':'regular eyes',
    'hat over headphones':0,
    'hat under headphones':'beanie',
    'headphones':'white headphones',
    'long hair':0,
    'mouth':'smile',
    'shirt':'hoodie down green',
    'short hair':0,
    'smoke':0,
    'type':'charcoal mfer'
}
features_df.iloc[1152] = features_df.iloc[1152].fillna(value=new_vals)

In [14]:
new_vals={
    'token_id':1398,
    '1/1':0,
    '4:20 watch':'sub red',
    'background':'orange',
    'beard':0,
    'chain':0,
    'eyes':'regular eyes',
    'hat over headphones':0,
    'hat under headphones':0,
    'headphones':'pink headphones',
    'long hair':'long hair black',
    'mouth':'smile',
    'shirt':0,
    'short hair':0,
    'smoke':'pipe',
    'type':'plain mfer'
}
features_df.iloc[1397] = features_df.iloc[1397].fillna(value=new_vals)

In [15]:
new_vals={
    'token_id':1483,
    '1/1':0,
    '4:20 watch':'sub black',
    'background':'yellow',
    'beard':0,
    'chain':0,
    'eyes':'regular eyes',
    'hat over headphones':0,
    'hat under headphones':'headband pink/white',
    'headphones':'white headphones',
    'long hair':'long hair black',
    'mouth':'smile',
    'shirt':'collared shirt white',
    'short hair':0,
    'smoke':'cig white',
    'type':'plain mfer'
}
features_df.iloc[1482] = features_df.iloc[1482].fillna(value=new_vals)

In [16]:
new_vals={
    'token_id':1694,
    '1/1':0,
    '4:20 watch':0,
    'background':'yellow',
    'beard':0,
    'chain':0,
    'eyes':'eye mask',
    'hat over headphones':0,
    'hat under headphones':0,
    'headphones':'white headphones',
    'long hair':0,
    'mouth':'smile',
    'shirt':'collared shirt white',
    'short hair':0,
    'smoke':'cig white',
    'type':'plain mfer'
}
features_df.iloc[1693] = features_df.iloc[1693].fillna(value=new_vals)

In [17]:
new_vals={
    'token_id':1713,
    '1/1':0,
    '4:20 watch':0,
    'background':'yellow',
    'beard':0,
    'chain':0,
    'eyes':'regular eyes',
    'hat over headphones':0,
    'hat under headphones':'knit new york',
    'headphones':'black headphones',
    'long hair':0,
    'mouth':'smile',
    'shirt':0,
    'short hair':0,
    'smoke':'cig black',
    'type':'charcoal mfer'
}
features_df.iloc[1712] = features_df.iloc[1712].fillna(value=new_vals)

In [18]:
new_vals={
    'token_id':2045,
    '1/1':0,
    '4:20 watch':0,
    'background':'yellow',
    'beard':'full beard',
    'chain':0,
    'eyes':'nerd glasses',
    'hat over headphones':0,
    'hat under headphones':'cap monochrome',
    'headphones':'black headphones',
    'long hair':0,
    'mouth':'smile',
    'shirt':0,
    'short hair':0,
    'smoke':0,
    'type':'charcoal mfer'
}
features_df.iloc[2044] = features_df.iloc[2044].fillna(value=new_vals)

In [19]:
new_vals={
    'token_id':2240,
    '1/1':0,
    '4:20 watch':0,
    'background':'green',
    'beard':0,
    'chain':0,
    'eyes':'vr',
    'hat over headphones':0,
    'hat under headphones':0,
    'headphones':'white headphones',
    'long hair':'long hair black',
    'mouth':'flat',
    'shirt':0,
    'short hair':0,
    'smoke':'cig black',
    'type':'plain mfer'
}
features_df.iloc[2239] = features_df.iloc[2239].fillna(value=new_vals)

In [20]:
new_vals={
    'token_id':2371,
    '1/1':0,
    '4:20 watch':'sub blue',
    'background':'red',
    'beard':0,
    'chain':0,
    'eyes':'regular eyes',
    'hat over headphones':0,
    'hat under headphones':0,
    'headphones':'white headphones',
    'long hair':0,
    'mouth':'smile',
    'shirt':0,
    'short hair':0,
    'smoke':'cig white',
    'type':'charcoal mfer'
}
features_df.iloc[2370] = features_df.iloc[2370].fillna(value=new_vals)

In [21]:
new_vals={
    'token_id':2551,
    '1/1':0,
    '4:20 watch':'sub red',
    'background':'blue',
    'beard':0,
    'chain':0,
    'eyes':'regular eyes',
    'hat over headphones':0,
    'hat under headphones':'bandana dark gray',
    'headphones':'black headphones',
    'long hair':0,
    'mouth':'smile',
    'shirt':0,
    'short hair':0,
    'smoke':'cig white',
    'type':'charcoal mfer'
}
features_df.iloc[2550] = features_df.iloc[2550].fillna(value=new_vals)

In [22]:
features_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   token_id              10000 non-null  object
 1   1/1                   34 non-null     object
 2   4:20 watch            7625 non-null   object
 3   background            9979 non-null   object
 4   beard                 1045 non-null   object
 5   chain                 500 non-null    object
 6   eyes                  9979 non-null   object
 7   hat over headphones   477 non-null    object
 8   hat under headphones  2962 non-null   object
 9   headphones            9922 non-null   object
 10  long hair             2593 non-null   object
 11  mouth                 9979 non-null   object
 12  shirt                 4414 non-null   object
 13  short hair            1828 non-null   object
 14  smoke                 8275 non-null   object
 15  type                  9979 non-null  

In [23]:
features_df = features_df.fillna(0)

In [24]:
features_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 16 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   token_id              10000 non-null  object
 1   1/1                   10000 non-null  object
 2   4:20 watch            10000 non-null  object
 3   background            10000 non-null  object
 4   beard                 10000 non-null  object
 5   chain                 10000 non-null  object
 6   eyes                  10000 non-null  object
 7   hat over headphones   10000 non-null  object
 8   hat under headphones  10000 non-null  object
 9   headphones            10000 non-null  object
 10  long hair             10000 non-null  object
 11  mouth                 10000 non-null  object
 12  shirt                 10000 non-null  object
 13  short hair            10000 non-null  object
 14  smoke                 10000 non-null  object
 15  type                  10000 non-null 

In [26]:
features_df.to_csv('./data/clean_features.csv', index=False)

----