# Capstone: NFTs
## Part VI: Scratch

### 1. Imports

Resources:
- https://moscow25.medium.com/predicting-cryptopunk-prices-the-case-for-jpegs-e4fc0f0fafd1
- https://medium.com/geekculture/cosine-similarity-and-cosine-distance-48eed889a5c4
- https://raritytools.medium.com/ranking-rarity-understanding-rarity-calculation-methods-86ceaeb9b98c
- https://goodboychan.github.io/python/datacamp/natural_language_processing/2020/07/17/04-TF-IDF-and-similarity-scores.html
- citation: https://stackoverflow.com/questions/1894269/how-to-convert-string-representation-of-list-to-a-list

### 3. Get CryptoPunk Owners' Accounts

In [None]:
I will web scrape Larva Labs to get each CryptoPunk owner's account number:

In [None]:
# citation: https://www.larvalabs.com/cryptopunks/leaderboard

# Download the Larva Labs leaderboard page and parse it for the next cells.
url = 'https://www.larvalabs.com/cryptopunks/leaderboard#'

# a timeout keeps the cell from hanging forever if the site never answers
response = requests.get(url, timeout=30)

# fail loudly on an HTTP error instead of parsing an error page
response.raise_for_status()

# name the parser explicitly so the result does not depend on which optional
# parser libraries happen to be installed (and bs4 stops warning about it)
soup = BeautifulSoup(response.content, 'html.parser')
soup

In [None]:
# result set where class is equal to pinklink
small_a = soup.find_all('a', class_="pinklink")

# href of every pinklink anchor (the former `if link in small_a` filter was
# always true and only added a quadratic membership scan)
links = [link.attrs['href'] for link in small_a]

# number of leading characters dropped from each href to leave the account
# NOTE(review): 33 is presumably the length of the URL prefix that precedes
# the account number -- confirm against the hrefs actually scraped
ACCOUNT_OFFSET = 33

# get account number from link
accounts = [link[ACCOUNT_OFFSET:] for link in links]

### 4. Get Each NFT Sale & Picture Data



I will use OpenSea's API to grab each CryptoPunk's sale data and picture URL:

In [None]:
# there are approx. 3200 crypto punk owners at the time this data was collected
print(len(accounts))

# size of the accounts list divided by 20 (the earlier note said "10 sections",
# but the divisor has always been 20); computed from the list itself instead of
# the hard-coded 3194 so it stays correct when the scrape is re-run
print(len(accounts)/20)

In [None]:
# citation: https://docs.opensea.io/reference/getting-assets

# Query the OpenSea assets endpoint once per owner account and stack the
# responses into a single dataframe.
assets_url = "https://api.opensea.io/api/v1/assets"

# one dataframe per account; they are concatenated once after the loop because
# growing a dataframe inside the loop re-copies every earlier row each time
account_frames = []

for counter, account in enumerate(accounts, start=1):
    # let requests build (and escape) the query string
    query = {
        "owner": account,
        "order_direction": "desc",
        "offset": 0,
        "limit": 20,
        "collection": "cryptopunks",
    }

    # requests
    res = requests.get(assets_url, params=query, timeout=30)

    # stop on an HTTP error rather than storing an error payload as data
    res.raise_for_status()

    # convert the decoded JSON body to a dataframe
    account_frames.append(pd.DataFrame(res.json()))

    # add wait time b/c can only call 20 at a time - pause 2 seconds
    if counter % 20 == 0:
        time.sleep(2)

# ignore_index renumbers the rows 0..n-1; pd.concat refuses an empty list, so
# an empty accounts list falls back to an empty dataframe
all_accounts_info = (
    pd.concat(account_frames, ignore_index=True)
    if account_frames
    else pd.DataFrame()
)

In [None]:
# display the dataframe built from the per-account API responses
all_accounts_info

In [None]:
def _punk_row(asset):
    """Return the flat record kept for one asset dictionary.

    Reads 'token_id', 'num_sales', 'image_original_url' and 'last_sale' from
    ``asset``.  When 'last_sale' is None the three sale fields are None;
    otherwise they come from the nested sale dictionary.

    Raises KeyError if one of the expected keys is missing.
    """
    last_sale = asset['last_sale']

    # if empty sale set value to None, otherwise get price
    if last_sale is None:
        total_price = usd_price = event_timestamp = None
    else:
        # gets rid of trailing 0's only (strip() would also eat leading ones)
        # NOTE(review): dropping zeros from the raw price string makes values
        # such as '1000' and '10' both come out as '1' -- confirm this is the
        # intended representation
        total_price = str(last_sale['total_price']).rstrip("0")
        # rounds price
        usd_price = np.round(float(last_sale['payment_token']['usd_price']), 4)
        event_timestamp = last_sale['event_timestamp']

    return {'token_id': asset['token_id'],
            'num_sales': asset['num_sales'],
            'image_original_url': asset['image_original_url'],
            'event_timestamp': event_timestamp,
            'total_price': total_price,
            'usd_price': usd_price
           }


# the first column holds one asset dictionary per row; a frame without any
# columns (no accounts were downloaded) simply has no assets
if all_accounts_info.shape[1]:
    asset_column = all_accounts_info.iloc[:, 0]
else:
    asset_column = []

# build the dataframe in one go: DataFrame.append was removed in pandas 2.0
# and re-copied the whole frame on every row
punk_info_df = pd.DataFrame([_punk_row(asset) for asset in asset_column])

### 5. Create Plot with Range Slider

In [None]:
#time series with range slider

# split the punk's transactions by the value of the 'trans' column
trans_kind = punk_1_trans['trans']
punk_1_sold = punk_1_trans[trans_kind == 'Sold']
punk_1_bid = punk_1_trans[trans_kind == 'Bid']
punk_1_offered = punk_1_trans[trans_kind == 'Offered']

# only the 'Offered' rows are drawn
# NOTE(review): `fig` is not created in this cell -- it has to exist already
offered_trace = go.Scatter(
    x=punk_1_offered['date'],
    y=punk_1_offered['usd'],
    xperiod="M1",
    xperiodalignment="middle",
    hovertemplate="Price: $%{y}k<br>Date: %{x}",
    # hover_data={"date": "|%B %d, %Y"}
)
fig.add_trace(offered_trace)

# quick-zoom buttons shown with the range slider
range_buttons = [
    dict(count=1, label="1 month", step="month", stepmode="backward"),
    dict(count=6, label="6 months", step="month", stepmode="backward"),
    # dict(count=1, label="Year to Date", step="year", stepmode="todate"),
    dict(count=1, label="1 Year", step="year", stepmode="backward"),
    dict(label= 'All', step="all"),
]

fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(buttons=range_buttons),
)
fig.update_yaxes(autorange="reversed")
fig.update_layout(xaxis_range=['2017-06-23','2021-11-23'])

### 6. Get Average and Max Sales, Bids, and Offers

Average Price:

In [None]:
# rows whose 'trans' is 'Sold', reduced to the two columns used below
df_sale = df.loc[df['trans'] == 'Sold', ['punk_id', 'usd']]

# mean of 'usd' per punk_id, with punk_id back as an ordinary column
df_avg_sale = (
    df_sale.groupby("punk_id")[['usd']]
    .mean()
    .reset_index()
    .rename(columns={'usd': 'avg_usd_sale'})
)

Max Sale:

In [None]:
# largest 'usd' value per punk_id among the sale rows, with punk_id back as
# an ordinary column
df_max_sale = (
    df_sale.groupby("punk_id")[['usd']]
    .max()
    .reset_index()
    .rename(columns={'usd': 'max_usd_sale'})
)

Average Bid:

In [None]:
# rows whose 'trans' is 'Bid', reduced to the two columns used below
df_bid = df.loc[df['trans'] == 'Bid', ['punk_id', 'usd']]

# mean of 'usd' per punk_id, with punk_id back as an ordinary column
df_avg_bid = (
    df_bid.groupby("punk_id")[['usd']]
    .mean()
    .reset_index()
    .rename(columns={'usd': 'avg_usd_bid'})
)

Max Bid:

In [None]:
# largest 'usd' value per punk_id among the bid rows, with punk_id back as
# an ordinary column
df_max_bid = (
    df_bid.groupby("punk_id")[['usd']]
    .max()
    .reset_index()
    .rename(columns={'usd': 'max_usd_bid'})
)

Average Offer:

In [None]:
# rows whose 'trans' is 'Offered', reduced to the two columns used below
df_offer = df.loc[df['trans'] == 'Offered', ['punk_id', 'usd']]

# mean of 'usd' per punk_id, with punk_id back as an ordinary column
df_avg_offer = (
    df_offer.groupby("punk_id")[['usd']]
    .mean()
    .reset_index()
    .rename(columns={'usd': 'avg_usd_offer'})
)

Max Offer:

In [None]:
# largest 'usd' value per punk_id among the offer rows, with punk_id back as
# an ordinary column
df_max_offer = (
    df_offer.groupby("punk_id")[['usd']]
    .max()
    .reset_index()
    .rename(columns={'usd': 'max_usd_offer'})
)

In [None]:
# adapted from: https://goodboychan.github.io/python/datacamp/natural_language_processing/2020/07/17/04-TF-IDF-and-similarity-scores.html

def get_recommendations(punk_id):
    """Return the ten punks whose accessories are most similar to ``punk_id``.

    The candidate pool is ``normal_types_indiv`` when the punk's type is
    'female' or 'male' and ``special_types_indiv`` otherwise.  TF-IDF vectors
    are built from the pool's 'clean_accessories' column and the query punk's
    vector is compared with every pool row through a linear kernel.

    Parameters:
        punk_id: value looked up in the pool's 'punk_id' column.

    Returns:
        A list of at most ten ``(row_position, score)`` tuples ordered from
        most to least similar.  ``row_position`` is the row number inside the
        chosen pool; the query punk itself is never included.

    Raises:
        KeyError: if ``punk_id`` is not present in the chosen pool.
    """
    # check if punk is female or male to ensure the normal types are grouped
    # together, as the normal types sell for far lower than the special types
    # NOTE(review): .iloc is positional, so this assumes row number == punk_id
    # in new_indiv -- confirm
    if new_indiv['type'].iloc[punk_id] in ('female', 'male'):
        print("normal")
        pool = normal_types_indiv
    else:
        pool = special_types_indiv
        print("special")

    # map punk_id -> row position inside the pool; positions (not index
    # labels) are what line up with the rows of the TF-IDF matrix
    indices = pd.Series(range(len(pool)), index=pool['punk_id'])

    # construct the TF-IDF matrix from the same pool the indices refer to
    tfidf_matrix = TfidfVectorizer().fit_transform(pool['clean_accessories'])

    # get the row of the punk that matches the punk_id
    idx = indices[punk_id]

    # similarity of the query punk to every punk in the pool; only this one
    # row is needed, so the full pairwise matrix is never materialised
    scores = linear_kernel(tfidf_matrix[idx], tfidf_matrix).ravel()

    # sort the punks based on the similarity scores
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

    # drop the query punk explicitly: on a tie another punk can sort ahead of
    # it, so slicing off the first element is not reliable
    return [pair for pair in ranked if pair[0] != idx][:10]

In [None]:
# adapted from: https://goodboychan.github.io/python/datacamp/natural_language_processing/2020/07/17/04-TF-IDF-and-similarity-scores.html

def get_recommendations(punk_id):
    """Return the ten most similar punks of the same type group as ``punk_id``.

    TF-IDF vectors are built from ``new_indiv['clean_accessories']`` and the
    query punk's vector is compared with every row through a linear kernel.
    Candidates are then restricted by ``new_indiv['type']``: a 'female' or
    'male' punk is only matched with 'female'/'male' punks, any other punk
    only with 'ape', 'alien' or 'zombie' punks.

    Parameters:
        punk_id: value looked up in ``new_indiv['punk_id']``.

    Returns:
        A list of at most ten ``(row_position, score)`` tuples ordered from
        most to least similar.  ``row_position`` is the row number inside
        ``new_indiv``; the query punk itself is never included.

    Raises:
        KeyError: if ``punk_id`` is not present in ``new_indiv['punk_id']``.
    """
    normal_types = ('female', 'male')
    special_types = ('ape', 'alien', 'zombie')

    # map punk_id -> row position; positions (not index labels) are what line
    # up with the rows of the TF-IDF matrix
    indices = pd.Series(range(len(new_indiv)), index=new_indiv['punk_id'])

    # construct the TF-IDF matrix from the accessories column
    tfidf_matrix = TfidfVectorizer().fit_transform(new_indiv['clean_accessories'])

    # get the row of the punk that matches the punk_id
    idx = indices[punk_id]

    # similarity of the query punk to every punk; only this one row is needed,
    # so the full pairwise matrix is never materialised
    scores = linear_kernel(tfidf_matrix[idx], tfidf_matrix).ravel()

    # type of every row, by position
    types = new_indiv['type'].to_numpy()

    # if normal type only want to return normal types, otherwise special types
    wanted = normal_types if types[idx] in normal_types else special_types

    # sort the punks based on the similarity scores
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)

    # keep each candidate's own (position, score) pair -- the sorted list must
    # not be indexed by row position -- and drop the query punk explicitly
    # instead of assuming it sorts first
    type_sim_scores = [
        (position, score)
        for position, score in ranked
        if position != idx and types[position] in wanted
    ]

    # return the top 10 most similar punks
    return type_sim_scores[:10]

Highest Sale:

In [None]:
# largest 'usd' value per punk_id among the sale rows, with punk_id back as
# an ordinary column (same computation as the "Max Sale" cell earlier on)
df_max_sale = (
    df_sale.groupby("punk_id")[['usd']]
    .max()
    .reset_index()
    .rename(columns={'usd': 'max_usd_sale'})
)

In [None]:
# display the per-punk maximum sale table
df_max_sale

In [1]:
# params = {
#     'ridge__alpha' : [.01, .1, 1, 10, 100],
# }

# ct1 = make_column_transformer(
#     (OneHotEncoder(handle_unknown = 'ignore', sparse=False), make_column_selector(dtype_include = object)), 
#     remainder = 'passthrough'
# )

# ct1

# pipe = make_pipeline(ct1,StandardScaler(),Ridge())

# gs = GridSearchCV(pipe, params, n_jobs=-1)
# gs.fit(X_train, y_train)

# ValueError: X has 19 features, but StandardScaler is expecting 15 features as input.

# gs.score(X_train, y_train)