# Clustering Crypto

In [52]:
# Initial imports
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from bokeh.models.formatters import NumeralTickFormatter
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import json

### Fetching Cryptocurrency Data

In [2]:
# Fetch the full coin list as JSON from the CryptoCompare endpoint below.
url = "https://min-api.cryptocompare.com/data/all/coinlist"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
crypto_response = requests.get(url, timeout=30)
# Fail here on an HTTP error status rather than later with an opaque
# JSON-decoding error or a KeyError on "Data".
crypto_response.raise_for_status()
crypto_json = crypto_response.json()
# The coin records are read from the "Data" key of the decoded response.
crypto_data = crypto_json["Data"]

In [3]:
# Create a DataFrame from the 'Data' payload of the JSON response and transpose
# it so that rows and columns are swapped relative to the raw construction.
crypto_df = pd.DataFrame(crypto_data).T
# Save a snapshot of the unfiltered coin list to the working directory.
crypto_df.to_csv('crypto_df.csv')
# The preview must be the last expression of the cell to be displayed; a bare
# expression placed before another statement is evaluated and then discarded.
crypto_df.head()

### Data Preprocessing

In [4]:
# Narrow the frame to the columns used by the rest of the notebook.
# The supply figure is taken from the 'MaxSupply' column.
keep_columns = [
    "CoinName",
    "Algorithm",
    "IsTrading",
    "ProofType",
    "TotalCoinsMined",
    "MaxSupply",
]
crypto_df = crypto_df[keep_columns]
crypto_df

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,0,0
300,300 token,,True,,300,300
365,365Coin,X11,True,PoW/PoS,0,0
404,404Coin,Scrypt,True,PoW/PoS,0,0
433,433 Token,,False,,,
...,...,...,...,...,...,...
ARK,ARK,DPoS,True,DPoS,1.55309e+08,-1
ARDR,Ardor,,True,PoS,998999495,998999495
AION,Aion,"Equihash210,9",True,PoW/PoS,487496874,-1
MYST,Mysterium,,True,,0,0


In [5]:
# Keep only the rows whose 'IsTrading' value compares equal to True.
# The explicit comparison is kept as-is rather than using the column directly
# as a mask (presumably the column is not a clean boolean dtype - confirm).
is_trading = crypto_df["IsTrading"] == True
crypto_df = crypto_df.loc[is_trading]
crypto_df

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,0,0
300,300 token,,True,,300,300
365,365Coin,X11,True,PoW/PoS,0,0
404,404Coin,Scrypt,True,PoW/PoS,0,0
611,SixEleven,SHA-256,True,PoW,0,0
...,...,...,...,...,...,...
ARK,ARK,DPoS,True,DPoS,1.55309e+08,-1
ARDR,Ardor,,True,PoS,998999495,998999495
AION,Aion,"Equihash210,9",True,PoW/PoS,487496874,-1
MYST,Mysterium,,True,,0,0


In [6]:
# Discard every coin whose 'Algorithm' entry is the "N/A" placeholder.
has_algorithm = crypto_df["Algorithm"] != "N/A"
crypto_df = crypto_df.loc[has_algorithm]
crypto_df

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,0,0
365,365Coin,X11,True,PoW/PoS,0,0
404,404Coin,Scrypt,True,PoW/PoS,0,0
611,SixEleven,SHA-256,True,PoW,0,0
808,808,SHA-256,True,PoW/PoS,0,0
...,...,...,...,...,...,...
GENSTAKE,Genstake,Scrypt,True,PoW/PoS,,
BTC,Bitcoin,SHA-256,True,PoW,18627043,2.1e+07
BNB,Binance Coin,BEP2 Token,True,PoSA,1.70534e+08,1.74154e+08
ARK,ARK,DPoS,True,DPoS,1.55309e+08,-1


In [7]:
# Remove the 'IsTrading' column.
# Reassign instead of dropping in place: crypto_df was produced by filtering a
# larger frame, and mutating such a result in place is discouraged in pandas.
# errors="ignore" makes the cell safe to re-run after the column is already gone.
crypto_df = crypto_df.drop(columns="IsTrading", errors="ignore")
crypto_df

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,0,0
365,365Coin,X11,PoW/PoS,0,0
404,404Coin,Scrypt,PoW/PoS,0,0
611,SixEleven,SHA-256,PoW,0,0
808,808,SHA-256,PoW/PoS,0,0
...,...,...,...,...,...
GENSTAKE,Genstake,Scrypt,PoW/PoS,,
BTC,Bitcoin,SHA-256,PoW,18627043,2.1e+07
BNB,Binance Coin,BEP2 Token,PoSA,1.70534e+08,1.74154e+08
ARK,ARK,DPoS,DPoS,1.55309e+08,-1


In [8]:
# Treat the "N/A" placeholder and zero values as missing, drop every row that
# has at least one missing value, then order the rows by index label.
crypto_df = (
    crypto_df
    .replace(["N/A", 0], np.nan)
    .dropna(how="any")
    .sort_index()
)
crypto_df

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
AAC,Acute Angle Cloud,ECC 256K1,DPOS,1.000000e+09,-1.000000e+00
ADA,Cardano,Ouroboros,PoS,3.190692e+10,4.500000e+10
AEON,AEON,CryptoNight-Lite,PoW,1.777472e+07,-1.000000e+00
AION,Aion,"Equihash210,9",PoW/PoS,4.874969e+08,-1.000000e+00
AMB,Amber,Dagger,PoA,6.649843e+08,-1.000000e+00
...,...,...,...,...,...
XVG,Verge,Multiple,PoW,1.642954e+10,1.655500e+10
XWC,WhiteCoin,Scrypt,PoW/PoS,9.106302e+08,1.000000e+09
ZEC,ZCash,Equihash,PoW,1.078766e+07,2.100000e+07
ZEL,Zelcash,Equihash,PoW/PoS,1.215844e+08,2.100000e+08


In [9]:
# Keep the 'CoinName' column as its own Series (indexed like crypto_df) so the
# names stay available once the column is removed from crypto_df.
coin_names = crypto_df["CoinName"]
coin_names

AAC     Acute Angle Cloud
ADA               Cardano
AEON                 AEON
AION                 Aion
AMB                 Amber
              ...        
XVG                 Verge
XWC             WhiteCoin
ZEC                 ZCash
ZEL               Zelcash
ZEN               Horizen
Name: CoinName, Length: 109, dtype: object

In [10]:
# Drop the 'CoinName' column since it is not used by the clustering algorithm.
# Reassign rather than dropping in place so the frame is not mutated under
# earlier references; errors="ignore" lets the cell be re-run after the column
# has already been removed.
crypto_df = crypto_df.drop(columns="CoinName", errors="ignore")
crypto_df

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,MaxSupply
AAC,ECC 256K1,DPOS,1.000000e+09,-1.000000e+00
ADA,Ouroboros,PoS,3.190692e+10,4.500000e+10
AEON,CryptoNight-Lite,PoW,1.777472e+07,-1.000000e+00
AION,"Equihash210,9",PoW/PoS,4.874969e+08,-1.000000e+00
AMB,Dagger,PoA,6.649843e+08,-1.000000e+00
...,...,...,...,...
XVG,Multiple,PoW,1.642954e+10,1.655500e+10
XWC,Scrypt,PoW/PoS,9.106302e+08,1.000000e+09
ZEC,Equihash,PoW,1.078766e+07,2.100000e+07
ZEL,Equihash,PoW/PoS,1.215844e+08,2.100000e+08


In [11]:
# One-hot encode the two text features, then append the 'TotalCoinsMined'
# column to form the feature matrix.
text_features = ["Algorithm", "ProofType"]
dummies = pd.get_dummies(crypto_df[text_features])
crypto_features_df = pd.concat(
    [dummies, crypto_df["TotalCoinsMined"]],
    axis=1,
)
crypto_features_df

Unnamed: 0,Algorithm_BEP2 Token,Algorithm_BLAKE256,Algorithm_BMW512 / Echo512,Algorithm_Blake2B + SHA3,Algorithm_Blake2b,Algorithm_C31,Algorithm_CryptoNight,Algorithm_CryptoNight-Heavy,Algorithm_CryptoNight-Lite,Algorithm_DPoS,...,ProofType_PoW/PoS,ProofType_PoW/PoSe,ProofType_PoW/nPoS,ProofType_Proof of Authority,ProofType_SPoS,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW,ProofType_mPoW,TotalCoinsMined
AAC,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1.000000e+09
ADA,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3.190692e+10
AEON,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1.777472e+07
AION,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,4.874969e+08
AMB,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6.649843e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XVG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1.642954e+10
XWC,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,9.106302e+08
ZEC,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1.078766e+07
ZEL,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,1.215844e+08


In [12]:
# Standardize every feature column with scikit-learn's StandardScaler and
# print the first two scaled rows as a quick check.
scaler = StandardScaler()
crypto_scaled = scaler.fit_transform(crypto_features_df)
print(crypto_scaled[:2])

[[-0.09622504 -0.13671719 -0.09622504 -0.09622504 -0.13671719 -0.13671719
  -0.13671719 -0.09622504 -0.09622504 -0.2192645  -0.13671719 -0.09622504
  10.39230485 -0.09622504 -0.3        -0.09622504 -0.2192645  -0.09622504
  -0.09622504 -0.09622504 -0.09622504 -0.09622504 -0.09622504 -0.09622504
  -0.13671719 -0.09622504 -0.09622504 -0.13671719 -0.13671719 -0.09622504
  -0.16823165 -0.13671719 -0.35172623 -0.13671719 -0.09622504 -0.09622504
  -0.09622504 -0.42986348 -0.09622504 -0.09622504 -0.09622504 -0.09622504
  -0.09622504 -0.19518001 -0.09622504 -0.19518001 -0.09622504 -0.09622504
  -0.09622504 10.39230485 -0.2413554  -0.09622504 -0.09622504 -0.13671719
  -0.13671719 -0.09622504 -0.28143902 -0.09622504 -0.09622504 -1.00921678
  -0.48850421 -0.09622504 -0.09622504 -0.09622504 -0.09622504 -0.09622504
  -0.09622504 -0.09622504 -0.09622504 -0.17502319]
 [-0.09622504 -0.13671719 -0.09622504 -0.09622504 -0.13671719 -0.13671719
  -0.13671719 -0.09622504 -0.09622504 -0.2192645  -0.13671719

### Reducing Dimensions Using PCA

In [13]:
# Project the scaled features onto 3 principal components.
N_COMPONENTS = 3
pca = PCA(n_components=N_COMPONENTS)
crypto_pca = pca.fit_transform(crypto_scaled)

In [14]:
# Wrap the principal-component array in a DataFrame that reuses crypto_df's
# index, so each row of components is labelled like the row it came from.
df_crypto_pca = pd.DataFrame(
    crypto_pca,
    index=crypto_df.index,
    columns=["PC 1", "PC 2", "PC 3"],
)
df_crypto_pca

Unnamed: 0,PC 1,PC 2,PC 3
AAC,1.748882,-1.030330,2.996862
ADA,1.895165,-0.106950,-0.924198
AEON,-1.629064,0.115888,0.156843
AION,1.648128,-1.544495,-1.556438
AMB,1.829419,-0.374583,5.803898
...,...,...,...
XVG,-1.635794,0.206757,0.155187
XWC,1.011248,-1.213101,-1.247386
ZEC,-1.343103,-0.038467,0.050878
ZEL,0.560359,-0.936375,-0.816088


### Clustering Cryptocurrencies Using K-Means

#### Finding the Best Value for `k` Using the Elbow Curve

In [15]:
# Candidate cluster counts: 1 through 10.
k = list(range(1, 11))

# Fit one K-Means model per candidate and record its inertia.
# random_state=0 fixes the seed so repeated runs use the same initialization.
inertia = [
    KMeans(n_clusters=n_clusters, random_state=0).fit(df_crypto_pca).inertia_
    for n_clusters in k
]

# Create the Elbow Curve using hvPlot: inertia plotted against k.
elbow_data = dict(k=k, inertia=inertia)
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

#### Running K-Means with `k=4`

In [16]:
# Build and fit a 4-cluster K-Means model on the principal components
# (fit returns the fitted estimator, so both steps fit on one line).
model = KMeans(n_clusters=4, random_state=0).fit(df_crypto_pca)
# Assign each row of the principal-component frame to a cluster.
predictions = model.predict(df_crypto_pca)
# Combine the features, the principal components and the coin names side by
# side, then attach the predicted cluster as the "Class" column.
grouped_data = pd.concat(
    [crypto_df, df_crypto_pca, coin_names],
    axis=1,
).assign(Class=predictions)
grouped_data

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,MaxSupply,PC 1,PC 2,PC 3,CoinName,Class
AAC,ECC 256K1,DPOS,1.000000e+09,-1.000000e+00,1.748882,-1.030330,2.996862,Acute Angle Cloud,2
ADA,Ouroboros,PoS,3.190692e+10,4.500000e+10,1.895165,-0.106950,-0.924198,Cardano,0
AEON,CryptoNight-Lite,PoW,1.777472e+07,-1.000000e+00,-1.629064,0.115888,0.156843,AEON,1
AION,"Equihash210,9",PoW/PoS,4.874969e+08,-1.000000e+00,1.648128,-1.544495,-1.556438,Aion,0
AMB,Dagger,PoA,6.649843e+08,-1.000000e+00,1.829419,-0.374583,5.803898,Amber,2
...,...,...,...,...,...,...,...,...,...
XVG,Multiple,PoW,1.642954e+10,1.655500e+10,-1.635794,0.206757,0.155187,Verge,1
XWC,Scrypt,PoW/PoS,9.106302e+08,1.000000e+09,1.011248,-1.213101,-1.247386,WhiteCoin,0
ZEC,Equihash,PoW,1.078766e+07,2.100000e+07,-1.343103,-0.038467,0.050878,ZCash,1
ZEL,Equihash,PoW/PoS,1.215844e+08,2.100000e+08,0.560359,-0.936375,-0.816088,Zelcash,0


### Visualizing Results

#### 3D-Scatter with Clusters

In [17]:
# 3D scatter of the three principal components; both the marker colour and
# the marker symbol are driven by the "Class" column.
fig = px.scatter_3d(
    grouped_data,
    x="PC 3",
    y="PC 2",
    z="PC 1",
    color="Class",
    symbol="Class",
    color_continuous_scale="picnic",
    width=800,
)
# Place the legend at layout position x=0, y=1.
fig.update_layout(legend={"x": 0, "y": 1})
fig.show()


#### Table of Tradable Cryptocurrencies

In [18]:
# Table of tradable cryptos: coin names next to their features, plus the
# predicted cluster label in the "Class" column.
crypto_table = pd.concat([coin_names, crypto_df], axis=1).assign(Class=predictions)
crypto_table.hvplot.table()

In [19]:
# Report the number of rows in crypto_table, i.e. the tradable coins that
# remain after the preprocessing steps.
tradable_count = len(crypto_table)
print(f"The Total number of tradable cryprocurrencies: {tradable_count}")

The Total number of tradable cryprocurrencies: 109


#### Scatter Plot with Tradable Cryptocurrencies

In [57]:
# Scatter of x="TotalCoinsMined" against y="MaxSupply" (the supply column kept
# during preprocessing). Both axes use the '0,0' numeral tick format and the
# coin name is added to the hover tooltip.
crypto_table.hvplot.scatter(
    x="TotalCoinsMined",
    y="MaxSupply",
    hover_cols=["CoinName"],
    title="Cryptocurrencies Mined vs. Supply",
    xformatter=NumeralTickFormatter(format="0,0"),
    yformatter=NumeralTickFormatter(format="0,0"),
)