# Clustering Crypto

In [1]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from pathlib import Path

### Fetching Cryptocurrency Data

In [2]:
# Use the following endpoint to fetch json data
url = "https://min-api.cryptocompare.com/data/all/coinlist"

In [3]:
# Execute get request
response_data = requests.get(url)
# checking if the 'Data' exists in the Output data
response_data.content



In [4]:
import json

data = response_data.json()

# Create a DataFrame with 'Data' key
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.
df = pd.DataFrame(data["Data"])
df.head(10)

Unnamed: 0,42,300,365,404,433,611,808,888,1337,2015,...,PPC,BCN,BURST,SC,POA,AE,UTOPIA,XLM,UMI,TNCC
Id,4321,749869,33639,21227,926547,20909,28223,29462,20824,3744,...,2349,5280,5286,13072,793845,190978,936528,4614,936533,934865
Url,/coins/42/overview,/coins/300/overview,/coins/365/overview,/coins/404/overview,/coins/433/overview,/coins/611/overview,/coins/808/overview,/coins/888/overview,/coins/1337/overview,/coins/2015/overview,...,/coins/ppc/overview,/coins/bcn/overview,/coins/burst/overview,/coins/sc/overview,/coins/poa/overview,/coins/ae/overview,/coins/utopia/overview,/coins/xlm/overview,/coins/umi/overview,/coins/tncc/overview
ImageUrl,/media/35650717/42.jpg,/media/27010595/300.png,/media/352070/365.png,/media/35650851/404-300x300.jpg,/media/34836095/433.png,/media/35650940/611-sixeleven.png,/media/351513/808.png,/media/351639/888.png,/media/35520987/elite.png,/media/20180/2015.png,...,/media/19864/peercoin-logo.png,/media/12318404/bcn.png,/media/16746623/burst.png,/media/20726/siacon-logo.png,/media/35280537/poa.png,/media/1383836/ae.png,/media/37305506/utopia.png,/media/35521289/xlm.png,/media/37305509/umi.png,/media/36935227/tncc.png
ContentCreatedOn,1427211129,1517935016,1480032918,1466100361,1541597321,1465914773,1473980395,1475534352,1465838687,1425316878,...,1424105258,1430209309,1430209451,1443613824,1519908188,1500643421,1600078707,1428492077,1600093077,1589367512
Name,42,300,365,404,433,611,808,888,1337,2015,...,PPC,BCN,BURST,SC,POA,AE,UTOPIA,XLM,UMI,TNCC
Symbol,42,300,365,404,433,611,808,888,1337,2015,...,PPC,BCN,BURST,SC,POA,AE,UTOPIA,XLM,UMI,TNCC
CoinName,42 Coin,300 token,365Coin,404Coin,433 Token,SixEleven,808,Octocoin,EliteCoin,2015 coin,...,PeerCoin,ByteCoin,BurstCoin,Siacoin,Poa Network,Aeternity,Amp,Stellar,Universal Money Intrument,TNC Coin
FullName,42 Coin (42),300 token (300),365Coin (365),404Coin (404),433 Token (433),SixEleven (611),808 (808),Octocoin (888),EliteCoin (1337),2015 coin (2015),...,PeerCoin (PPC),ByteCoin (BCN),BurstCoin (BURST),Siacoin (SC),Poa Network (POA),Aeternity (AE),Amp (UTOPIA),Stellar (XLM),Universal Money Intrument (UMI),TNC Coin (TNCC)
Algorithm,Scrypt,,X11,Scrypt,,SHA-256,SHA-256,,X13,X11,...,,CryptoNight,Shabal256,Blake2b,Proof-of-Authority,,,,,
ProofType,PoW/PoS,,PoW/PoS,PoW/PoS,,PoW,PoW/PoS,PoW,PoW/PoS,PoW/PoS,...,PoS,PoW,PoC,PoW,PoA,PoW/PoS,,,,


In [5]:
# Alternatively, use the provided csv file:
# file_path = Path("../Resources/crypto_data.csv")
# Create a DataFrame
# df_crypto = pd.read_csv(file_path, encoding="ISO-8859-1")

#Transposing the Data
crypto_df = df.T
crypto_df.tail()

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Algorithm,ProofType,...,SortOrder,Sponsored,Taxonomy,Rating,IsTrading,TotalCoinsMined,BlockNumber,NetHashesPerSecond,BlockReward,BlockTime
AE,190978,/coins/ae/overview,/media/1383836/ae.png,1500643421,AE,AE,Aeternity,Aeternity (AE),,PoW/PoS,...,1375,False,"{'Access': 'Permissioned', 'FCA': 'Exchange,Ut...","{'Weiss': {'Rating': 'D-', 'TechnologyAdoption...",True,89792285.0,314042.0,940000.0,145.233,180.0
UTOPIA,936528,/coins/utopia/overview,/media/37305506/utopia.png,1600078707,UTOPIA,UTOPIA,Amp,Amp (UTOPIA),,,...,5783,False,"{'Access': 'Permissioned', 'FCA': 'Utility', '...","{'Weiss': {'Rating': '', 'TechnologyAdoptionRa...",False,19287800000.0,0.0,0.0,0.0,0.0
XLM,4614,/coins/xlm/overview,/media/35521289/xlm.png,1428492077,XLM,XLM,Stellar,Stellar (XLM),,,...,208,False,"{'Access': 'Permissioned', 'FCA': 'Exchange', ...","{'Weiss': {'Rating': 'B-', 'TechnologyAdoption...",True,20659000000.0,0.0,0.0,0.0,0.0
UMI,936533,/coins/umi/overview,/media/37305509/umi.png,1600093077,UMI,UMI,Universal Money Intrument,Universal Money Intrument (UMI),,,...,5784,False,"{'Access': 'Permissionless', 'FCA': 'Utility',...","{'Weiss': {'Rating': '', 'TechnologyAdoptionRa...",True,,,,,
TNCC,934865,/coins/tncc/overview,/media/36935227/tncc.png,1589367512,TNCC,TNCC,TNC Coin,TNC Coin (TNCC),,,...,5109,False,"{'Access': 'Permissionless', 'FCA': 'Utility',...","{'Weiss': {'Rating': '', 'TechnologyAdoptionRa...",True,1000000000000.0,0.0,0.0,0.0,0.0


### Data Preprocessing

In [6]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'
crypto_df = crypto_df[["CoinName", "Algorithm", "IsTrading", "ProofType", "TotalCoinsMined","TotalCoinSupply"]]
crypto_df.tail()             

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
AE,Aeternity,,True,PoW/PoS,89792285.0,273685831
UTOPIA,Amp,,False,,19287800000.0,0
XLM,Stellar,,True,,20659000000.0,50000000000
UMI,Universal Money Intrument,,True,,,0
TNCC,TNC Coin,,True,,1000000000000.0,1000000000000


In [7]:
# Checking frequency count of values in a IsTrading Column
crypto_df.IsTrading.value_counts()

True     4200
False    1086
Name: IsTrading, dtype: int64

In [8]:
# Keep only cryptocurrencies that are trading
crypto_trading_df = crypto_df[crypto_df.IsTrading == True]            
crypto_trading_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,42.0,42
300,300 token,,True,,300.0,300
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1273830.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000


In [9]:
# Checking frequency count of values in a IsTrading Column post taking only traded Cryptocurrencies
crypto_trading_df.IsTrading.value_counts()

True    4200
Name: IsTrading, dtype: int64

In [10]:
# Checking frequency count of values in a Algorithm Column before removing N/A Values
crypto_trading_df.Algorithm.value_counts()

N/A                  2740
Scrypt                452
X11                   219
SHA-256               154
PoS                    71
                     ... 
X11Evo                  1
CryptoNight Heavy       1
IMesh                   1
X16RT                   1
Zhash                   1
Name: Algorithm, Length: 122, dtype: int64

In [11]:
# Keep only cryptocurrencies with a working algorithm
crypto_trading_algo_df = crypto_trading_df[crypto_trading_df.Algorithm != "N/A"]            
crypto_trading_algo_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,42.0,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1273830.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [12]:
# Checking frequency count of values in a working algorithm Column
crypto_trading_algo_df.Algorithm.value_counts()

Scrypt                 452
X11                    219
SHA-256                154
PoS                     71
X13                     70
                      ... 
Cryptonight Haven        1
GO20                     1
QuarkTX                  1
vBlake                   1
HybridScryptHash256      1
Name: Algorithm, Length: 121, dtype: int64

In [13]:
# Checking frequency count of values in a Dataframe
crypto_trading_algo_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1460 entries, 42 to POA
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   CoinName         1460 non-null   object
 1   Algorithm        1460 non-null   object
 2   IsTrading        1460 non-null   object
 3   ProofType        1460 non-null   object
 4   TotalCoinsMined  873 non-null    object
 5   TotalCoinSupply  1460 non-null   object
dtypes: object(6)
memory usage: 79.8+ KB


In [14]:
# Remove the 'IsTrading' column
crypto_algo_df = crypto_trading_algo_df.drop(columns = ["IsTrading"])
crypto_algo_df.head(2)

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,42.0,42
365,365Coin,X11,PoW/PoS,,2300000000


In [15]:
# Remove rows with at least 1 null value
crypto_algo_df = crypto_algo_df.dropna()            
# Checking dataframe for null values
crypto_algo_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 873 entries, 42 to POA
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   CoinName         873 non-null    object
 1   Algorithm        873 non-null    object
 2   ProofType        873 non-null    object
 3   TotalCoinsMined  873 non-null    object
 4   TotalCoinSupply  873 non-null    object
dtypes: object(5)
memory usage: 40.9+ KB


In [16]:
# Remove rows with cryptocurrencies having no coins mined
crypto_algo_coins_df = crypto_algo_df[crypto_algo_df.TotalCoinsMined != 0]
# Checking dataframe for frequencies
crypto_algo_coins_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 670 entries, 42 to POA
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   CoinName         670 non-null    object
 1   Algorithm        670 non-null    object
 2   ProofType        670 non-null    object
 3   TotalCoinsMined  670 non-null    object
 4   TotalCoinSupply  670 non-null    object
dtypes: object(5)
memory usage: 31.4+ KB


In [17]:
# Checking dataframe for N/A values
crypto_algo_df.head(18)

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,42.0,42.0
404,404Coin,Scrypt,PoW/PoS,1273830.0,532000000.0
808,808,SHA-256,PoW/PoS,0.0,0.0
1337,EliteCoin,X13,PoW/PoS,29481900000.0,314159265359.0
BTCD,BitcoinDark,SHA-256,PoW/PoS,1288862.0,22000000.0
XBS,Bitstake,X11,PoW/PoS,0.0,1300000.0
XPY,PayCoin,SHA-256,PoS,11995300.0,12500000.0
PRC,ProsperCoin,Scrypt,PoW,6268245.0,21000000.0
KOBO,KoboCoin,X15,PoW/PoS,25596500.0,350000000.0
XPD,PetroDollar,SHA-256D,,63993275.0,122107462.0


ProofType and TotalCoinSupply columns have "N/A" values

In [18]:
# Drop rows where there are 'N/A' text values from ProofType and TotalCoinSupply
cleaned_crypto_df = crypto_algo_coins_df[crypto_algo_coins_df.ProofType != "N/A"]  
cleaned_crypto_df = cleaned_crypto_df[cleaned_crypto_df.TotalCoinSupply != "N/A"]  
cleaned_crypto_df.head(10)

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,42.0,42
404,404Coin,Scrypt,PoW/PoS,1273830.0,532000000
1337,EliteCoin,X13,PoW/PoS,29481900000.0,314159265359
BTCD,BitcoinDark,SHA-256,PoW/PoS,1288862.0,22000000
XPY,PayCoin,SHA-256,PoS,11995300.0,12500000
PRC,ProsperCoin,Scrypt,PoW,6268245.0,21000000
KOBO,KoboCoin,X15,PoW/PoS,25596500.0,350000000
ARG,Argentum,Scrypt,PoW,13832900.0,64000000
BLU,BlueCoin,Scrypt,PoW/PoS,647272520.0,0
XMY,MyriadCoin,Multiple,PoW,1747693250.0,2000000000


In [19]:
# Checking frequency count of values in a ProofType after removing N/A values
cleaned_crypto_df.ProofType.value_counts()

PoW                     232
PoW/PoS                 176
PoS                      94
DPoS                     11
PoS/PoW                   3
PoC                       2
Proof of Authority        2
PoA                       2
PoW/PoW                   1
POBh                      1
dPoW/PoW                  1
DPoC                      1
PoW/PoS                   1
 PoW/PoS/PoA              1
Pos                       1
POS / MN                  1
PoS/PoW/PoT               1
PoW/nPoS                  1
HPoW                      1
PoST                      1
TPoS                      1
PoW + Hive                1
DPOS                      1
Zero-Knowledge Proof      1
PoS/LPoS                  1
Proof of Trust            1
LPoS                      1
Name: ProofType, dtype: int64

In [20]:
# Checking frequency count of values in a ProofType after removing N/A values
cleaned_crypto_df.TotalCoinSupply.value_counts()

0               44
21000000        36
100000000       24
1000000000      19
20000000        12
                ..
91388946         1
990000000000     1
9507271          1
888000000        1
100000000        1
Name: TotalCoinSupply, Length: 276, dtype: int64

In [21]:
# Checking cleaned dataframe for frequencies
cleaned_crypto_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 541 entries, 42 to POA
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   CoinName         541 non-null    object
 1   Algorithm        541 non-null    object
 2   ProofType        541 non-null    object
 3   TotalCoinsMined  541 non-null    object
 4   TotalCoinSupply  541 non-null    object
dtypes: object(5)
memory usage: 25.4+ KB


In [22]:
# Downloading Cleaned Crypo data
cleaned_crypto_df.to_csv("Resources/cleaned_crypto_df.csv")

In [23]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df 
coins_name = cleaned_crypto_df.CoinName
coins_name.head()

42          42 Coin
404         404Coin
1337      EliteCoin
BTCD    BitcoinDark
XPY         PayCoin
Name: CoinName, dtype: object

In [24]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
cluster = cleaned_crypto_df.drop(columns = ["CoinName"])
cluster.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,Scrypt,PoW/PoS,42.0,42
404,Scrypt,PoW/PoS,1273830.0,532000000
1337,X13,PoW/PoS,29481900000.0,314159265359
BTCD,SHA-256,PoW/PoS,1288862.0,22000000
XPY,SHA-256,PoS,11995300.0,12500000


In [25]:
# Create dummy variables for text features
cluster_dummy = pd.get_dummies(cluster[["Algorithm", "ProofType"]])
cluster_dummy.head()

Unnamed: 0,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_BMW512 / Echo512,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_C31,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
42,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
404,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1337,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BTCD,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
XPY,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
# Standardize data
scaler = StandardScaler()
# Standarize data with StandarScaler
X = StandardScaler().fit_transform(cluster_dummy)
print(X[0:5])

[[-0.04303315 -0.04303315 -0.04303315 -0.06091449 -0.04303315 -0.07467401
  -0.04303315 -0.06091449 -0.06091449 -0.04303315 -0.04303315 -0.04303315
  -0.18011871 -0.04303315 -0.04303315 -0.08630637 -0.04303315 -0.04303315
  -0.1300665  -0.08630637 -0.04303315 -0.04303315 -0.04303315 -0.04303315
  -0.15691148 -0.04303315 -0.04303315 -0.1300665  -0.04303315 -0.04303315
  -0.08630637 -0.04303315 -0.04303315 -0.04303315 -0.04303315 -0.04303315
  -0.04303315 -0.04303315 -0.04303315 -0.07467401 -0.09658343 -0.06091449
  -0.04303315 -0.13723116 -0.1300665  -0.1300665  -0.04303315 -0.04303315
  -0.04303315 -0.07467401 -0.18551772 -0.04303315 -0.04303315 -0.04303315
  -0.04303315 -0.07467401 -0.19078382 -0.04303315 -0.31558437 -0.04303315
  -0.08630637 -0.07467401 -0.06091449 -0.04303315  1.44617723 -0.04303315
  -0.04303315 -0.06091449 -0.04303315 -0.06091449 -0.04303315 -0.04303315
  -0.04303315 -0.04303315 -0.04303315 -0.06091449 -0.38866931 -0.04303315
  -0.18551772 -0.04303315 -0.08630637 

### Reducing Dimensions Using PCA

In [27]:
# Use PCA to reduce dimensions to 3 principal components
# Initialize PCA model
pca = PCA(n_components=3)
# Get three principal components for the iris data.
crypto_pca = pca.fit_transform(X)

In [28]:
# Create a DataFrame with the principal components data

# Transform PCA data to a DataFrame
pca_columns = ["PC 1", "PC 2", "PC 3"]
pcs_df = pd.DataFrame(
    data=crypto_pca, columns=pca_columns, index = coins_name.index
)
pcs_df.head(10)

Unnamed: 0,PC 1,PC 2,PC 3
42,0.868906,-0.677764,-0.179655
404,0.868906,-0.677764,-0.179655
1337,1.654811,-0.811768,-0.189151
BTCD,0.780133,-0.449451,-0.10042
XPY,0.653244,-0.070262,-0.008489
PRC,-1.19327,0.043816,-0.027812
KOBO,1.732714,-0.86451,-0.21092
ARG,-1.19327,0.043816,-0.027812
BLU,0.868906,-0.677764,-0.179655
XMY,-1.69189,0.401771,0.07831


In [29]:
# Fetch the explained variance
pca.explained_variance_ratio_

# You can see that principal component values have no direct relation with the values in the original dataset. 
# They can be seen as a reduced representation of the original data.
# The trick is to sacrifice a little accuracy for simplicity. Smaller datasets are easier to 
# explore and visualize. They ease data analysis and speed up machine-learning algorithms without extraneous variables to process.

# The first principal component contains 1.88% of the variance, second contains 1.83% & third contains 1.82%
# of the variance. All components together contain 5.52% of the information.

array([0.01880221, 0.01827451, 0.01822643])

### Clustering Crytocurrencies Using K-Means

#### Finding the Best Value for `k` Using the Elbow Curve

In [30]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(pcs_df[pca_columns])
    inertia.append(km.inertia_)

# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

Running K-Means with `k=5`

In [31]:
# Initialize the K-Means model
model = KMeans(n_clusters=5, random_state=0)

# Fit the model
model.fit(pcs_df[pca_columns])

# Predict clusters
predictions = model.predict(pcs_df[pca_columns])

# Add the predicted class columns
pcs_df["class"] = model.labels_
pcs_df

Unnamed: 0,PC 1,PC 2,PC 3,class
42,0.868906,-0.677764,-0.179655,1
404,0.868906,-0.677764,-0.179655,1
1337,1.654811,-0.811768,-0.189151,1
BTCD,0.780133,-0.449451,-0.100420,1
XPY,0.653244,-0.070262,-0.008489,1
...,...,...,...,...
CLOAK,1.654811,-0.811768,-0.189151,1
BCN,-2.264292,0.548002,0.089103,0
BURST,1.659357,-0.215244,-0.101534,1
SC,-2.308004,0.601318,0.136346,0


In [32]:
# Create a new DataFrame including predicted clusters and cryptocurrencies features
clustered_df=pd.DataFrame({
    "Algorithm": cluster.Algorithm, 
    "ProofType": cluster.ProofType, 
    "TotalCoinsMined": cluster.TotalCoinsMined, 
    "TotalCoinSupply": cluster.TotalCoinSupply, 
    "PC 1": pcs_df["PC 1"], 
    "PC 2": pcs_df["PC 2"], 
    "PC 3": pcs_df["PC 3"], 
    "CoinName": coins_name, 
    "Class": pcs_df["class"]
    }, index = cluster.index
)
clustered_df.head(10)

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,CoinName,Class
42,Scrypt,PoW/PoS,42.0,42,0.868906,-0.677764,-0.179655,42 Coin,1
404,Scrypt,PoW/PoS,1273830.0,532000000,0.868906,-0.677764,-0.179655,404Coin,1
1337,X13,PoW/PoS,29481900000.0,314159265359,1.654811,-0.811768,-0.189151,EliteCoin,1
BTCD,SHA-256,PoW/PoS,1288862.0,22000000,0.780133,-0.449451,-0.10042,BitcoinDark,1
XPY,SHA-256,PoS,11995300.0,12500000,0.653244,-0.070262,-0.008489,PayCoin,1
PRC,Scrypt,PoW,6268245.0,21000000,-1.19327,0.043816,-0.027812,ProsperCoin,0
KOBO,X15,PoW/PoS,25596500.0,350000000,1.732714,-0.86451,-0.21092,KoboCoin,1
ARG,Scrypt,PoW,13832900.0,64000000,-1.19327,0.043816,-0.027812,Argentum,0
BLU,Scrypt,PoW/PoS,647272520.0,0,0.868906,-0.677764,-0.179655,BlueCoin,1
XMY,Multiple,PoW,1747693250.0,2000000000,-1.69189,0.401771,0.07831,MyriadCoin,0


In [33]:
clustered_df.info
# Checking the frequency of the classes
clustered_df.Class.value_counts()

1    299
0    236
4      2
3      2
2      2
Name: Class, dtype: int64

### Visualizing Results

#### 3D-Scatter with Clusters

In [34]:
# Create a 3D-Scatter with the PCA data and the clusters
fig = px.scatter_3d(
    clustered_df,
    x="PC 1",
    y="PC 2",
    z="PC 3", 
#     color="class",
#     symbol="class",
    width=800,
    hover_name="CoinName",
    hover_data=["Algorithm"], 
    title = "3D-Scatter with the PCA data and the clusters",
    )
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

#### Table of Tradable Cryptocurrencies

In [35]:
# Table with tradable cryptos
cols = ["CoinName", "Algorithm", "ProofType", "TotalCoinSupply", "TotalCoinsMined", "Class"]
clustered_df.hvplot.table(cols)

In [36]:
# Print the total number of tradable cryptocurrencies
print(f"Total number of tradable cryptocurrencies are: {clustered_df.CoinName.count()}")
print(f"Total number of Unique tradable cryptocurrencies are: {len(clustered_df.CoinName.value_counts())}")

Total number of tradable cryptocurrencies are: 541
Total number of Unique tradable cryptocurrencies are: 540


#### Scatter Plot with Tradable Cryptocurrencies

In [37]:
# Scale data to create the scatter plot
clustered_df["TotalCoinsMined_scaled"]= MinMaxScaler().fit_transform(clustered_df[["TotalCoinsMined"]])
clustered_df["TotalCoinSupply_scaled"]= MinMaxScaler().fit_transform(clustered_df[["TotalCoinSupply"]])
clustered_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC 1,PC 2,PC 3,CoinName,Class,TotalCoinsMined_scaled,TotalCoinSupply_scaled
42,Scrypt,PoW/PoS,42.0,42,0.868906,-0.677764,-0.179655,42 Coin,1,0.005942,4.2e-11
404,Scrypt,PoW/PoS,1273830.0,532000000,0.868906,-0.677764,-0.179655,404Coin,1,0.005944,0.000532
1337,X13,PoW/PoS,29481900000.0,314159265359,1.654811,-0.811768,-0.189151,EliteCoin,1,0.035545,0.3141593
BTCD,SHA-256,PoW/PoS,1288862.0,22000000,0.780133,-0.449451,-0.10042,BitcoinDark,1,0.005944,2.2e-05
XPY,SHA-256,PoS,11995300.0,12500000,0.653244,-0.070262,-0.008489,PayCoin,1,0.005954,1.25e-05


In [38]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"
clustered_df.hvplot.scatter(
    x= "TotalCoinsMined_scaled",
    y="TotalCoinSupply_scaled",
    hover_cols=["CoinName"],
    by="Class",
    title = "Scatter plot of TotalCoinsMined and TotalCoinSupply"
)