# Clustering Crypto

In [107]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from pathlib import Path

### Fetching Cryptocurrency Data

In [108]:
# Use the following endpoint to fetch json data
url = "https://min-api.cryptocompare.com/data/all/coinlist"

In [109]:
# Create a DataFrame 
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.

In [190]:
# Alternatively, use the provided csv file:
# file_path = Path("Resources/crypto_data.csv")

# Create a DataFrame
file_path = Path("Resources/crypto_data.csv")
crypto_df = pd.read_csv(file_path)
crypto_df.head(10)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0
5,1337,EliteCoin,X13,True,PoW/PoS,29279420000.0,314159265359
6,2015,2015 coin,X11,True,PoW/PoS,,0
7,BTC,Bitcoin,SHA-256,True,PoW,17927180.0,21000000
8,ETH,Ethereum,Ethash,True,PoW,107684200.0,0
9,LTC,Litecoin,Scrypt,True,PoW,63039240.0,84000000


In [191]:
crypto_df.rename(columns={"Unnamed: 0": "Coin"}, inplace=True)
crypto_df.head()

Unnamed: 0,Coin,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [192]:
crypto_df.set_index('Coin', inplace=True)

In [193]:
crypto_df.head()

Unnamed: 0_level_0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [194]:
crypto_df.describe()

Unnamed: 0,TotalCoinsMined
count,744.0
mean,3883103000.0
std,38672630000.0
min,-5917978000.0
25%,632216.4
50%,13404270.0
75%,93998950.0
max,989988700000.0


In [195]:
crypto_df.shape

(1252, 6)

In [196]:
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [197]:
crypto_df['Algorithm'].value_counts().sum()

1252

In [198]:
crypto_df['Algorithm'].value_counts().sort_index()

1GB AES Pattern Search      1
536                         2
Argon2                      2
Argon2d                     1
Avesta hash                 1
BLAKE256                    2
Blake                       8
Blake2S                     4
Blake2b                     3
C11                         2
Cloverhash                  1
Counterparty                1
CryptoNight                40
CryptoNight Heavy           3
CryptoNight Heavy X         1
CryptoNight-Lite            1
CryptoNight-V7              6
CryptoNight-lite            1
Cryptonight-GPU             1
Curve25519                  2
DPoS                       19
Dagger                      4
Dagger-Hashimoto            1
ECC 256K1                   1
Equihash                   26
Equihash+Scrypt             1
Equihash1927                1
Ethash                     18
Exosis                      1
Green Protocol              1
Groestl                     5
HMQ1725                     2
HybridScryptHash256         1
IMesh     

### Data Preprocessing

In [199]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'
col_list = ['CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply']
crypto_df1 = crypto_df[col_list]
crypto_df1.head()

Unnamed: 0_level_0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [200]:
crypto_df1['IsTrading'].value_counts()

True     1144
False     108
Name: IsTrading, dtype: int64

In [201]:
# Keep only cryptocurrencies that are trading
crypto_df2 = crypto_df1[crypto_df1['IsTrading'] == True]

In [202]:
crypto_df2['IsTrading'].value_counts()

True    1144
Name: IsTrading, dtype: int64

#### I see all are working algorithm. So i am just keeping all

In [203]:
# Keep only cryptocurrencies with a working algorithm


In [204]:
# Remove the "IsTrading" column
crypto_df3 = crypto_df2.drop(columns=['IsTrading'])
crypto_df3.head()

Unnamed: 0_level_0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
365,365Coin,X11,PoW/PoS,,2300000000
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,PoW,,611000
808,808,SHA-256,PoW/PoS,0.0,0


In [205]:
len(crypto_df3)

1144

In [206]:
# Remove rows with at least 1 null value
crypto_df4 = crypto_df3.dropna()
crypto_df4.head()

Unnamed: 0_level_0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
808,808,SHA-256,PoW/PoS,0.0,0
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000


In [207]:
len(crypto_df4)

685

In [208]:
# Remove rows with cryptocurrencies having no coins mined
crypto_df5 = crypto_df4[crypto_df4['TotalCoinsMined'] > 0]
crypto_df5.head()

Unnamed: 0_level_0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,PoW,107684200.0,0


In [209]:
len(crypto_df5)

532

In [210]:
# Drop rows where there are 'N/A' text values
crypto_df6 = crypto_df5[~crypto_df5['TotalCoinSupply'].str.contains('N/A')]
crypto_df6.head()

Unnamed: 0_level_0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,PoW,107684200.0,0


In [211]:
crypto_df6.info()

<class 'pandas.core.frame.DataFrame'>
Index: 532 entries, 42 to XBC
Data columns (total 5 columns):
CoinName           532 non-null object
Algorithm          532 non-null object
ProofType          532 non-null object
TotalCoinsMined    532 non-null float64
TotalCoinSupply    532 non-null object
dtypes: float64(1), object(4)
memory usage: 24.9+ KB


In [212]:
crypto_df6['TotalCoinSupply'] = crypto_df6['TotalCoinSupply'].astype(float)
crypto_df6.head()

Unnamed: 0_level_0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42,42 Coin,Scrypt,PoW/PoS,41.99995,42.0
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000.0
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159300000.0
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000.0
ETH,Ethereum,Ethash,PoW,107684200.0,0.0


In [213]:
len(crypto_df6)

532

In [214]:
crypto_df6.info()

<class 'pandas.core.frame.DataFrame'>
Index: 532 entries, 42 to XBC
Data columns (total 5 columns):
CoinName           532 non-null object
Algorithm          532 non-null object
ProofType          532 non-null object
TotalCoinsMined    532 non-null float64
TotalCoinSupply    532 non-null float64
dtypes: float64(2), object(3)
memory usage: 24.9+ KB


In [215]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df
coin_df = crypto_df6['CoinName'].to_frame()
coin_df.head()
#print(coin_df)

Unnamed: 0_level_0,CoinName
Coin,Unnamed: 1_level_1
42,42 Coin
404,404Coin
1337,EliteCoin
BTC,Bitcoin
ETH,Ethereum


In [230]:
len(coin_df)

532

In [216]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
crypto_df7 = crypto_df6.drop(columns=['CoinName'])
crypto_df7.head()

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,Scrypt,PoW/PoS,41.99995,42.0
404,Scrypt,PoW/PoS,1055185000.0,532000000.0
1337,X13,PoW/PoS,29279420000.0,314159300000.0
BTC,SHA-256,PoW,17927180.0,21000000.0
ETH,Ethash,PoW,107684200.0,0.0


In [217]:
crypto_df7['ProofType'].value_counts()

PoW                     237
PoW/PoS                 176
PoS                      86
DPoS                      9
PoC                       3
PoS/PoW                   2
PoW/nPoS                  1
PoW/PoS                   1
PoST                      1
dPoW/PoW                  1
Proof of Authority        1
POBh                      1
TPoS                      1
PoA                       1
Zero-Knowledge Proof      1
Proof of Trust            1
LPoS                      1
DPOS                      1
PoS/LPoS                  1
Pos                       1
HPoW                      1
PoS/PoW/PoT               1
PoW and PoS               1
PoW/PoW                   1
PoW + Hive                1
Name: ProofType, dtype: int64

In [218]:
crypto_df7.info()

<class 'pandas.core.frame.DataFrame'>
Index: 532 entries, 42 to XBC
Data columns (total 4 columns):
Algorithm          532 non-null object
ProofType          532 non-null object
TotalCoinsMined    532 non-null float64
TotalCoinSupply    532 non-null float64
dtypes: float64(2), object(2)
memory usage: 20.8+ KB


In [219]:
# Create dummy variables for text features
# Encode the categorical features (One-hot encode)
crypto_enc = pd.get_dummies(crypto_df7)
crypto_enc.head()

Unnamed: 0_level_0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_Cloverhash,Algorithm_Counterparty,Algorithm_CryptoNight,Algorithm_CryptoNight Heavy,Algorithm_CryptoNight-V7,Algorithm_Cryptonight-GPU,Algorithm_DPoS,Algorithm_Dagger,Algorithm_Dagger-Hashimoto,Algorithm_ECC 256K1,Algorithm_Equihash,Algorithm_Equihash+Scrypt,Algorithm_Ethash,Algorithm_Exosis,Algorithm_Green Protocol,Algorithm_Groestl,Algorithm_HMQ1725,Algorithm_HybridScryptHash256,Algorithm_IMesh,Algorithm_Jump Consistent Hash,Algorithm_Keccak,Algorithm_Leased POS,Algorithm_Lyra2RE,Algorithm_Lyra2REv2,Algorithm_Lyra2Z,Algorithm_M7 POW,Algorithm_Multiple,Algorithm_NIST5,Algorithm_NeoScrypt,Algorithm_Ouroboros,Algorithm_PHI1612,Algorithm_POS 2.0,Algorithm_POS 3.0,Algorithm_PoS,Algorithm_Proof-of-Authority,Algorithm_Proof-of-BibleHash,Algorithm_QUAIT,Algorithm_QuBit,Algorithm_Quark,Algorithm_SHA-256,Algorithm_SHA-256 + Hive,Algorithm_SHA-256D,Algorithm_SHA-512,Algorithm_SHA3,Algorithm_Scrypt,Algorithm_Semux BFT consensus,Algorithm_Shabal256,Algorithm_Skein,Algorithm_SkunkHash,Algorithm_SkunkHash v2 Raptor,Algorithm_Stanford Folding,Algorithm_TRC10,Algorithm_Time Travel,Algorithm_Tribus,Algorithm_VBFT,Algorithm_VeChainThor Authority,Algorithm_X11,Algorithm_X11GOST,Algorithm_X13,Algorithm_X14,Algorithm_X15,Algorithm_X16R,Algorithm_XEVAN,ProofType_DPOS,ProofType_DPoS,ProofType_HPoW,ProofType_LPoS,ProofType_POBh,ProofType_PoA,ProofType_PoC,ProofType_PoS,ProofType_PoS/LPoS,ProofType_PoS/PoW,ProofType_PoS/PoW/PoT,ProofType_PoST,ProofType_PoW,ProofType_PoW + Hive,ProofType_PoW and PoS,ProofType_PoW/PoS,ProofType_PoW/PoS,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1
42,41.99995,42.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
404,1055185000.0,532000000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
1337,29279420000.0,314159300000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
BTC,17927180.0,21000000.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
ETH,107684200.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0


In [229]:
len(crypto_enc)

532

In [220]:
crypto_enc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 532 entries, 42 to XBC
Data columns (total 98 columns):
TotalCoinsMined                     532 non-null float64
TotalCoinSupply                     532 non-null float64
Algorithm_1GB AES Pattern Search    532 non-null uint8
Algorithm_536                       532 non-null uint8
Algorithm_Argon2d                   532 non-null uint8
Algorithm_BLAKE256                  532 non-null uint8
Algorithm_Blake                     532 non-null uint8
Algorithm_Blake2S                   532 non-null uint8
Algorithm_Blake2b                   532 non-null uint8
Algorithm_C11                       532 non-null uint8
Algorithm_Cloverhash                532 non-null uint8
Algorithm_Counterparty              532 non-null uint8
Algorithm_CryptoNight               532 non-null uint8
Algorithm_CryptoNight Heavy         532 non-null uint8
Algorithm_CryptoNight-V7            532 non-null uint8
Algorithm_Cryptonight-GPU           532 non-null uint8
Algorithm_DPoS 

In [221]:
# Standardize data
from sklearn import preprocessing

scaler = preprocessing.StandardScaler().fit(crypto_enc)
crypto_enc_scaled = scaler.transform(crypto_enc)

In [222]:
crypto_enc_scaled

array([[-0.11710817, -0.1528703 , -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.09396955, -0.145009  , -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [ 0.52494561,  4.48942416, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       ...,
       [-0.09561336, -0.13217937, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.11694817, -0.15255998, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.11710536, -0.15285552, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ]])

### Reducing Dimensions Using PCA

In [223]:
# Use PCA to reduce dimensions to 3 principal components

# Initialize PCA model
pca = PCA(n_components=3)

# Get two principal components for the iris data.
crypto_pca = pca.fit_transform(crypto_enc_scaled)

In [224]:
crypto_pca

array([[-0.32148799,  1.0173802 , -0.56194727],
       [-0.30485695,  1.01759711, -0.56229491],
       [ 2.31709756,  1.59143098, -0.61929252],
       ...,
       [ 0.29204319, -2.2612659 ,  0.40832015],
       [-0.17869958, -2.01327404,  0.38788875],
       [-0.25651574,  0.7824484 , -0.26268911]])

In [226]:
# Create a DataFrame with the principal components data
# Transform PCA data to a DataFrame
df_crypto_pca = pd.DataFrame(
    data=crypto_pca, columns=["principal component 1", "principal component 2", "principal component 3"]
)
df_crypto_pca.head()

Unnamed: 0,principal component 1,principal component 2,principal component 3
0,-0.321488,1.01738,-0.561947
1,-0.304857,1.017597,-0.562295
2,2.317098,1.591431,-0.619293
3,-0.174451,-1.253868,0.182379
4,-0.169222,-2.015711,0.377215


In [228]:
df_crypto_pca.set_index(keys=coin_df.index, inplace=True)

In [231]:
df_crypto_pca.head()

Unnamed: 0_level_0,principal component 1,principal component 2,principal component 3
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
42,-0.321488,1.01738,-0.561947
404,-0.304857,1.017597,-0.562295
1337,2.317098,1.591431,-0.619293
BTC,-0.174451,-1.253868,0.182379
ETH,-0.169222,-2.015711,0.377215


In [232]:
# Fetch the explained variance
pca.explained_variance_ratio_

array([0.0279092, 0.0213002, 0.0205088])

In [233]:
sum(pca.explained_variance_ratio_)

0.06971820199318515

### Clustering Crytocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [235]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(df_crypto_pca)
    inertia.append(km.inertia_)

# Create the Elbow Curve using hvPlot

elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

Running K-Means with `k=<your best value for k here>`  K = 4 seems to be the best value.

In [236]:
# Initialize the K-Means model
model = KMeans(n_clusters=4, random_state=0)
# Fit the model
model.fit(df_crypto_pca)
# Predict clusters
predictions = model.predict(df_crypto_pca)
# Create a new DataFrame including predicted clusters and cryptocurrencies features
df_crypto_pca_k = df_crypto_pca.copy(deep=True)
df_crypto_pca_k["class"] = model.labels_
df_crypto_pca_k.head()

Unnamed: 0_level_0,principal component 1,principal component 2,principal component 3,class
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,-0.321488,1.01738,-0.561947,0
404,-0.304857,1.017597,-0.562295,0
1337,2.317098,1.591431,-0.619293,0
BTC,-0.174451,-1.253868,0.182379,1
ETH,-0.169222,-2.015711,0.377215,1


In [237]:
df_crypto_pca_k['class'].value_counts()

0    285
1    241
3      5
2      1
Name: class, dtype: int64

### Visualizing Results

#### 3D-Scatter with Clusters

In [238]:
# Create a 3D-Scatter with the PCA data and the clusters
# Plotting the clusters
df_crypto_pca_k.hvplot.scatter(
    x="principal component 1",
    y="principal component 2",
#    z="principal component 3",
    hover_cols=["class"],
    by="class",
)

In [239]:
# Plotting the clusters
df_crypto_pca_k.hvplot.scatter(
    x="principal component 2",
    y="principal component 3",
#    z="principal component 3",
    hover_cols=["class"],
    by="class",
)

In [240]:
# Plotting the clusters
df_crypto_pca_k.hvplot.scatter(
    x="principal component 1",
    y="principal component 3",
#    z="principal component 3",
    hover_cols=["class"],
    by="class",
)

#### Table of Tradable Cryptocurrencies

##### The dataframe coin_df already has only tradeable crypto's as we already dropped the ones that are not. Also this is cleaned data where we dropped Nulls, and also retained only mined > 0. 

In [242]:
# Table with tradable cryptos
#coin_df.head()
coin_df.hvplot.table(columns=['Coin','CoinName'], sortable=True, selectable=True)

In [243]:
# Print the total number of tradable cryptocurrencies
print('Total number of tradeable cryptocurrencies: ', len(coin_df))
#coin_df

Total number of tradeable cryptocurrencies:  532


#### Scatter Plot with Tradable Cryptocurrencies

In [244]:
crypto_df6.head()
crypto_trd_df = crypto_df6.drop(columns=['CoinName', 'Algorithm', 'ProofType'])
crypto_trd_df.head()

Unnamed: 0_level_0,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1
42,41.99995,42.0
404,1055185000.0,532000000.0
1337,29279420000.0,314159300000.0
BTC,17927180.0,21000000.0
ETH,107684200.0,0.0


In [246]:
# Scale data to create the scatter plot
crypto_trd = StandardScaler().fit_transform(crypto_trd_df)
crypto_trd

array([[-0.11710817, -0.1528703 ],
       [-0.09396955, -0.145009  ],
       [ 0.52494561,  4.48942416],
       ...,
       [-0.09561336, -0.13217937],
       [-0.11694817, -0.15255998],
       [-0.11710536, -0.15285552]])

In [247]:
df_crypto_trd = pd.DataFrame(
    data=crypto_trd, columns=["TotalCoinsMined", "TotalCoinSupply"])
df_crypto_trd.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply
0,-0.117108,-0.15287
1,-0.09397,-0.145009
2,0.524946,4.489424
3,-0.116715,-0.15256
4,-0.114747,-0.15287


In [248]:
df_crypto_trd.set_index(keys=coin_df.index, inplace=True)

In [249]:
df_crypto_trd.head()

Unnamed: 0_level_0,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1
42,-0.117108,-0.15287
404,-0.09397,-0.145009
1337,0.524946,4.489424
BTC,-0.116715,-0.15256
ETH,-0.114747,-0.15287


In [256]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"
df_crypto_trd.hvplot.scatter(
    x="TotalCoinsMined",
    y="TotalCoinSupply",
#    z="principal component 3",
#    hover_cols=["Coin"]
#    by="Coin"
)

In [258]:
crypto_df6.head()
crypto_trd_df2 = crypto_df6.drop(columns=['CoinName', 'Algorithm', 'ProofType'])
crypto_trd_df2.head()

Unnamed: 0_level_0,TotalCoinsMined,TotalCoinSupply
Coin,Unnamed: 1_level_1,Unnamed: 2_level_1
42,41.99995,42.0
404,1055185000.0,532000000.0
1337,29279420000.0,314159300000.0
BTC,17927180.0,21000000.0
ETH,107684200.0,0.0


#### Plot w/o scaling

In [259]:
crypto_trd_df2.hvplot.scatter(
    x="TotalCoinsMined",
    y="TotalCoinSupply"
#    z="principal component 3",
#    hover_cols=["class"],
#    by="class",
)