---
This notebook generates a report of which cryptocurrencies are available on the trading market and how they can be grouped using unsupervised clustering.

I will complete this project by finishing the following tasks:

- Data Preprocessing: Prepare the data for dimensionality reduction with PCA and clustering with K-Means.

- Reducing Data Dimensions Using PCA: Reduce the data to three principal components using the PCA algorithm.

- Clustering Cryptocurrencies Using K-Means: Predict clusters for the cryptocurrency data using the K-Means algorithm.

- Visualizing Results: Create plots and data tables of the clustered cryptocurrencies.

In [80]:
# Initial imports
from pathlib import Path
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

In [81]:
# Fetching Cryptocurrency Data
# Endpoint that can be used to fetch the coin list as JSON data.
# NOTE: the URL is only stored here; the visible cells load the bundled CSV file
# instead of requesting this endpoint.
url = "https://min-api.cryptocompare.com/data/all/coinlist"

In [82]:
# Alternatively, load the dataset from the provided CSV file.
file_path = Path("Resources/crypto_data.csv")

# The first CSV column (the coin identifier) is used as the row index.
crypto_df = pd.read_csv(filepath_or_buffer=file_path, index_col=0)
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


### Data Preprocessing

In [83]:
# Keep only cryptocurrencies that are trading

# Build the row mask first, then select the matching rows with .loc.
is_trading = crypto_df["IsTrading"] == True
crypto_df = crypto_df.loc[is_trading]
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [84]:
# Keep only cryptocurrencies with a working algorithm

# pd.read_csv's default NA handling converts the text "N/A" to NaN, and
# NaN != "N/A" evaluates to True, so a plain string comparison lets missing
# algorithms through. Exclude NaN explicitly and keep the literal check as well.
algorithm = crypto_df["Algorithm"]
crypto_df = crypto_df[algorithm.notna() & (algorithm != "N/A")]

# Show a preview only, consistent with the other preprocessing cells.
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,4.199995e+01,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1.055185e+09,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.000000e+00,0
...,...,...,...,...,...,...
SERO,Super Zero,Ethash,True,PoW,,1000000000
UOS,UOS,SHA-256,True,DPoI,,1000000000
BDX,Beldex,CryptoNight,True,PoW,9.802226e+08,1400222610
ZEN,Horizen,Equihash,True,PoW,7.296538e+06,21000000


In [85]:
# Remove the "IsTrading" column

# After the trading filter this column no longer carries information.
crypto_df = crypto_df.drop("IsTrading", axis="columns")
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
365,365Coin,X11,PoW/PoS,,2300000000
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,PoW,,611000
808,808,SHA-256,PoW/PoS,0.0,0


In [86]:
# Remove rows with at least 1 null value

# Spell out the dropna defaults: drop a row (axis=0) if any of its cells is null.
crypto_df = crypto_df.dropna(axis=0, how="any")
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
808,808,SHA-256,PoW/PoS,0.0,0
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000


In [87]:
# Remove rows with cryptocurrencies having no coins mined

# Keep only the rows whose mined total is strictly positive.
has_mined_coins = crypto_df["TotalCoinsMined"].gt(0)
crypto_df = crypto_df.loc[has_mined_coins]
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,PoW,107684200.0,0


In [88]:
# Drop rows where there are 'N/A' text values

# Indexing a DataFrame with a boolean DataFrame (crypto_df[crypto_df != 'N/A'])
# only replaces the matching cells with NaN and keeps every row. Build a per-row
# mask instead so the offending rows are actually removed.
has_na_text = crypto_df.isin(["N/A"]).any(axis=1)

# .copy() gives later cells an independent frame to modify.
crypto_df = crypto_df.loc[~has_na_text].copy()
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,PoW,107684200.0,0


In [89]:
# Store the 'CoinName' column in its own Series

# Kept separately because the column is dropped from crypto_df before encoding
# and is needed again when the clustered results are assembled.
coinName = crypto_df.loc[:, "CoinName"]
coinName.head()

42        42 Coin
404       404Coin
1337    EliteCoin
BTC       Bitcoin
ETH      Ethereum
Name: CoinName, dtype: object

In [90]:
# Drop the 'CoinName' column

# Rebind instead of mutating in place, and tolerate an already-removed column,
# so re-running this cell does not raise a KeyError.
crypto_df = crypto_df.drop(columns="CoinName", errors="ignore")

In [91]:
# Create dummy variables for text features

# One indicator column is generated per distinct value of each text feature;
# the remaining columns are passed through unchanged.
text_features = ["Algorithm", "ProofType"]
X = pd.get_dummies(data=crypto_df, columns=text_features)
X.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
42,41.99995,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
404,1055185000.0,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1337,29279420000.0,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
BTC,17927180.0,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ETH,107684200.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [92]:
# Standardize data

# Fit the scaler on the encoded features and transform them in one step.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled

array([[-0.11710817, -0.1528703 , -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.09396955, -0.145009  , -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [ 0.52494561,  4.48942416, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       ...,
       [-0.09561336, -0.13217937, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.11694817, -0.15255998, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ],
       [-0.11710536, -0.15285552, -0.0433963 , ..., -0.0433963 ,
        -0.0433963 , -0.0433963 ]])

In [95]:
# Reduce Dimensions Using PCA
# Use PCA to reduce dimensions to 3 principal components

# PCA's default svd_solver="auto" may select a randomized solver depending on the
# data shape and the scikit-learn version. Fixing random_state (same seed as the
# K-Means cells) keeps the components reproducible across runs.
pca = PCA(n_components=3, random_state=0)
crypto_pca = pca.fit_transform(X_scaled)

In [96]:
# Create a DataFrame with the principal components data

# Reuse crypto_df's index so each row of components stays tied to its coin.
pc_columns = ["PC 1", "PC 2", "PC 3"]
pc_df = pd.DataFrame(crypto_pca, index=crypto_df.index, columns=pc_columns)
pc_df.head()

Unnamed: 0,PC 1,PC 2,PC 3
42,-0.342554,1.085036,-0.585707
404,-0.325895,1.085204,-0.586057
1337,2.32352,1.583003,-0.619356
BTC,-0.139089,-1.342335,0.205263
ETH,-0.158408,-2.036048,0.434968


In [97]:
# Clustering Cryptocurrencies Using K-Means
# Find Best Value for `k` Using the Elbow Curve

inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values.
# n_init is set explicitly because its default differs between scikit-learn
# releases (10 historically, "auto" in newer versions); pinning it keeps the
# curve comparable across environments and avoids the FutureWarning.
for i in k:
    km = KMeans(n_clusters=i, n_init=10, random_state=0)
    km.fit(pc_df)
    inertia.append(km.inertia_)

# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

In [98]:
# Running K-Means with `k=4`
# Initialize the K-Means model.
# n_init is pinned because its default differs between scikit-learn releases;
# random_state fixes the centroid initialisation.
model = KMeans(n_clusters=4, n_init=10, random_state=0)

# Fit the model and get the cluster label of every coin in a single step
# (fit_predict returns the fitted labels, so no separate predict pass is needed).
predictions = model.fit_predict(pc_df)

# Create a new DataFrame including predicted clusters and cryptocurrencies features.
# Every Series shares crypto_df's index, so the columns line up row by row.
clustered_df = pd.DataFrame(
    {
        "Algorithm": crypto_df.Algorithm,
        "ProofType": crypto_df.ProofType,
        "TotalCoinsMined": crypto_df.TotalCoinsMined,
        "TotalCoinSupply": crypto_df.TotalCoinSupply,
        "PC1": pc_df["PC 1"],
        "PC2": pc_df["PC 2"],
        "PC3": pc_df["PC 3"],
        "CoinName": coinName,
        "Class": predictions,
    },
    index=crypto_df.index,
)
clustered_df.head(10)


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC1,PC2,PC3,CoinName,Class
42,Scrypt,PoW/PoS,41.99995,42,-0.342554,1.085036,-0.585707,42 Coin,3
404,Scrypt,PoW/PoS,1055185000.0,532000000,-0.325895,1.085204,-0.586057,404Coin,3
1337,X13,PoW/PoS,29279420000.0,314159265359,2.32352,1.583003,-0.619356,EliteCoin,3
BTC,SHA-256,PoW,17927180.0,21000000,-0.139089,-1.342335,0.205263,Bitcoin,0
ETH,Ethash,PoW,107684200.0,0,-0.158408,-2.036048,0.434968,Ethereum,0
LTC,Scrypt,PoW,63039240.0,84000000,-0.177738,-1.05286,-0.026567,Litecoin,0
DASH,X11,PoW/PoS,9031294.0,22000000,-0.387074,1.140701,-0.497748,Dash,3
XMR,CryptoNight-V7,PoW,17201140.0,0,-0.151067,-2.20226,0.47092,Monero,0
ETC,Ethash,PoW,113359700.0,210000000,-0.15685,-2.036149,0.434947,Ethereum Classic,0
ZEC,Equihash,PoW,7383056.0,21000000,-0.137678,-2.112298,0.296417,ZCash,0


### Visualizing Results

In [99]:
# Visualize Results
# Create Scatter Plot with Tradable Cryptocurrencies
# Scale data to create the scatter plot

# Recompute the scaled columns from the unscaled values in crypto_df (which has
# the same index as clustered_df) instead of dividing clustered_df by itself, so
# running this cell more than once does not shrink the values again.
clustered_df["TotalCoinSupply"] = crypto_df["TotalCoinSupply"].astype(float) / 100000000
clustered_df["TotalCoinsMined"] = crypto_df["TotalCoinsMined"].astype(float) / 100000000

In [100]:
# Plot the scatterplot with x="TotalCoinsMined" and y="TotalCoinSupply"

# One colour per predicted class; the coin name is shown on hover.
clustered_df.hvplot.scatter(
    x="TotalCoinsMined",
    y="TotalCoinSupply",
    by="Class",
    hover_cols=["CoinName"],
)

In [101]:
# Table of Tradable Cryptocurrencies

# Columns to show in the table, in display order.
columns = [
    "CoinName",
    "Algorithm",
    "ProofType",
    "TotalCoinSupply",
    "TotalCoinsMined",
    "Class",
]
table = clustered_df.hvplot.table(columns=columns)
table

In [102]:
# Print total number of tradable cryptocurrencies

# Series.count() returns the number of non-null coin names.
total_tradable = coinName.count()
print(f"The total number of tradable cryptocurrencies is {total_tradable}")

The total number of tradable cryptocurrencies is 532
