# Clustering Crypto

In [42]:
# Initial imports
import altair as alt
from vega_datasets import data

import requests
import pandas as pd
#import matplotlib.pyplot as plt
#import hvplot.pandas
#import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import json

### Fetching Cryptocurrency Data

In [43]:
# Use the following endpoint to fetch json data
url = "https://min-api.cryptocompare.com/data/all/coinlist"
crypto_data=requests.get(url).json()

In [44]:
# Create a DataFrame 
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.
crypto_url_df=pd.DataFrame(crypto_data["Data"]).T
crypto_url_df.head()


Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,MaxSupply,MktCapPenalty,IsUsedInDefi,IsUsedInNft,PlatformType,AlgorithmType,Difficulty,BuiltOn,SmartContractAddress,DecimalPoints
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,42.0,0.0,0.0,0.0,blockchain,scrypt,2.367098,,,
300,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token,300 token (300),300 token is an ERC20 token. This Token was cr...,,...,300.0,0.0,0.0,0.0,token,,,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,18.0
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,-1.0,0.0,0.0,0.0,blockchain,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,-1.0,0.0,0.0,0.0,blockchain,,,,,
433,926547,/coins/433/overview,/media/34836095/433.png,1541597321,433,433,433 Token,433 Token (433),433 Token is a decentralised soccer platform t...,Finished,...,,,,,,,,,,


In [45]:
# Alternatively, use the provided csv file:
# file_path = Path("Resources/crypto_data.csv")

# Create a DataFrame


### Data Preprocessing

In [46]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'

crypto_df = crypto_url_df[['CoinName', 'Algorithm', 'IsTrading', 'ProofType',
       'TotalCoinsMined', 'MaxSupply']]

In [47]:
# Keep only cryptocurrencies that are trading
is_Trding = crypto_df['IsTrading']==True
crypto_trading = crypto_df[is_Trding]
crypto_trading.head()


Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.999952,42
300,300 token,,True,,300.0,300
365,365Coin,X11,True,PoW/PoS,0.0,-1
404,404Coin,Scrypt,True,PoW/PoS,0.0,-1
611,SixEleven,SHA-256,True,PoW,0.0,0


In [48]:
# Keep only cryptocurrencies with a working algorithm
[i for i in crypto_df.Algorithm.unique() if i.startswith("N")]

['N/A', 'NIST5', 'NeoScrypt', 'NEP-5', 'NRC20 Token']

In [49]:
crypto_df = crypto_df[crypto_df['Algorithm'] != 'N/A']
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.999952,42
365,365Coin,X11,True,PoW/PoS,0.0,-1
404,404Coin,Scrypt,True,PoW/PoS,0.0,-1
611,SixEleven,SHA-256,True,PoW,0.0,0
808,808,SHA-256,True,PoW/PoS,0.0,0


In [50]:
# Remove the 'IsTrading' column
crypto_df=crypto_df.drop(columns="IsTrading")
crypto_df.head()


Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,41.999952,42
365,365Coin,X11,PoW/PoS,0.0,-1
404,404Coin,Scrypt,PoW/PoS,0.0,-1
611,SixEleven,SHA-256,PoW,0.0,0
808,808,SHA-256,PoW/PoS,0.0,0


In [51]:
# Remove rows with at least 1 null value
crypto_df.dropna(inplace=True,how="any",axis=0)
crypto_df.isnull().sum()


CoinName           0
Algorithm          0
ProofType          0
TotalCoinsMined    0
MaxSupply          0
dtype: int64

In [52]:
# Remove rows with cryptocurrencies having no coins mined
crypto_df = crypto_df[crypto_df['TotalCoinsMined']!=0]
crypto_df.info()


<class 'pandas.core.frame.DataFrame'>
Index: 343 entries, 42 to KMD
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   CoinName         343 non-null    object
 1   Algorithm        343 non-null    object
 2   ProofType        343 non-null    object
 3   TotalCoinsMined  343 non-null    object
 4   MaxSupply        343 non-null    object
dtypes: object(5)
memory usage: 16.1+ KB


In [53]:
# Drop rows where there are 'N/A' text values
crypto_df=crypto_df[crypto_df.iloc[:] != "N/A"]
crypto_df.head()


Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,42 Coin,Scrypt,PoW/PoS,41.999952,42
NSR,NuShares,PoS,PoS,6173410297.8311,0
TRI,Triangles Coin,X13,PoW/PoS,191620.842403,0
CMTC,CometCoin,Scrypt,PoW,872830.0,0
CHAT,OpenChat,Scrypt,PoW/PoS,1000000000.0,-1


In [54]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df 
coin_name=pd.DataFrame(crypto_df["CoinName"], index = crypto_df.index)
coin_name.head()


Unnamed: 0,CoinName
42,42 Coin
NSR,NuShares
TRI,Triangles Coin
CMTC,CometCoin
CHAT,OpenChat


In [55]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
crypto_df=crypto_df.drop(columns="CoinName")

In [56]:
crypto_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,MaxSupply
42,Scrypt,PoW/PoS,41.999952,42
NSR,PoS,PoS,6173410297.8311,0
TRI,X13,PoW/PoS,191620.842403,0
CMTC,Scrypt,PoW,872830.0,0
CHAT,Scrypt,PoW/PoS,1000000000.0,-1


In [59]:
# Create dummy variables for text features
features_df=crypto_df.copy()
X = pd.get_dummies(features_df)
X.head()


Unnamed: 0,Algorithm_Autolykos,Algorithm_BEP-2,Algorithm_BEP-2 Token,Algorithm_BEP-20 Token,Algorithm_BEP2 Token,Algorithm_BLAKE256,Algorithm_BMW512 / Echo512,Algorithm_Blake2B + SHA3,Algorithm_Blake2b,Algorithm_C31,...,MaxSupply_16555000000.0,MaxSupply_21000000000.0,MaxSupply_40000000000.0,MaxSupply_45000000000.0,MaxSupply_70000000000.0,MaxSupply_86712634466.0,MaxSupply_100000000000.0,MaxSupply_184470000000.0,MaxSupply_1000016730264.435,MaxSupply_21000000000000.0
42,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
NSR,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
TRI,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CMTC,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CHAT,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
# Standardize data
scaler=StandardScaler().fit(X)
X_scaled=scaler.transform(X)
X_scaled

array([[-0.05407381, -0.09393364, -0.05407381, ..., -0.05407381,
        -0.05407381, -0.05407381],
       [-0.05407381, -0.09393364, -0.05407381, ..., -0.05407381,
        -0.05407381, -0.05407381],
       [-0.05407381, -0.09393364, -0.05407381, ..., -0.05407381,
        -0.05407381, -0.05407381],
       ...,
       [-0.05407381, -0.09393364, -0.05407381, ..., -0.05407381,
        -0.05407381, -0.05407381],
       [-0.05407381, -0.09393364, -0.05407381, ..., -0.05407381,
        -0.05407381, -0.05407381],
       [-0.05407381, -0.09393364, -0.05407381, ..., -0.05407381,
        -0.05407381, -0.05407381]])

### Reducing Dimensions Using PCA

In [61]:
# Use PCA to reduce dimensions to 3 principal components
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X_scaled)
print(X_pca)


[[ 2.19479464 -0.50533864 -0.22509385]
 [ 2.25696085 -0.44726825 -0.03619437]
 [ 2.20269983 -0.47656403 -0.16014468]
 ...
 [-0.33078064 -0.02968848 -0.00720316]
 [-0.8404262   0.09869862 -0.03845541]
 [ 5.45586751 -1.36654923 -0.69066315]]


In [62]:
# Create a DataFrame with the principal components data
pcs_df=pd.DataFrame(data=X_pca,columns=["PC 1", "PC 2","PC 3"],index=crypto_df.index)
pcs_df.head()


Unnamed: 0,PC 1,PC 2,PC 3
42,2.194795,-0.505339,-0.225094
NSR,2.256961,-0.447268,-0.036194
TRI,2.2027,-0.476564,-0.160145
CMTC,3.07559,-0.742433,-0.360982
CHAT,-0.01488,-0.133216,-0.169996


### Clustering Crytocurrencies Using K-Means

#### Finding the Best Value for `k` Using the Elbow Curve

In [None]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:


# Create the Elbow Curve using hvPlot



Running K-Means with `k=<your best value for k here>`

In [None]:
# Initialize the K-Means model

# Fit the model

# Predict clusters

# Create a new DataFrame including predicted clusters and cryptocurrencies features



### Visualizing Results

#### 3D-Scatter with Clusters

In [None]:
# Create a 3D-Scatter with the PCA data and the clusters



#### Table of Tradable Cryptocurrencies

In [None]:
# Table with tradable cryptos



In [None]:
# Print the total number of tradable cryptocurrencies

#### Scatter Plot with Tradable Cryptocurrencies

In [None]:
# Scale data to create the scatter plot



In [None]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"

