# Clustering Crypto

In [72]:
# Initial imports
import requests
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

### Fetching Cryptocurrency Data

In [73]:
# Use the following endpoint to fetch json data
url = "https://min-api.cryptocompare.com/data/all/coinlist"
response = requests.get(url).json()

In [74]:
# Create a DataFrame 
# HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.

crypto_df = pd.DataFrame(response['Data']).T
crypto_df.head()

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,BlockReward,BlockTime,AssetLaunchDate,MaxSupply,MktCapPenalty,PlatformType,BuiltOn,SmartContractAddress,Difficulty,IsUsedInDefi
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
300,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token,300 token (300),300 token is an ERC20 token. This Token was cr...,,...,0.0,0.0,2017-07-01,-1.0,0.0,token,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,,
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,,,,,,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
433,926547,/coins/433/overview,/media/34836095/433.png,1541597321,433,433,433 Token,433 Token (433),433 Token is a decentralised soccer platform t...,Finished,...,,,,,,,,,,


In [75]:
# Alternatively, use the provided csv file:
file_path = Path("Resources/crypto_data.csv")

# Create a DataFrame
crypto_df = pd.read_csv(file_path, index_col=0)
crypto_df.head(10)

NameError: name 'Path' is not defined

### Data Preprocessing

In [76]:
# Keep only necessary columns:
# 'CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply'

crypto_df=crypto_df[['CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply']]
crypto_df.head(10)

KeyError: "['TotalCoinSupply'] not in index"

In [77]:
# Keep only cryptocurrencies that are trading

crypto_df = crypto_df[crypto_df["IsTrading"] == True]
print(crypto_df.shape)
crypto_df.head(10)

(4554, 30)


Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,BlockReward,BlockTime,AssetLaunchDate,MaxSupply,MktCapPenalty,PlatformType,BuiltOn,SmartContractAddress,Difficulty,IsUsedInDefi
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
300,749869,/coins/300/overview,/media/27010595/300.png,1517935016,300,300,300 token,300 token (300),300 token is an ERC20 token. This Token was cr...,,...,0.0,0.0,2017-07-01,-1.0,0.0,token,ETH,0xaec98a708810414878c3bcdf46aad31ded4a4557,,
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,,,,,,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
611,20909,/coins/611/overview,/media/35650940/611-sixeleven.png,1465914773,611,611,SixEleven,SixEleven (611),"611 is a Namecoin based cryptocurrency, and it...",,...,,,,,,,,,,
808,28223,/coins/808/overview,/media/351513/808.png,1473980395,808,808,808,808 (808),808 is a coin develop for the music community ...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
888,29462,/coins/888/overview,/media/351639/888.png,1475534352,888,888,Octocoin,Octocoin (888),Octocoin is a Proof of Work cryptocurrency. It...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
1337,20824,/coins/1337/overview,/media/35520987/elite.png,1465838687,1337,1337,EliteCoin,EliteCoin (1337),1337 coin was created as an experimental Proof...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
2015,3744,/coins/2015/overview,/media/20180/2015.png,1425316878,2015,2015,2015 coin,2015 coin (2015),Twenty15 Coin was conceived to be an asset bac...,,...,,,,,,,,,,
NXT,1183,/coins/nxt/overview,/media/20627/nxt.png,1417635253,NXT,NXT,Nxt,Nxt (NXT),Nxt is an open-source blockchain platform and...,Finished,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,


In [78]:
# Keep only cryptocurrencies with a working algorithm

crypto_df = crypto_df[crypto_df["Algorithm"] != "N/A"]
print(crypto_df.shape)
crypto_df.head(10)

(1464, 30)


Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,BlockReward,BlockTime,AssetLaunchDate,MaxSupply,MktCapPenalty,PlatformType,BuiltOn,SmartContractAddress,Difficulty,IsUsedInDefi
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,,,,,,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
611,20909,/coins/611/overview,/media/35650940/611-sixeleven.png,1465914773,611,611,SixEleven,SixEleven (611),"611 is a Namecoin based cryptocurrency, and it...",,...,,,,,,,,,,
808,28223,/coins/808/overview,/media/351513/808.png,1473980395,808,808,808,808 (808),808 is a coin develop for the music community ...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
1337,20824,/coins/1337/overview,/media/35520987/elite.png,1465838687,1337,1337,EliteCoin,EliteCoin (1337),1337 coin was created as an experimental Proof...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
2015,3744,/coins/2015/overview,/media/20180/2015.png,1425316878,2015,2015,2015 coin,2015 coin (2015),Twenty15 Coin was conceived to be an asset bac...,,...,,,,,,,,,,
NXT,1183,/coins/nxt/overview,/media/20627/nxt.png,1417635253,NXT,NXT,Nxt,Nxt (NXT),Nxt is an open-source blockchain platform and...,Finished,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
BTCD,4400,/coins/btcd/overview,/media/19630/btcd_1.png,1427711372,BTCD,BTCD,BitcoinDark,BitcoinDark (BTCD),Bitcoin Dark (BTCD) is a PoW and PoS hybrid al...,,...,,,,,,,,,,
CRAIG,4425,/coins/craig/overview,/media/20022/craig.png,1427711632,CRAIG,CRAIG,CraigsCoin,CraigsCoin (CRAIG),CraigCoin (CRAIG) is a 100% pure PoS or Proof ...,,...,,,,,,,,,,


In [79]:
# Remove the "IsTrading" column

crypto_df.drop("IsTrading", axis=1, inplace=True)
print(crypto_df.shape)
crypto_df.head(10)

(1464, 29)


Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,BlockReward,BlockTime,AssetLaunchDate,MaxSupply,MktCapPenalty,PlatformType,BuiltOn,SmartContractAddress,Difficulty,IsUsedInDefi
42,4321,/coins/42/overview,/media/35650717/42.jpg,1427211129,42,42,42 Coin,42 Coin (42),Everything about 42 coin is 42 - apart from th...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
365,33639,/coins/365/overview,/media/352070/365.png,1480032918,365,365,365Coin,365Coin (365),365Coin is a Proof of Work and Proof of Stake ...,,...,,,,,,,,,,
404,21227,/coins/404/overview,/media/35650851/404-300x300.jpg,1466100361,404,404,404Coin,404Coin (404),404 is a PoW/PoS hybrid cryptocurrency that al...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
611,20909,/coins/611/overview,/media/35650940/611-sixeleven.png,1465914773,611,611,SixEleven,SixEleven (611),"611 is a Namecoin based cryptocurrency, and it...",,...,,,,,,,,,,
808,28223,/coins/808/overview,/media/351513/808.png,1473980395,808,808,808,808 (808),808 is a coin develop for the music community ...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
1337,20824,/coins/1337/overview,/media/35520987/elite.png,1465838687,1337,1337,EliteCoin,EliteCoin (1337),1337 coin was created as an experimental Proof...,,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
2015,3744,/coins/2015/overview,/media/20180/2015.png,1425316878,2015,2015,2015 coin,2015 coin (2015),Twenty15 Coin was conceived to be an asset bac...,,...,,,,,,,,,,
NXT,1183,/coins/nxt/overview,/media/20627/nxt.png,1417635253,NXT,NXT,Nxt,Nxt (NXT),Nxt is an open-source blockchain platform and...,Finished,...,0.0,0.0,0000-00-00,0.0,0.0,,,,,
BTCD,4400,/coins/btcd/overview,/media/19630/btcd_1.png,1427711372,BTCD,BTCD,BitcoinDark,BitcoinDark (BTCD),Bitcoin Dark (BTCD) is a PoW and PoS hybrid al...,,...,,,,,,,,,,
CRAIG,4425,/coins/craig/overview,/media/20022/craig.png,1427711632,CRAIG,CRAIG,CraigsCoin,CraigsCoin (CRAIG),CraigCoin (CRAIG) is a 100% pure PoS or Proof ...,,...,,,,,,,,,,


In [80]:
# Remove rows with at least 1 null value

crypto_df = crypto_df.dropna(axis=0, how="any")
print(crypto_df.shape)
crypto_df.head(10)

(0, 29)


Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,BlockReward,BlockTime,AssetLaunchDate,MaxSupply,MktCapPenalty,PlatformType,BuiltOn,SmartContractAddress,Difficulty,IsUsedInDefi


In [81]:
# Remove rows with cryptocurrencies having no coins mined

crypto_df = crypto_df[crypto_df["TotalCoinsMined"] > 0]
print(crypto_df.shape)
crypto_df.head(10)

(0, 29)


Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,BlockReward,BlockTime,AssetLaunchDate,MaxSupply,MktCapPenalty,PlatformType,BuiltOn,SmartContractAddress,Difficulty,IsUsedInDefi


In [82]:
# Drop rows where there are 'N/A' text values

crypto_df = crypto_df[crypto_df.iloc[:] != 'N/A'].dropna()
crypto_df.head(10)

Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,CoinName,FullName,Description,AssetTokenStatus,...,BlockReward,BlockTime,AssetLaunchDate,MaxSupply,MktCapPenalty,PlatformType,BuiltOn,SmartContractAddress,Difficulty,IsUsedInDefi


In [83]:
# Store the 'CoinName'column in its own DataFrame prior to dropping it from crypto_df
coins_name = pd.DataFrame(crypto_df["CoinName"], index=crypto_df.index)
print(coins_name.shape)
coins_name.head()

(0, 1)


Unnamed: 0,CoinName


In [84]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm
crypto_df = crypto_df.drop("CoinName", axis=1)
print(crypto_df.shape)
crypto_df.head(10)

(0, 28)


Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,FullName,Description,AssetTokenStatus,Algorithm,...,BlockReward,BlockTime,AssetLaunchDate,MaxSupply,MktCapPenalty,PlatformType,BuiltOn,SmartContractAddress,Difficulty,IsUsedInDefi


In [85]:
# Create dummy variables for text features
X = pd.get_dummies(data=crypto_df, columns=["Algorithm", "ProofType"])
print(X.shape)
X.head(10)

(0, 26)


Unnamed: 0,Id,Url,ImageUrl,ContentCreatedOn,Name,Symbol,FullName,Description,AssetTokenStatus,SortOrder,...,BlockReward,BlockTime,AssetLaunchDate,MaxSupply,MktCapPenalty,PlatformType,BuiltOn,SmartContractAddress,Difficulty,IsUsedInDefi


In [86]:
# Standardize data
X = StandardScaler().fit_transform(X)
X[:5]

ValueError: Found array with 0 sample(s) (shape=(0, 26)) while a minimum of 1 is required by StandardScaler.

### Reducing Dimensions Using PCA

In [87]:
# Use PCA to reduce dimensions to 3 principal components
n_comp = 3
pca = PCA(n_components=n_comp)
principal_components = pca.fit_transform(X)
principal_components

ValueError: Found array with 0 sample(s) (shape=(0, 26)) while a minimum of 1 is required.

In [88]:
# Create a DataFrame with the principal components data
col_names = [f"PC {i}" for i in range(1, n_comp + 1)]
pcs_df = pd.DataFrame(principal_components, columns=col_names, index=crypto_df.index)
print(pcs_df.shape)
pcs_df.head(10)

NameError: name 'principal_components' is not defined

### Clustering Crytocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [89]:
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(pcs_df)
    inertia.append(km.inertia_)

# Create the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")

NameError: name 'pcs_df' is not defined

Running K-Means with `k=<your best value for k here>`

In [90]:
# Initialize the K-Means model
model = KMeans(n_clusters=4, random_state=0)

# Fit the model
model.fit(pcs_df)

# Predict clusters
predictions = model.predict(pcs_df)

# Create a new DataFrame including predicted clusters and cryptocurrencies features
clustered_df = pd.concat([crypto_df, pcs_df], axis=1, sort=False)
clustered_df["CoinName"] = coins_name["CoinName"]
clustered_df["Class"] = model.labels_
print(clustered_df.shape)
clustered_df.head(10)

NameError: name 'pcs_df' is not defined

### Visualizing Results

#### 3D-Scatter with Clusters

In [91]:
# Create a 3D-Scatter with the PCA data and the clusters
fig = px.scatter_3d(
    clustered_df,
    x="PC 1",
    y="PC 2",
    z="PC 3",
    color="Class",
    symbol="Class",
    hover_name="CoinName",
    hover_data=["Algorithm"],
    width=800,
)
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

NameError: name 'clustered_df' is not defined

#### Table of Tradable Cryptocurrencies

In [92]:
# Table with tradable cryptos
clustered_df[
    [
        "CoinName",
        "Algorithm",
        "ProofType",
        "TotalCoinSupply",
        "TotalCoinsMined",
        "Class",
    ]
].hvplot.table()

NameError: name 'clustered_df' is not defined

In [93]:
# Print the total number of tradable cryptocurrencies
print(f"There are {clustered_df.shape[0]} tradable cryptocurrencies.")

NameError: name 'clustered_df' is not defined

#### Scatter Plot with Tradable Cryptocurrencies

In [94]:
# Scale data to create the scatter plot
mm_scaler = MinMaxScaler()
plot_data = mm_scaler.fit_transform(
    clustered_df[["TotalCoinSupply", "TotalCoinsMined"]]
)
plot_df = pd.DataFrame(
    plot_data, columns=["TotalCoinSupply", "TotalCoinsMined"], index=clustered_df.index
)
plot_df["CoinName"] = clustered_df["CoinName"]
plot_df["Class"] = clustered_df["Class"]
plot_df.head()

NameError: name 'clustered_df' is not defined

In [95]:
# Plot the scatter with x="TotalCoinsMined" and y="TotalCoinSupply"
plot_df.hvplot.scatter(
    x="TotalCoinsMined", y="TotalCoinSupply", hover_cols=["CoinName"], by="Class"
)

NameError: name 'plot_df' is not defined