# Clustering Crypto

In [3]:
# Initial imports
import requests
import pandas as pd
# import hvplot.pandas
!pip install -U altair
import altair as alt
from path import Path
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler



### Fetching Cryptocurrency Data

In [4]:
# Read the raw coin listing from the CSV shipped with the notebook.
# Note: when running on AWS the file sits next to the notebook instead:
# file_path = Path("crypto_data.csv")
file_path = Path("Resources/crypto_data.csv")
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=10)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0
5,1337,EliteCoin,X13,True,PoW/PoS,29279420000.0,314159265359
6,2015,2015 coin,X11,True,PoW/PoS,,0
7,BTC,Bitcoin,SHA-256,True,PoW,17927180.0,21000000
8,ETH,Ethereum,Ethash,True,PoW,107684200.0,0
9,LTC,Litecoin,Scrypt,True,PoW,63039240.0,84000000


### THE API DOESN'T GIVE YOU 'TOTAL COIN SUPPLY', SO USING THE CSV INSTEAD

In [5]:
# # Use the following endpoint to fetch json data
# url = "https://min-api.cryptocompare.com/data/all/coinlist"
# response = requests.get(url).json()

In [6]:
# # Create a DataFrame 
# # HINT: You will need to use the 'Data' key from the json response, then transpose the DataFrame.
# df = pd.DataFrame(response['Data']).T
# df.head()

In [7]:
# Alternatively, use the provided csv file:
# file_path = Path("Resources/crypto_data.csv")

# Create a DataFrame

### Data Preprocessing

In [8]:
# Keep only the columns needed for the analysis.
# The CSV is the data source because the API response does not provide
# 'TotalCoinSupply'; with the API the list would be the same minus that column.
keep_columns = ['CoinName','Algorithm','IsTrading','ProofType','TotalCoinsMined','TotalCoinSupply']

# Select all wanted columns in one step instead of copying them one by one into
# an empty frame. .copy() makes crypto_df independent of the raw `df`, so later
# filtering/dropping never touches (or warns about) the original data.
crypto_df = df[keep_columns].copy()

crypto_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365Coin,X11,True,PoW/PoS,,2300000000
2,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,SixEleven,SHA-256,True,PoW,,611000
4,808,SHA-256,True,PoW/PoS,0.0,0


In [9]:
crypto_df.shape

(1252, 6)

In [10]:
# Keep only cryptocurrencies that are trading (rows whose IsTrading is not False)
is_active = crypto_df["IsTrading"].ne(False)
crypto_df = crypto_df[is_active]
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365Coin,X11,True,PoW/PoS,,2300000000
2,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,SixEleven,SHA-256,True,PoW,,611000
4,808,SHA-256,True,PoW/PoS,0.0,0


In [11]:
crypto_df.shape

(1144, 6)

In [12]:
# Keep only cryptocurrencies with a working algorithm, i.e. whose Algorithm is
# not the literal text "N/A".
# NOTE(review): the row count is unchanged by this step; pd.read_csv's default
# NA handling presumably already turned "N/A" into NaN on load - confirm.
has_algorithm = crypto_df["Algorithm"].ne("N/A")
crypto_df = crypto_df.loc[has_algorithm]
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365Coin,X11,True,PoW/PoS,,2300000000
2,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,SixEleven,SHA-256,True,PoW,,611000
4,808,SHA-256,True,PoW/PoS,0.0,0


In [13]:
crypto_df.shape

(1144, 6)

In [14]:
# IsTrading is constant after the filter above, so it carries no information
crypto_df = crypto_df.drop(columns="IsTrading")
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,PoW/PoS,41.99995,42
1,365Coin,X11,PoW/PoS,,2300000000
2,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
3,SixEleven,SHA-256,PoW,,611000
4,808,SHA-256,PoW/PoS,0.0,0


In [15]:
# Remove rows with at least 1 null value
crypto_df.shape

(1144, 5)

In [16]:
# Report how many missing values each column contains
null_counts = crypto_df.isnull().sum()
for column, n_missing in null_counts.items():
    print(f"Column {column} has {n_missing} null values")

Column CoinName has 0 null values
Column Algorithm has 0 null values
Column ProofType has 0 null values
Column TotalCoinsMined has 459 null values
Column TotalCoinSupply has 0 null values


In [17]:
# Remove rows with at least 1 null value
crypto_df = crypto_df.dropna()
crypto_df.shape

(685, 5)

In [18]:
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,PoW/PoS,41.99995,42
2,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
4,808,SHA-256,PoW/PoS,0.0,0
5,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
7,Bitcoin,SHA-256,PoW,17927180.0,21000000


In [19]:
# Keep only cryptocurrencies that have mined at least some coins
has_mined_coins = crypto_df["TotalCoinsMined"].gt(0)
crypto_df = crypto_df[has_mined_coins]
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,PoW/PoS,41.99995,42
2,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
5,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
7,Bitcoin,SHA-256,PoW,17927180.0,21000000
8,Ethereum,Ethash,PoW,107684200.0,0


In [20]:
crypto_df.shape

(532, 5)

In [21]:
# Drop rows where there are 'N/A' text values.
# Only the non-numeric columns are compared: a numeric column can never equal
# the string 'N/A', and comparing numbers against a string is the likely source
# of the comparison warning this cell used to emit.
text_columns = crypto_df.select_dtypes(exclude="number")
has_no_na_text = text_columns.ne('N/A').all(axis=1)

# dropna() is kept so rows with missing values are still removed, as before.
crypto_df = crypto_df[has_no_na_text].dropna()

crypto_df.head()

  result = method(y)


Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,PoW/PoS,41.99995,42
2,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
5,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
7,Bitcoin,SHA-256,PoW,17927180.0,21000000
8,Ethereum,Ethash,PoW,107684200.0,0


In [22]:
crypto_df.shape

(532, 5)

In [23]:
# Renumber the rows 0..n-1 for ease later on; drop=True discards the old index
# instead of keeping it as an extra "index" column.
crypto_df = crypto_df.reset_index(drop=True)
crypto_df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,Scrypt,PoW/PoS,41.99995,42
1,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
2,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
3,Bitcoin,SHA-256,PoW,17927180.0,21000000
4,Ethereum,Ethash,PoW,107684200.0,0


In [24]:
# Save a copy of the cleaned data, indexed by coin name, for later table use
coins_list_df = crypto_df.set_index("CoinName")
coins_list_df.head()

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42 Coin,Scrypt,PoW/PoS,41.99995,42
404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
Bitcoin,SHA-256,PoW,17927180.0,21000000
Ethereum,Ethash,PoW,107684200.0,0


In [25]:
# Keep the coin names in their own single-column DataFrame before the column
# is dropped from crypto_df
CoinNameDf = crypto_df.loc[:, ["CoinName"]]
CoinNameDf.head()

Unnamed: 0,CoinName
0,42 Coin
1,404Coin
2,EliteCoin
3,Bitcoin
4,Ethereum


In [26]:
# The coin name is an identifier, not a feature, so it is excluded from the
# data handed to the clustering algorithm
crypto_df = crypto_df.drop("CoinName", axis=1)
crypto_df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42
1,Scrypt,PoW/PoS,1055185000.0,532000000
2,X13,PoW/PoS,29279420000.0,314159265359
3,SHA-256,PoW,17927180.0,21000000
4,Ethash,PoW,107684200.0,0


In [27]:
# One-hot encode the text features; the numeric columns pass through unchanged
categorical_features = ["Algorithm", "ProofType"]
crypto_df_encoded = pd.get_dummies(crypto_df, columns=categorical_features)
crypto_df_encoded.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,41.99995,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,1055185000.0,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,29279420000.0,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
3,17927180.0,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,107684200.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
# Standardize data
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the encoded features and transform them in one step
data_scaler = StandardScaler()
crypto_data_scaled = data_scaler.fit_transform(crypto_df_encoded)

# Peek at the first five scaled values of the first row
crypto_data_scaled[0, :5]

array([-0.11710817, -0.1528703 , -0.0433963 , -0.0433963 , -0.0433963 ])

### Reducing Dimensions Using PCA

In [29]:
# Use PCA to reduce dimensions to 3 principal components

# Initialize PCA model
pca = PCA(n_components=3)

# Fit PCA on the scaled crypto data and project it onto the 3 principal components.
crypto_pca = pca.fit_transform(crypto_data_scaled)
crypto_pca

array([[-0.33033063,  1.00581353, -0.57099885],
       [-0.313672  ,  1.00649362, -0.57146161],
       [ 2.30926531,  1.74251967, -0.67340192],
       ...,
       [ 0.32698172, -2.29900342,  0.40380232],
       [-0.16031203, -1.93579189,  0.50017677],
       [-0.28290149,  0.77116608, -0.26860982]])

# How to retain the component name below?

In [30]:
# Wrap the PCA output in a DataFrame with one labelled column per component
component_columns = ["principal component 1", "principal component 2", "principal component 3"]
df_crypto_pca = pd.DataFrame(crypto_pca, columns=component_columns)
df_crypto_pca.head()

Unnamed: 0,principal component 1,principal component 2,principal component 3
0,-0.330331,1.005814,-0.570999
1,-0.313672,1.006494,-0.571462
2,2.309265,1.74252,-0.673402
3,-0.146507,-1.305472,0.194867
4,-0.149122,-2.102588,0.337756


In [31]:
# Attach the coin names to the principal components. Both frames carry the same
# default 0..n-1 index (crypto_df was re-indexed before CoinNameDf was taken),
# so the names line up row for row.
# assign() adds the column exactly once; concatenating CoinNameDf and then
# assigning the same column again was redundant and produced a duplicate
# 'CoinName' column whenever the cell was re-run.
df_crypto_pca = df_crypto_pca.assign(CoinName=CoinNameDf['CoinName'])
df_crypto_pca.head()

Unnamed: 0,principal component 1,principal component 2,principal component 3,CoinName
0,-0.330331,1.005814,-0.570999,42 Coin
1,-0.313672,1.006494,-0.571462,404Coin
2,2.309265,1.74252,-0.673402,EliteCoin
3,-0.146507,-1.305472,0.194867,Bitcoin
4,-0.149122,-2.102588,0.337756,Ethereum


In [32]:
# Index the principal components by coin name (set_index moves the column
# into the index, so no separate drop is needed)
df_crypto_pca = df_crypto_pca.set_index("CoinName")
df_crypto_pca.head()

Unnamed: 0_level_0,principal component 1,principal component 2,principal component 3
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
42 Coin,-0.330331,1.005814,-0.570999
404Coin,-0.313672,1.006494,-0.571462
EliteCoin,2.309265,1.74252,-0.673402
Bitcoin,-0.146507,-1.305472,0.194867
Ethereum,-0.149122,-2.102588,0.337756


In [33]:
print(f" the explained variance ratio is: {pca.explained_variance_ratio_}")

 the explained variance ratio is: [0.02793112 0.02137531 0.0205025 ]


In [34]:
# Share of the total variance captured by the three components together
total_explained = pca.explained_variance_ratio_.sum()

print(f" the explained variance sum is: {round(total_explained,4)}")

 the explained variance sum is: 0.0698


### Clustering Cryptocurrencies Using K-Means

#### Find the Best Value for `k` Using the Elbow Curve

In [35]:
# Candidate numbers of clusters to evaluate
k = list(range(1, 11))

# Fit one K-Means model per candidate k and record its inertia
inertia = [
    KMeans(n_clusters=n_clusters, random_state=0).fit(df_crypto_pca).inertia_
    for n_clusters in k
]




In [36]:
# Collect the elbow-curve data (k vs. inertia) in a DataFrame.
# The hvPlot version of the chart would be:
# df_elbow.hvplot.line(x="k", y="inertia", xticks=k, title="Elbow Curve")
# This notebook uses Altair instead.
df_elbow = pd.DataFrame({"k": k, "inertia": inertia})
df_elbow["k"].values



array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [37]:
df_elbow

Unnamed: 0,k,inertia
0,1,3639.557978
1,2,2471.885174
2,3,1486.240607
3,4,554.100992
4,5,345.981385
5,6,256.872898
6,7,194.365541
7,8,161.615544
8,9,129.399003
9,10,105.232878


In [38]:
# Create the Elbow Curve using Altair: inertia as a function of the number of
# clusters k. The title is set explicitly so the figure stands on its own.
alt.Chart(df_elbow).mark_line().encode(
    x="k",
    y="inertia"
).properties(title="Elbow Curve")

Running K-Means with `k=4`

In [39]:
# Initialize the K-Means model with k=4 and fit it to the principal components
kmeansModel = KMeans(n_clusters=4, random_state=0).fit(crypto_pca)

# Predict the cluster of every coin
predictions = kmeansModel.predict(crypto_pca)
predictions



array([0, 0, 0, 3, 3, 3, 0, 3, 3, 3, 0, 3, 0, 0, 3, 0, 3, 3, 0, 0, 3, 3,
       3, 3, 3, 0, 3, 3, 3, 0, 3, 0, 3, 3, 0, 0, 3, 3, 3, 3, 3, 3, 0, 0,
       3, 3, 3, 3, 3, 0, 0, 3, 0, 3, 3, 3, 3, 0, 3, 3, 0, 3, 0, 0, 0, 3,
       3, 3, 0, 0, 0, 0, 0, 3, 3, 3, 0, 0, 3, 0, 3, 0, 0, 3, 3, 3, 3, 0,
       0, 3, 0, 3, 3, 0, 0, 3, 0, 0, 3, 3, 0, 0, 3, 0, 0, 3, 0, 3, 0, 3,
       0, 3, 0, 0, 3, 3, 0, 3, 3, 3, 0, 3, 3, 3, 3, 3, 0, 0, 3, 3, 3, 0,
       3, 0, 3, 3, 0, 3, 0, 3, 0, 0, 3, 3, 0, 3, 3, 0, 0, 3, 0, 3, 0, 0,
       0, 3, 3, 3, 3, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0,
       0, 3, 0, 3, 0, 0, 3, 0, 3, 0, 0, 3, 0, 3, 0, 3, 0, 3, 0, 0, 0, 0,
       3, 0, 0, 0, 0, 0, 3, 3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0,
       0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 3, 3, 3, 0, 0, 0, 0, 3, 0, 3, 0,
       0, 3, 0, 3, 3, 0, 3, 3, 0, 3, 0, 0, 0, 3, 0, 0, 3, 0, 0, 0, 0, 0,
       0, 0, 3, 0, 3, 0, 0, 0, 0, 3, 0, 3, 0, 3, 3, 3, 3, 0, 3, 0, 0, 3,
       0, 3, 3, 3, 0, 3, 0, 3, 3, 3, 0, 3, 0, 3, 0,

In [40]:
# Add the predicted cluster of each coin as a "class" column next to its
# principal components
df_crypto_pca = df_crypto_pca.assign(**{"class": predictions})
df_crypto_pca.head(10)

Unnamed: 0_level_0,principal component 1,principal component 2,principal component 3,class
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42 Coin,-0.330331,1.005814,-0.570999,0
404Coin,-0.313672,1.006494,-0.571462,0
EliteCoin,2.309265,1.74252,-0.673402,0
Bitcoin,-0.146507,-1.305472,0.194867,3
Ethereum,-0.149122,-2.102588,0.337756,3
Litecoin,-0.160883,-1.166096,-0.021221,3
Dash,-0.405959,1.316837,-0.516569,0
Monero,-0.149771,-2.237511,0.388588,3
Ethereum Classic,-0.147564,-2.102635,0.337728,3
ZCash,-0.160311,-1.935792,0.500177,3


### Visualizing Results

#### 3D-Scatter with Clusters

In [41]:
# Move CoinName from the index back into a regular column so each plotted
# point can be labelled by name (e.g. hover_name='CoinName')
hvplotdf = df_crypto_pca.reset_index()
hvplotdf.head()

Unnamed: 0,CoinName,principal component 1,principal component 2,principal component 3,class
0,42 Coin,-0.330331,1.005814,-0.570999,0
1,404Coin,-0.313672,1.006494,-0.571462,0
2,EliteCoin,2.309265,1.74252,-0.673402,0
3,Bitcoin,-0.146507,-1.305472,0.194867,3
4,Ethereum,-0.149122,-2.102588,0.337756,3


In [42]:
# Place the PCA/cluster columns side by side with the original coin features
df_new = pd.concat(objs=[hvplotdf, crypto_df], axis="columns")
df_new.head()

Unnamed: 0,CoinName,principal component 1,principal component 2,principal component 3,class,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42 Coin,-0.330331,1.005814,-0.570999,0,Scrypt,PoW/PoS,41.99995,42
1,404Coin,-0.313672,1.006494,-0.571462,0,Scrypt,PoW/PoS,1055185000.0,532000000
2,EliteCoin,2.309265,1.74252,-0.673402,0,X13,PoW/PoS,29279420000.0,314159265359
3,Bitcoin,-0.146507,-1.305472,0.194867,3,SHA-256,PoW,17927180.0,21000000
4,Ethereum,-0.149122,-2.102588,0.337756,3,Ethash,PoW,107684200.0,0


In [43]:
# Scatter of the first two principal components, coloured by K-Means cluster.
# This is a 2D view; a 3D alternative is Plotly's px.scatter_3d over hvplotdf
# with x/y/z set to the three principal components, color='class' and
# hover_name='CoinName'.
alt.Chart(df_new).mark_circle(size=60).encode(
    x='principal component 1',
    y='principal component 2',
    # ':N' declares the cluster id as nominal (categorical). Without it the
    # integer column is treated as quantitative and drawn with a continuous
    # colour gradient, which wrongly suggests the clusters are ordered.
    color='class:N',
    tooltip=['CoinName', 'Algorithm', 'TotalCoinsMined', 'TotalCoinSupply']
).properties(
    title='Cryptocurrency clusters (first two principal components)'
).interactive()


#### Table of Tradable Cryptocurrencies

In [44]:
df_crypto_pca.head()

Unnamed: 0_level_0,principal component 1,principal component 2,principal component 3,class
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42 Coin,-0.330331,1.005814,-0.570999,0
404Coin,-0.313672,1.006494,-0.571462,0
EliteCoin,2.309265,1.74252,-0.673402,0
Bitcoin,-0.146507,-1.305472,0.194867,3
Ethereum,-0.149122,-2.102588,0.337756,3


In [45]:
coins_list_df.head()

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42 Coin,Scrypt,PoW/PoS,41.99995,42
404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
Bitcoin,SHA-256,PoW,17927180.0,21000000
Ethereum,Ethash,PoW,107684200.0,0


In [46]:
# Table with tradable cryptos: coin attributes next to their principal
# components and cluster, both frames being indexed by CoinName
combined_crypto_table = pd.concat(objs=[coins_list_df, df_crypto_pca], axis="columns")

In [47]:
display(combined_crypto_table)

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,principal component 1,principal component 2,principal component 3,class
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
42 Coin,Scrypt,PoW/PoS,4.199995e+01,42,-0.330331,1.005814,-0.570999,0
404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000,-0.313672,1.006494,-0.571462,0
EliteCoin,X13,PoW/PoS,2.927942e+10,314159265359,2.309265,1.742520,-0.673402,0
Bitcoin,SHA-256,PoW,1.792718e+07,21000000,-0.146507,-1.305472,0.194867,3
Ethereum,Ethash,PoW,1.076842e+08,0,-0.149122,-2.102588,0.337756,3
Litecoin,Scrypt,PoW,6.303924e+07,84000000,-0.160883,-1.166096,-0.021221,3
Dash,X11,PoW/PoS,9.031294e+06,22000000,-0.405959,1.316837,-0.516569,0
Monero,CryptoNight-V7,PoW,1.720114e+07,0,-0.149771,-2.237511,0.388588,3
Ethereum Classic,Ethash,PoW,1.133597e+08,210000000,-0.147564,-2.102635,0.337728,3
ZCash,Equihash,PoW,7.383056e+06,21000000,-0.160311,-1.935792,0.500177,3


In [48]:
# Print the total number of tradable cryptocurrencies (one table row per coin)
tradable_count = len(combined_crypto_table)
print (f"The total number of tradeable cryptocurrencies is {tradable_count}")

The total number of tradeable cryptocurrencies is 532


#### Scatter Plot with Tradable Cryptocurrencies

In [49]:
# Rescale the two coin-volume columns with MinMaxScaler for the scatter plot
scaler = MinMaxScaler()
coin_volume_columns = ['TotalCoinsMined','TotalCoinSupply']
data = scaler.fit_transform(combined_crypto_table[coin_volume_columns])

data

array([[0.00000000e+00, 4.20000000e-11],
       [1.06585544e-03, 5.32000000e-04],
       [2.95755135e-02, 3.14159265e-01],
       ...,
       [9.90135079e-04, 1.40022261e-03],
       [7.37028150e-06, 2.10000000e-05],
       [1.29582282e-07, 1.00000000e-06]])

In [50]:
# Wrap the scaled array in a DataFrame, naming the columns with their final
# "...Scaled" labels directly
coins_scaled_df = pd.DataFrame(
    data, columns=["TotalCoinMinedScaled", "TotalCoinSupplyScaled"]
)

coins_scaled_df.head()

Unnamed: 0,TotalCoinMinedScaled,TotalCoinSupplyScaled
0,0.0,4.2e-11
1,0.001066,0.000532
2,0.029576,0.3141593
3,1.8e-05,2.1e-05
4,0.000109,0.0


In [51]:
CoinNameDf.head()

Unnamed: 0,CoinName
0,42 Coin
1,404Coin
2,EliteCoin
3,Bitcoin
4,Ethereum


In [52]:
# Attach the coin names to the scaled values and use them as the index
coins_hv_df = (
    pd.concat([coins_scaled_df, CoinNameDf], axis=1)
    .set_index("CoinName")
)
# Bring over each coin's cluster; the assignment aligns on the CoinName index
coins_hv_df["Class"] = combined_crypto_table["class"]
coins_hv_df.head()

Unnamed: 0_level_0,TotalCoinMinedScaled,TotalCoinSupplyScaled,Class
CoinName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
42 Coin,0.0,4.2e-11,0
404Coin,0.001066,0.000532,0
EliteCoin,0.029576,0.3141593,0
Bitcoin,1.8e-05,2.1e-05,3
Ethereum,0.000109,0.0,3


In [53]:
# Scatter of scaled coins mined vs. scaled coin supply, coloured by cluster.
# Altair only reads DataFrame columns, not the index, so CoinName is moved back
# into a column; otherwise the points cannot be identified in the tooltip.
alt.Chart(coins_hv_df.reset_index()).mark_circle(size=60).encode(
    x='TotalCoinMinedScaled',
    y='TotalCoinSupplyScaled',
    # ':N' marks the cluster id as categorical so it gets discrete colours
    # instead of a continuous gradient.
    color='Class:N',
    tooltip=['CoinName', 'TotalCoinMinedScaled', 'TotalCoinSupplyScaled', 'Class']
).properties(
    title='Tradable cryptocurrencies: coins mined vs. coin supply (scaled)'
).interactive()