In [263]:
# Dependencies
import warnings
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
import pandas as pd
import numpy as np
warnings.simplefilter(action='ignore', category=FutureWarning)
import matplotlib.pyplot as plt

In [264]:
# Read in crypto data csv
df = pd.read_csv('crypto_data.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [265]:
# Check # of rows and columns
df.shape

(1252, 7)

In [266]:
# Get indexes where IsTrading is False
indexNames = df[df['IsTrading'] == False].index

# Delete False rows from dataframe
df.drop(indexNames, inplace=True)


In [267]:
# Recheck # of rows and columns to make sure False values dropped
df.shape

(1144, 7)

In [268]:
# Drop IsTrading column
df = df.drop(columns='IsTrading')


In [269]:
# Preview dataframe
df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42
1,365,365Coin,X11,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,PoW,,611000
4,808,808,SHA-256,PoW/PoS,0.0,0


In [270]:
# Drop rows with null values
df = df.dropna(how='any', axis=0)

In [271]:
# Any drops?
df.shape

(685, 6)

In [272]:
# Filter for cryptocurrencies that have been mined
df = df.loc[df['TotalCoinsMined'] !=0]


In [273]:
# Any drops?
df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
5,1337,EliteCoin,X13,PoW/PoS,29279420000.0,314159265359
7,BTC,Bitcoin,SHA-256,PoW,17927180.0,21000000
8,ETH,Ethereum,Ethash,PoW,107684200.0,0


In [274]:
# Drop CoinName column
df = df.drop(columns='CoinName')

In [275]:
# Preview dataframe
df.head()

Unnamed: 0.1,Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,Scrypt,PoW/PoS,41.99995,42
2,404,Scrypt,PoW/PoS,1055185000.0,532000000
5,1337,X13,PoW/PoS,29279420000.0,314159265359
7,BTC,SHA-256,PoW,17927180.0,21000000
8,ETH,Ethash,PoW,107684200.0,0


In [276]:
# Drop Unnamed: 0 column
df = df.drop(columns='Unnamed: 0')

In [277]:
df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42
2,Scrypt,PoW/PoS,1055185000.0,532000000
5,X13,PoW/PoS,29279420000.0,314159265359
7,SHA-256,PoW,17927180.0,21000000
8,Ethash,PoW,107684200.0,0


In [278]:
# Convert Algorithm and ProofType to numerical data by creating dummy variables
dummy_df = pd.get_dummies(df, columns=['Algorithm', 'ProofType'])
dummy_df.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,41.99995,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1055185000.0,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,29279420000.0,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,17927180.0,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,107684200.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [279]:
# Check data types
dummy_df.dtypes

TotalCoinsMined                     float64
TotalCoinSupply                      object
Algorithm_1GB AES Pattern Search      uint8
Algorithm_536                         uint8
Algorithm_Argon2d                     uint8
                                     ...   
ProofType_Proof of Authority          uint8
ProofType_Proof of Trust              uint8
ProofType_TPoS                        uint8
ProofType_Zero-Knowledge Proof        uint8
ProofType_dPoW/PoW                    uint8
Length: 100, dtype: object

In [280]:
# Convert object data types to float and verify
dummy_df['TotalCoinSupply'] = pd.to_numeric(dummy_df.TotalCoinSupply, errors='coerce')
dummy_df.dtypes


TotalCoinsMined                     float64
TotalCoinSupply                     float64
Algorithm_1GB AES Pattern Search      uint8
Algorithm_536                         uint8
Algorithm_Argon2d                     uint8
                                     ...   
ProofType_Proof of Authority          uint8
ProofType_Proof of Trust              uint8
ProofType_TPoS                        uint8
ProofType_Zero-Knowledge Proof        uint8
ProofType_dPoW/PoW                    uint8
Length: 100, dtype: object

In [298]:
# Standardize data by scaling
scaler = StandardScaler()
scaled_data = scaler.fit_transform(dummy_df[['TotalCoinsMined', 'TotalCoinSupply']])


In [304]:
# Create new df from scaled data
scaled_df = pd.DataFrame(scaled_data, columns=dummy_df.columns[0:2])
scaled_df.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply
0,-0.116748,-0.152865
1,-0.093589,-0.144996
2,0.525872,4.493764
3,-0.116354,-0.152554
4,-0.114384,-0.152865


In [305]:
# Add scaled columns to dummy_df
dummy_df['TotalCoinsMined'] = scaled_df['TotalCoinsMined']
dummy_df['TotalCoinSupply'] = scaled_df['TotalCoinSupply']
dummy_df.head()


Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,-0.116748,-0.152865,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,0.525872,4.493764,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,-0.115364,-0.151622,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,-0.11637,-0.152865,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,-0.11426,-0.149759,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
