In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder


In [2]:
# Path of the input CSV, given relative to the notebook's working directory.
# (pandas is imported with the other dependencies in the first cell.)
file = Path('crypto_data.csv')

In [3]:
# Load the CSV at `file` into the raw DataFrame `df`; later cells derive `df2`
# from it and leave `df` itself unchanged.
df = pd.read_csv(file)

In [4]:
# Preview five randomly chosen rows of the raw data. The fixed seed makes the
# preview show the same rows on every Restart & Run All.
df.sample(5, random_state=0)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
1039,ABS,Absolute Coin,Lyra2REv2,True,PoW/PoS,13332620.0,52500000
579,SEEDS,SeedShares,SHA-256D,True,PoW/PoS,,7996400
753,DNR,Denarius,NIST5,True,PoW/PoS,4171382.0,10000000
571,KURT,Kurrent,X11,True,PoW,61364810.0,228000000
1220,XSB,Extreme Sportsbook,X11,False,PoS,,3000000


In [5]:
# Build the working frame `df2` as a copy of `df` without the IsTrading column.
# NOTE(review): rows are not filtered on IsTrading before the column is
# dropped, so coins with IsTrading == False remain in df2 - confirm that this
# is intended.
df2 = df.drop(columns='IsTrading')
# Fixed seed so the preview shows the same rows on every Restart & Run All.
df2.sample(5, random_state=0)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
32,AGS,Aegis,X13,PoS,,0
742,EQT,EquiTrader,Scrypt,PoW,13673410.0,72000000
842,DSR,Desire,NeoScrypt,PoW,,22000000
913,SKULL,Pirate Blocks,X11,PoW/PoS,,240000000
535,RYCN,RoyalCoin 2.0,X13,PoS,,2500124


In [6]:
# Discard every row that has a missing value in any column.
df2 = df2.dropna()
# Fixed seed so the preview shows the same rows on every Restart & Run All.
df2.sample(5, random_state=0)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
927,LOT,LottoCoin,Scrypt,PoW,14491010000.0,18406979840
687,MUSIC,Musicoin,Ethash,PoW,0.0,454898394
148,OPAL,OpalCoin,X13,PoW/PoS,15156360.0,0
18,XBS,Bitstake,X11,PoW/PoS,0.0,1300000
162,SBC,StableCoin,Scrypt,PoW,24215180.0,250000000


In [7]:
# Keep only the rows whose TotalCoinsMined is strictly positive.
df2 = df2.loc[df2['TotalCoinsMined'] > 0]
# Fixed seed so the preview shows the same rows on every Restart & Run All.
df2.sample(5, random_state=0)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
1204,VOLLAR,Vollar,Equihash+Scrypt,PoW,100000000.0,2100000000
739,USC,Ultimate Secure Cash,SHA-256,PoS,10343110.0,200084200
1125,GALI,Galilel,Quark,PoW/PoS,18265010.0,19035999
800,TER,TerraNovaCoin,Scrypt,PoW/PoS,1140735.0,15733333
1056,EUNO,EUNO,X11,PoW/PoS,30771130.0,50000000


In [8]:
# Remove the two label columns ('Unnamed: 0' and 'CoinName').
# `columns=` already names the axis, so no separate `axis` argument is passed.
df2 = df2.drop(columns=['Unnamed: 0', 'CoinName'])

In [9]:
# Display the current state of df2 (pandas renders a truncated preview of
# the first and last rows for a long frame).
df2

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,4.199995e+01,42
2,Scrypt,PoW/PoS,1.055185e+09,532000000
5,X13,PoW/PoS,2.927942e+10,314159265359
7,SHA-256,PoW,1.792718e+07,21000000
8,Ethash,PoW,1.076842e+08,0
...,...,...,...,...
1242,Scrypt,PoW/PoS,1.493105e+07,250000000
1245,CryptoNight,PoW,9.802226e+08,1400222610
1246,Equihash,PoW,7.296538e+06,21000000
1247,Scrypt,PoS,1.283270e+05,1000000


In [10]:
# Algorithm and ProofType still hold text labels. Replace each of them with
# one indicator (dummy) column per distinct label via pd.get_dummies.
# The number of rows stays the same; the number of columns grows by one per
# distinct Algorithm / ProofType label (minus the two original columns).
# NOTE(review): the resulting columns include `ProofType_Pos` although rows
# are labelled `PoS` elsewhere - presumably the raw labels contain spelling
# variants; confirm whether they should be merged before encoding.
df2 = pd.get_dummies(
    df2,
    columns=['Algorithm', 'ProofType'],
)
df2

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,4.199995e+01,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1.055185e+09,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,2.927942e+10,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,1.792718e+07,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,1.076842e+08,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1242,1.493105e+07,250000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1245,9.802226e+08,1400222610,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1246,7.296538e+06,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1247,1.283270e+05,1000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Standardize the two coin-count columns with StandardScaler and write the
# scaled values back into df2. Only these two columns are passed to the
# scaler; the dummy columns are left as they are.
count_columns = ['TotalCoinSupply', 'TotalCoinsMined']
scaler = StandardScaler()
df2[count_columns] = scaler.fit_transform(df2[count_columns])
df2

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,-0.114501,-0.150725,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,-0.090420,-0.142558,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,0.553699,4.671859,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,-0.114092,-0.150403,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,-0.112043,-0.150725,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1242,-0.114160,-0.146887,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1245,-0.092131,-0.129230,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1246,-0.114334,-0.150403,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1247,-0.114498,-0.150710,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
