In [1]:
from sklearn.preprocessing import normalize
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd

In [2]:
# Load the raw cryptocurrency table from the project's Resources folder
# and preview the first few rows.
crypto_csv_path = Path('Resources/crypto_data.csv')
data_df = pd.read_csv(crypto_csv_path)
data_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


## Data Preparation

- Reduce data to only cryptocurrencies that are being traded
- Drop 'IsTrading', 'Unnamed: 0' and 'CoinName' columns.
- CoinName was dropped because it does not contribute to the final analysis.
- Drop all rows that contain null values.
- Reduce data to only mined cryptocurrencies (`TotalCoinsMined` greater than zero).

In [3]:
# Keep only the rows whose IsTrading flag equals True.
is_trading_mask = data_df['IsTrading'].eq(True)
traded_crypto_df = data_df[is_trading_mask]

# Sanity check: the only value left in IsTrading should be True.
print(traded_crypto_df['IsTrading'].unique())
traded_crypto_df.head()

[ True]


Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [4]:
# Remove the 'IsTrading', 'CoinName' and 'Unnamed: 0' columns.
# CoinName is removed because it does not contribute to the final analysis.
columns_to_drop = ['IsTrading', 'CoinName', 'Unnamed: 0']
traded_crypto_df = traded_crypto_df.drop(columns=columns_to_drop)
print(traded_crypto_df.shape)

traded_crypto_df.head()

(1144, 4)


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42
1,X11,PoW/PoS,,2300000000
2,Scrypt,PoW/PoS,1055185000.0,532000000
3,SHA-256,PoW,,611000
4,SHA-256,PoW/PoS,0.0,0


In [8]:
# Discard every row that has at least one missing value in any column.
# NOTE(review): this cell's execution count (8) is out of sequence, and the
# recorded output already lacks row 4 (no nulls, but zero coins mined), so the
# stored output appears to come from a re-run after the mined-coin filter.
# Restart & Run All to refresh it.
traded_crypto_df = traded_crypto_df.dropna(axis=0, how='any')
print(traded_crypto_df.shape)

traded_crypto_df.head()

(532, 4)


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42.0
2,Scrypt,PoW/PoS,1055185000.0,532000000.0
5,X13,PoW/PoS,29279420000.0,314000000000.0
7,SHA-256,PoW,17927180.0,21000000.0
8,Ethash,PoW,107684200.0,0.0


In [6]:
# Keep only coins with a strictly positive TotalCoinsMined value.
mined_mask = traded_crypto_df['TotalCoinsMined'].gt(0)
traded_crypto_df = traded_crypto_df[mined_mask]

# Sanity check: every remaining row should satisfy TotalCoinsMined > 0,
# so the only unique value printed should be True.
over_zero = traded_crypto_df['TotalCoinsMined'].gt(0)
print(over_zero.unique())

traded_crypto_df.head()

[ True]


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42.0
2,Scrypt,PoW/PoS,1055185000.0,532000000.0
5,X13,PoW/PoS,29279420000.0,314000000000.0
7,SHA-256,PoW,17927180.0,21000000.0
8,Ethash,PoW,107684200.0,0.0


In [7]:
#CONVERT NON-NUMERICAL VALUES
