In [62]:
# imports
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import plotly.express as px
import hvplot.pandas

In [63]:
# Read the raw crypto CSV from the Resources folder and preview the first five rows.
file_path = "Resources/crypto_data.csv"
crypto_df = pd.read_csv(filepath_or_buffer=file_path)
crypto_df.head(n=5)


Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [64]:
# Keep only the rows whose IsTrading flag equals True (coins that are
# not trading are filtered out), then preview the result.
is_trading_mask = crypto_df['IsTrading'].eq(True)
trading_crypto_df = crypto_df.loc[is_trading_mask]
trading_crypto_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [65]:
# Sanity check: the only distinct value left in IsTrading should be True.
trading_crypto_df.loc[:, 'IsTrading'].unique()

array([ True])

In [66]:
# List every distinct algorithm name that appears among the trading coins.
trading_crypto_df['Algorithm'].unique().tolist()

['Scrypt',
 'X11',
 'SHA-256',
 'X13',
 'Ethash',
 'CryptoNight-V7',
 'Equihash',
 'SHA-512',
 'Multiple',
 'X15',
 'NIST5',
 'Quark',
 'Groestl',
 'PoS',
 'NeoScrypt',
 'SHA3',
 'HybridScryptHash256',
 'Scrypt-n',
 'PHI1612',
 'Lyra2REv2',
 'CryptoNight',
 'Shabal256',
 'Counterparty',
 'Blake',
 'Momentum',
 'Stanford Folding',
 'QuBit',
 'XG Hash',
 'M7 POW',
 'Curve25519',
 'Lyra2RE',
 'QUAIT',
 'vDPOS',
 'Blake2b',
 'BLAKE256',
 '1GB AES Pattern Search',
 'Dagger',
 'CryptoNight-Lite',
 'X11GOST',
 'SHA-256D',
 'POS 3.0',
 'Progressive-n',
 'DPoS',
 'Lyra2Z',
 'X14',
 'Time Travel',
 'Argon2',
 'Keccak',
 'Blake2S',
 'Dagger-Hashimoto',
 '536',
 'Argon2d',
 'Cloverhash',
 'Skein',
 'SkunkHash v2 Raptor',
 'VeChainThor Authority',
 'Ouroboros',
 'POS 2.0',
 'SkunkHash',
 'C11',
 'Proof-of-BibleHash',
 'SHA-256 + Hive',
 'Proof-of-Authority',
 'XEVAN',
 'VBFT',
 'YescryptR16',
 'IMesh',
 'Green Protocol',
 'Semux BFT consensus',
 'X16R',
 'Tribus',
 'CryptoNight Heavy',
 'Jump Consi

In [67]:
# Count the missing values in the "Algorithm" column.
trading_crypto_df['Algorithm'].isna().sum()

# note: the count is 0, so no entry has an undefined algorithm

0

In [68]:
# Drop the IsTrading column: after the trading filter its only value is True,
# so it no longer carries information. `columns=` already selects the column
# axis, so no separate `axis` argument is needed.
df = trading_crypto_df.drop(columns=['IsTrading'])
df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,41.99995,42
1,365,365Coin,X11,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,PoW,,611000
4,808,808,SHA-256,PoW/PoS,0.0,0


In [69]:
# Find out how many null values each column of df contains.
df.isna().sum()

Unnamed: 0           0
CoinName             0
Algorithm            0
ProofType            0
TotalCoinsMined    459
TotalCoinSupply      0
dtype: int64