In [1]:
import pandas as pd
from path import Path
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

In [2]:
# Location of the raw cryptocurrency CSV, relative to the notebook's working directory.
filepath = Path('./Resources/crypto_data.csv')

In [4]:
# Read the dataset, using the first (unnamed) CSV column as the row index.
df = pd.read_csv(filepath, index_col=0)

# Preview the first 20 rows.
df.head(n=20)

Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0
1337,EliteCoin,X13,True,PoW/PoS,29279420000.0,314159265359
2015,2015 coin,X11,True,PoW/PoS,,0
BTC,Bitcoin,SHA-256,True,PoW,17927180.0,21000000
ETH,Ethereum,Ethash,True,PoW,107684200.0,0
LTC,Litecoin,Scrypt,True,PoW,63039240.0,84000000


In [13]:
# Collect the distinct values found in the Algorithm column and print them.
algorithms = set(df['Algorithm'])
print(algorithms)

{'Exosis', 'Skein', 'Proof-of-Authority', 'Jump Consistent Hash', 'Lyra2RE', 'Shabal256', 'Scrypt', 'VeChainThor Authority', 'IMesh', 'PHI1612', 'VBFT', 'Blake2b', 'Cryptonight-GPU', 'Argon2d', 'Cloverhash', 'ECC 256K1', 'Dagger', 'POS 3.0', 'Blake2S', 'CryptoNight-V7', 'Quark', 'Equihash', 'Leased POS', 'SHA-256', 'Blake', 'X16R', 'CryptoNight Heavy', 'Equihash+Scrypt', 'Ethash', 'Groestl', 'C11', 'QUAIT', 'BLAKE256', 'Lyra2REv2', 'X11GOST', 'M7 POW', 'Proof-of-BibleHash', 'X13', 'SkunkHash v2 Raptor', 'TRC10', '536', '1GB AES Pattern Search', 'X11', 'HybridScryptHash256', 'NIST5', 'SHA-512', 'Tribus', 'X14', 'QuBit', 'SHA3', 'PoS', 'NeoScrypt', 'Ouroboros', 'Lyra2Z', 'X15', 'SkunkHash', 'Counterparty', 'Green Protocol', 'Multiple', 'SHA-256 + Hive', 'XEVAN', 'CryptoNight', 'Time Travel', 'POS 2.0', 'HMQ1725', 'SHA-256D', 'Stanford Folding', 'Keccak', 'Semux BFT consensus', 'DPoS', 'Dagger-Hashimoto'}


In [6]:
# Count how many rows carry each IsTrading flag value.
trading_flag = df['IsTrading']
trading_flag.value_counts()

True     1144
False     108
Name: IsTrading, dtype: int64

In [7]:
# Keep only the rows whose IsTrading flag equals True.
trading_mask = df['IsTrading'].eq(True)
df = df[trading_mask]

# Re-count the flag values to confirm what remains after the filter.
df['IsTrading'].value_counts()

True    1144
Name: IsTrading, dtype: int64

In [8]:
# Tutor - remove the IsTrading column; after the filter above it is no longer needed.
df = df.drop(columns='IsTrading')
df.head()

Unnamed: 0,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,42 Coin,Scrypt,PoW/PoS,41.99995,42
365,365Coin,X11,PoW/PoS,,2300000000
404,404Coin,Scrypt,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,PoW,,611000
808,808,SHA-256,PoW/PoS,0.0,0


In [9]:
# Discard every row that has at least one missing value, then report (rows, columns).
df = df.dropna(how='any')
df.shape

(685, 5)

In [10]:
# Order TotalCoinsMined ascending so negative and zero entries show up at the top.
df['TotalCoinsMined'].sort_values(ascending=True)

FIII   -5.917978e+09
LBTC    0.000000e+00
RIPO    0.000000e+00
BASH    0.000000e+00
CSH     0.000000e+00
            ...     
QWC     9.955311e+10
NYC     1.430067e+11
GCN     1.630551e+11
BCN     1.840668e+11
BTT     9.899887e+11
Name: TotalCoinsMined, Length: 685, dtype: float64

In [11]:
# Keep only rows with a strictly positive TotalCoinsMined, then count what is left.
mined_positive = df['TotalCoinsMined'].gt(0)
df = df.loc[mined_positive]
len(df)

532

In [12]:
# Remove the CoinName column from the working DataFrame and preview the result.
df = df.drop(columns=['CoinName'])
df.head()

Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
42,Scrypt,PoW/PoS,41.99995,42
404,Scrypt,PoW/PoS,1055185000.0,532000000
1337,X13,PoW/PoS,29279420000.0,314159265359
BTC,SHA-256,PoW,17927180.0,21000000
ETH,Ethash,PoW,107684200.0,0
