In [34]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier

In [37]:
# Load the raw cryptocurrency dataset; the path is relative to the notebook's
# working directory.
data_file = Path('cryptocurrency-challenge/crypto_data.csv')
crypto_df = pd.read_csv(data_file)

In [11]:
# Display the frame as loaded from the CSV, before any filtering
crypto_df

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055184902.04000,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.00000,0
...,...,...,...,...,...,...,...
1243,SERO,Super Zero,Ethash,True,PoW,,1000000000
1244,UOS,UOS,SHA-256,True,DPoI,,1000000000
1245,BDX,Beldex,CryptoNight,True,PoW,980222595.00000,1400222610
1246,ZEN,Horizen,Equihash,True,PoW,7296537.50000,21000000


In [12]:
# Suppress scientific notation in rendered DataFrames by formatting every
# float with a fixed number of decimals.
def _fixed_five_decimals(value):
    """Return `value` rendered with exactly five digits after the decimal point."""
    return '%.5f' % value


pd.set_option('display.float_format', _fixed_five_decimals)

In [13]:
# Keep only currencies that are currently being traded: find the index labels
# of the rows flagged IsTrading == False and remove them from crypto_df in place.
not_trading = crypto_df['IsTrading'] == False
inactive_labels = crypto_df.loc[not_trading].index
crypto_df.drop(index=inactive_labels, inplace=True)

In [14]:
# Display crypto_df again after the in-place removal of non-trading rows
crypto_df

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055184902.04000,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.00000,0
...,...,...,...,...,...,...,...
1243,SERO,Super Zero,Ethash,True,PoW,,1000000000
1244,UOS,UOS,SHA-256,True,DPoI,,1000000000
1245,BDX,Beldex,CryptoNight,True,PoW,980222595.00000,1400222610
1246,ZEN,Horizen,Equihash,True,PoW,7296537.50000,21000000


In [15]:
# Remove the CoinName and IsTrading columns; the result is a new frame and
# crypto_df itself is left unchanged.
crypto_df2 = crypto_df.drop(columns=['IsTrading', 'CoinName'])

In [16]:
# Drop all rows that have at least one null value.
# dropna() returns a new DataFrame and does not modify crypto_df2, so the
# result has to be assigned back; otherwise the null rows are only hidden in
# this cell's output and are still present for the cells that follow.
crypto_df2 = crypto_df2.dropna()
crypto_df2

Unnamed: 0.1,Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,Scrypt,PoW/PoS,41.99995,42
2,404,Scrypt,PoW/PoS,1055184902.04000,532000000
4,808,SHA-256,PoW/PoS,0.00000,0
5,1337,X13,PoW/PoS,29279424622.50270,314159265359
7,BTC,SHA-256,PoW,17927175.00000,21000000
...,...,...,...,...,...
1238,ZEPH,SHA-256,DPoS,1999999995.30560,2000000000
1242,GAP,Scrypt,PoW/PoS,14931046.15466,250000000
1245,BDX,CryptoNight,PoW,980222595.00000,1400222610
1246,ZEN,Equihash,PoW,7296537.50000,21000000


In [29]:
# Keep only cryptocurrencies that have been mined, i.e. rows whose
# TotalCoinsMined is strictly positive (null values compare as False and are
# therefore excluded as well).
has_mined_coins = crypto_df2['TotalCoinsMined'].gt(0)
crypto_df3 = crypto_df2.loc[has_mined_coins]
crypto_df3


Unnamed: 0.1,Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,Scrypt,PoW/PoS,41.99995,42
2,404,Scrypt,PoW/PoS,1055184902.04000,532000000
5,1337,X13,PoW/PoS,29279424622.50270,314159265359
7,BTC,SHA-256,PoW,17927175.00000,21000000
8,ETH,Ethash,PoW,107684222.68650,0
...,...,...,...,...,...
1238,ZEPH,SHA-256,DPoS,1999999995.30560,2000000000
1242,GAP,Scrypt,PoW/PoS,14931046.15466,250000000
1245,BDX,CryptoNight,PoW,980222595.00000,1400222610
1246,ZEN,Equihash,PoW,7296537.50000,21000000


In [27]:
# One-hot encode the categorical feature columns so they become numeric
# indicator columns; all other columns are passed through unchanged.
categorical_columns = ['Algorithm', 'ProofType']
crypto_dummies = pd.get_dummies(crypto_df3, columns=categorical_columns)

In [28]:
# Display the frame after one-hot encoding Algorithm and ProofType
crypto_dummies

Unnamed: 0.1,Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,42,41.99995,42,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,404,1055184902.04000,532000000,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,1337,29279424622.50270,314159265359,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,BTC,17927175.00000,21000000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,ETH,107684222.68650,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1238,ZEPH,1999999995.30560,2000000000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1242,GAP,14931046.15466,250000000,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
1245,BDX,980222595.00000,1400222610,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1246,ZEN,7296537.50000,21000000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Standardize the data
# TODO: this cell is still empty.
# NOTE(review): the displayed crypto_dummies output still shows an 'Unnamed: 0'
# column holding ticker-like strings (e.g. BTC, ETH); presumably it has to be
# dropped or moved to the index before a scaler can be fitted — confirm.
