In [12]:
# Import dependencies
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the cryptocurrency dataset from the Data folder into a DataFrame and
# preview its first five rows.
file_path = Path("Data/crypto_data.csv")
crypto_df = pd.read_csv(filepath_or_buffer=file_path)
crypto_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [3]:
# Name the unnamed first column, which holds each coin's abbreviation.
# Only that one label is changed (selected by position), so the remaining
# headers keep the names read from the CSV instead of being overwritten by a
# hard-coded list that would silently mislabel data if the file's column order
# ever changed. Re-running the cell is harmless: the first column is simply
# renamed to the name it already has.
crypto_df = crypto_df.rename(columns={crypto_df.columns[0]: 'CoinAbbr'})
crypto_df.head()

Unnamed: 0,CoinAbbr,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [4]:
# Record the (rows, columns) shape of the loaded frame before any rows are
# dropped, so it can be compared with the shape after the filtering steps.
crypto_df.shape

(1252, 7)

In [5]:
# Inspect the dtype pandas inferred for each column.
# NOTE(review): TotalCoinSupply is reported as object rather than a numeric
# dtype (see the output) - presumably it needs converting before numeric
# work; confirm.
crypto_df.dtypes

CoinAbbr            object
CoinName            object
Algorithm           object
IsTrading             bool
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply     object
dtype: object

In [6]:
# Keep only the cryptocurrencies that are currently being traded, then remove
# the IsTrading column, which is constant after the filter.
actively_traded = crypto_df['IsTrading'].eq(True)
Trading_Crypto_df = crypto_df.loc[actively_traded].drop(columns='IsTrading')

In [7]:
# Remove all rows that have at least one null value.
# DataFrame.dropna() returns a new frame instead of modifying the caller, so
# the result must be assigned back; otherwise the rows with nulls silently
# remain in Trading_Crypto_df and flow into the later cells.
Trading_Crypto_df = Trading_Crypto_df.dropna()
Trading_Crypto_df

Unnamed: 0,CoinAbbr,CoinName,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,PoW/PoS,4.199995e+01,42
2,404,404Coin,Scrypt,PoW/PoS,1.055185e+09,532000000
4,808,808,SHA-256,PoW/PoS,0.000000e+00,0
5,1337,EliteCoin,X13,PoW/PoS,2.927942e+10,314159265359
7,BTC,Bitcoin,SHA-256,PoW,1.792718e+07,21000000
...,...,...,...,...,...,...
1238,ZEPH,ZEPHYR,SHA-256,DPoS,2.000000e+09,2000000000
1242,GAP,Gapcoin,Scrypt,PoW/PoS,1.493105e+07,250000000
1245,BDX,Beldex,CryptoNight,PoW,9.802226e+08,1400222610
1246,ZEN,Horizen,Equihash,PoW,7.296538e+06,21000000


In [8]:
# Keep only the cryptocurrencies with a positive TotalCoinsMined value.
# Rows where TotalCoinsMined is missing compare as False and are excluded too.
has_been_mined = Trading_Crypto_df['TotalCoinsMined'].gt(0)
Mined_Crypto_df = Trading_Crypto_df[has_been_mined]
Mined_Crypto_df.shape

(532, 6)

In [9]:
# Remove the CoinName column from the mined-coins frame.
# This cell rebinds Mined_Crypto_df, so running it a second time raises a
# KeyError because the column is already gone.
Mined_Crypto_df = Mined_Crypto_df.drop(columns='CoinName')

In [10]:
# Remove the CoinAbbr column from the mined-coins frame.
# This cell rebinds Mined_Crypto_df, so running it a second time raises a
# KeyError because the column is already gone.
Mined_Crypto_df = Mined_Crypto_df.drop(columns='CoinAbbr')

In [14]:
# One-hot encode the Algorithm and ProofType columns into numeric indicators.
# The row count is unchanged; the column count grows because each encoded
# column is replaced by one indicator per category, minus the first category
# of each (drop_first=True).
Cleaned_crypto = pd.get_dummies(
    data=Mined_Crypto_df,
    columns=['Algorithm', 'ProofType'],
    drop_first=True,
)

In [16]:
# Preview the encoded frame: TotalCoinsMined and TotalCoinSupply followed by
# the Algorithm_* and ProofType_* indicator columns.
Cleaned_crypto.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,Algorithm_Cloverhash,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,41.99995,42,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1055185000.0,532000000,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,29279420000.0,314159265359,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,17927180.0,21000000,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,107684200.0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# Standardize every column of the encoded frame for modeling. The scaler is
# fitted and then applied to the same data, which is what fit_transform does
# in a single call.
# NOTE(review): TotalCoinSupply showed up as object dtype in the dtypes check
# and no visible cell converts it - presumably the scaler coerces it to
# float; confirm, or convert it explicitly before this cell.
scaler = StandardScaler()
scaled_data = scaler.fit(Cleaned_crypto).transform(Cleaned_crypto)