# Import Dependencies and Data

In [14]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [5]:
crypto_df = pd.read_csv("Resources/crypto_data.csv")

# Data Preprocessing

In [6]:
# Drop unused columns
crypto_df = crypto_df.drop(["Unnamed: 0", "CoinName"], axis=1)
print(crypto_df.count())
crypto_df.head()

Algorithm          1252
IsTrading          1252
ProofType          1252
TotalCoinsMined     744
TotalCoinSupply    1252
dtype: int64


Unnamed: 0,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,True,PoW/PoS,41.99995,42
1,X11,True,PoW/PoS,,2300000000
2,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,SHA-256,True,PoW,,611000
4,SHA-256,True,PoW/PoS,0.0,0


In [7]:
# Drop rows not currently being traded then drop the column
trading_df = crypto_df.loc[crypto_df["IsTrading"] == True, :]
trading_df = trading_df.drop(["IsTrading"], axis=1)
print(trading_df.count())
trading_df.head()

Algorithm          1144
ProofType          1144
TotalCoinsMined     685
TotalCoinSupply    1144
dtype: int64


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42
1,X11,PoW/PoS,,2300000000
2,Scrypt,PoW/PoS,1055185000.0,532000000
3,SHA-256,PoW,,611000
4,SHA-256,PoW/PoS,0.0,0


In [8]:
# Drop rows with null values
no_nulls_df = trading_df.dropna(how="any")
print(no_nulls_df.count())
no_nulls_df.head()

Algorithm          685
ProofType          685
TotalCoinsMined    685
TotalCoinSupply    685
dtype: int64


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42
2,Scrypt,PoW/PoS,1055185000.0,532000000
4,SHA-256,PoW/PoS,0.0,0
5,X13,PoW/PoS,29279420000.0,314159265359
7,SHA-256,PoW,17927180.0,21000000


In [10]:
# Filter for total coins mined greater than 0
mined_df = no_nulls_df.loc[no_nulls_df["TotalCoinsMined"] > 0, :]
print(mined_df.count())
mined_df.head()

Algorithm          532
ProofType          532
TotalCoinsMined    532
TotalCoinSupply    532
dtype: int64


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42
2,Scrypt,PoW/PoS,1055185000.0,532000000
5,X13,PoW/PoS,29279420000.0,314159265359
7,SHA-256,PoW,17927180.0,21000000
8,Ethash,PoW,107684200.0,0


In [11]:
# Convert column from object to numeric
mined_df["TotalCoinSupply"] = pd.to_numeric(mined_df["TotalCoinSupply"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mined_df["TotalCoinSupply"] = pd.to_numeric(mined_df["TotalCoinSupply"])


In [13]:
data = pd.get_dummies(mined_df)
print(data.count())
data.head()

TotalCoinsMined                     532
TotalCoinSupply                     532
Algorithm_1GB AES Pattern Search    532
Algorithm_536                       532
Algorithm_Argon2d                   532
                                   ... 
ProofType_Proof of Authority        532
ProofType_Proof of Trust            532
ProofType_TPoS                      532
ProofType_Zero-Knowledge Proof      532
ProofType_dPoW/PoW                  532
Length: 98, dtype: int64


Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,41.99995,42.0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1055185000.0,532000000.0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,29279420000.0,314159300000.0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,17927180.0,21000000.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,107684200.0,0.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Scale data
scaler = StandardScaler().fit(data)
data_scaled = scaler.transform(data)