In [1]:
from sklearn.preprocessing import normalize
#from scipy.cluster.hierarchy import dendrogram, linkage
#from sklearn.cluster import AgglomerativeClustering
#import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd
from sklearn.decomposition import PCA

In [2]:
#import the data
data_df = pd.read_csv(Path('Resources/crypto_data.csv'))
data_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


## Data Preparation

- Reduce data to only cryptocurrencies that are being traded
- Drop 'IsTrading', 'Unnamed: 0' and 'CoinName' columns.
- CoinName was dropped because it does not contribute to the final analysis.
- Drop all null values
- reduce data to only mined cryptocurrencies

In [3]:
#REDUCE DATA TO ONLY TRADED CURRENCIES
traded_crypto_df = data_df.loc[data_df['IsTrading']==True,:]

#verify the drop
print(traded_crypto_df['IsTrading'].unique())
traded_crypto_df.head()

[ True]


Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [4]:
#DROP 'IsTrading', 'Unnamed: 0' and 'CoinName' COLUMNS
#CoinName was dropped because it does not contribute to the final analysis.
traded_crypto_df = traded_crypto_df.drop(['IsTrading', 'CoinName','Unnamed: 0'], axis=1)
print(traded_crypto_df.shape)

traded_crypto_df.head()

(1144, 4)


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42
1,X11,PoW/PoS,,2300000000
2,Scrypt,PoW/PoS,1055185000.0,532000000
3,SHA-256,PoW,,611000
4,SHA-256,PoW/PoS,0.0,0


In [5]:
#DROP ALL NULL VALUES
traded_crypto_df = traded_crypto_df.dropna()
print(traded_crypto_df.shape)

traded_crypto_df.head()

(685, 4)


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42.0
2,Scrypt,PoW/PoS,1055185000.0,532000000.0
4,SHA-256,PoW/PoS,0.0,0.0
5,X13,PoW/PoS,29279420000.0,314000000000.0
7,SHA-256,PoW,17927180.0,21000000.0


In [6]:
#REDUCE DATA TO ONLY MINED CRYPTOCURRENCIES
traded_crypto_df = traded_crypto_df.loc[traded_crypto_df['TotalCoinsMined']>0,:]

#verify the changes
over_zero = traded_crypto_df['TotalCoinsMined']>0
print(over_zero.unique())

traded_crypto_df.head()

[ True]


Unnamed: 0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,PoW/PoS,41.99995,42.0
2,Scrypt,PoW/PoS,1055185000.0,532000000.0
5,X13,PoW/PoS,29279420000.0,314000000000.0
7,SHA-256,PoW,17927180.0,21000000.0
8,Ethash,PoW,107684200.0,0.0


In [7]:
# CHECK DATATYPES
traded_crypto_df.dtypes

Algorithm           object
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply     object
dtype: object

In [8]:
#CONVERT 'TotalCoinSupply' TO NUMERIC VALUE
traded_crypto_df['TotalCoinSupply'] = pd.to_numeric(traded_crypto_df['TotalCoinSupply'])
traded_crypto_df.dtypes

Algorithm           object
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply    float64
dtype: object

In [9]:
#CONVERT NON-NUMERICAL VALUES
traded_crypto_df = pd.get_dummies(traded_crypto_df)
traded_crypto_df.head()

Unnamed: 0,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,Algorithm_C11,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,41.99995,42.0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1055185000.0,532000000.0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,29279420000.0,314000000000.0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,17927180.0,21000000.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,107684200.0,0.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Get_Dummies is a Dummy
The get_dummies function created a column for every Algorithm and Proof Type in the data set. I'm not sure why Pandas did this instead of just creating a numerical values for each unique vaule in those data sets. 

## Dimensionality Reduction with PCA

In [11]:
# Initialize PCA model
pca = PCA(n_components=0.9)

# Get two principal components for the data.
crypto_pca = pca.fit_transform(traded_crypto_df)

AttributeError: 'numpy.ndarray' object has no attribute 'head'

In [13]:
# Transform PCA data to a DataFrame
pca_crypto_df = pd.DataFrame(data=crypto_pca)
pca_crypto_df

Unnamed: 0,0,1
0,-1.162742e+10,-5.498665e+08
1,-1.063893e+10,-1.197423e+09
2,2.749096e+11,1.311688e+11
3,-1.160027e+10,-5.548847e+08
4,-1.157356e+10,-6.431096e+08
...,...,...
527,-8.895180e+09,-1.281198e+09
528,-1.140348e+10,-4.377382e+08
529,-9.924644e+09,-6.982048e+08
530,-1.160559e+10,-5.456797e+08


## Did I do this correct?
I'm not sure if this is the correct outcome and I don't know how or why this is the outcome. Help. 