### Dependencies

In [15]:
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd

### Data Preparation

In [2]:
# Read in CSV
file = Path('crypto_data.csv')
df = pd.read_csv(file)

In [3]:
# Preview Data
df.sample(10)

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
7,BTC,Bitcoin,SHA-256,True,PoW,17927180.0,21000000
1176,PIGGY,Piggy Coin,X11,True,PoW/PoS,494240700.0,1000000000
831,CSTL,Castle,Quark,True,DPoS,0.0,50000000
1092,LTZ,Litecoinz,Equihash,True,PoW,,84000000
593,XCI,Cannabis Industry Coin,CryptoNight,True,PoW,978145.0,21000000
400,EXIT,ExitCoin,Scrypt,True,PoW/PoS,,756000000
245,SMAC,Social Media Coin,X11,True,PoW/PoS,,0
1130,CSPN,Crypto Sports,Quark,True,PoS,1934702.0,13370000
785,FRAZ,FrazCoin,Scrypt,True,PoW,9704042.0,20000000
1113,BBTC,BlakeBitcoin,Blake,False,PoW,20595480.0,21000000


In [4]:
# Filter DataFrame by currency that is traded
crypto_df = df[df["IsTrading"] == True]
crypto_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [5]:
# Count Null Values by column
crypto_df.isnull().sum(axis=0)

Unnamed: 0           0
CoinName             0
Algorithm            0
IsTrading            0
ProofType            0
TotalCoinsMined    459
TotalCoinSupply      0
dtype: int64

In [6]:
# Remove all rows that have at least one null value
crypto_df = crypto_df.dropna(how="any", axis=0)

In [7]:
# Count Null Values by column
crypto_df.isnull().sum(axis=0)

Unnamed: 0         0
CoinName           0
Algorithm          0
IsTrading          0
ProofType          0
TotalCoinsMined    0
TotalCoinSupply    0
dtype: int64

In [8]:
# Filter for cryptocurrencies that have been mined. That is, the total coins mined should be greater than zero.
crypto_df = df[df["TotalCoinsMined"] > 0]
crypto_df.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
5,1337,EliteCoin,X13,True,PoW/PoS,29279420000.0,314159265359
7,BTC,Bitcoin,SHA-256,True,PoW,17927180.0,21000000
8,ETH,Ethereum,Ethash,True,PoW,107684200.0,0


In [9]:
# View Data types
crypto_df.dtypes

Unnamed: 0          object
CoinName            object
Algorithm           object
IsTrading             bool
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply     object
dtype: object

In [10]:
# Usingn algorithm seems to be an interesting basis to use, so we need to see what values are included.
crypto_df["Algorithm"].nunique()

73

In [11]:
# Since the coin names do not contribute to the analysis of the data, delete the `CoinName` and `Unnamed`.
crypto_df = crypto_df.drop("CoinName", 1)
crypto_df = crypto_df.drop("Unnamed: 0", 1)
crypto_df.head()

Unnamed: 0,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,True,PoW/PoS,41.99995,42
2,Scrypt,True,PoW/PoS,1055185000.0,532000000
5,X13,True,PoW/PoS,29279420000.0,314159265359
7,SHA-256,True,PoW,17927180.0,21000000
8,Ethash,True,PoW,107684200.0,0


In [12]:
# Transform Previous IsTrading Column
crypto_df.replace({False: 0, True: 1}, inplace=True)

crypto_df.head()

Unnamed: 0,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,Scrypt,1,PoW/PoS,41.99995,42
2,Scrypt,1,PoW/PoS,1055185000.0,532000000
5,X13,1,PoW/PoS,29279420000.0,314159265359
7,SHA-256,1,PoW,17927180.0,21000000
8,Ethash,1,PoW,107684200.0,0


In [13]:
# In order for your dataset to be comprehensible to a machine learning algorithm, its data should be numeric.
crypto_x = pd.get_dummies(data=crypto_df, columns=["Algorithm", "ProofType"])
crypto_x.head()

Unnamed: 0,IsTrading,TotalCoinsMined,TotalCoinSupply,Algorithm_1GB AES Pattern Search,Algorithm_536,Algorithm_Argon2d,Algorithm_BLAKE256,Algorithm_Blake,Algorithm_Blake2S,Algorithm_Blake2b,...,ProofType_PoW/PoS,ProofType_PoW/PoS.1,ProofType_PoW/PoW,ProofType_PoW/nPoS,ProofType_Pos,ProofType_Proof of Authority,ProofType_Proof of Trust,ProofType_TPoS,ProofType_Zero-Knowledge Proof,ProofType_dPoW/PoW
0,1,41.99995,42,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,1,1055185000.0,532000000,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
5,1,29279420000.0,314159265359,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
7,1,17927180.0,21000000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,1,107684200.0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Convert the remaining features with text values, `Algorithm` and `ProofType`, into numerical data.
scaler = StandardScaler()
crypto_scaled = scaler.fit_transform(crypto_x)

In [17]:
# Check that the data was scaled
crypto_scaled[0]

array([ 0.29083753, -0.11450078, -0.15072489, -0.04166667, -0.04166667,
       -0.04166667, -0.05897678, -0.0934947 , -0.04166667, -0.05897678,
       -0.05897678, -0.04166667, -0.04166667, -0.18949048, -0.05897678,
       -0.0934947 , -0.04166667, -0.11081833, -0.0722944 , -0.04166667,
       -0.04166667, -0.1518211 , -0.04166667, -0.13280318, -0.04166667,
       -0.04166667, -0.0835512 , -0.05897678, -0.04166667, -0.04166667,
       -0.04166667, -0.05897678, -0.04166667, -0.0835512 , -0.0934947 ,
       -0.10250796, -0.04166667, -0.1258772 , -0.13280318, -0.1518211 ,
       -0.04166667, -0.0835512 , -0.04166667, -0.04166667, -0.0722944 ,
       -0.17423301, -0.04166667, -0.04166667, -0.04166667, -0.0722944 ,
       -0.16888013, -0.30802055, -0.04166667, -0.0934947 , -0.0934947 ,
       -0.05897678,  1.39963365, -0.04166667, -0.04166667, -0.04166667,
       -0.0835512 , -0.04166667, -0.04166667, -0.04166667, -0.04166667,
       -0.04166667, -0.05897678, -0.04166667, -0.04166667, -0.39

### Dimensionality Reduction

### Cluster Analysis with k-Means

### Recommendation

In [None]:
# Normalize the data
normalized = normalize(df)

In [None]:
# Create a new dataframe with the normalized data
df2 = pd.DataFrame(normalized)

In [None]:
df2.head()

In [None]:
# Copy column names over to new data frame
df2.columns = df.columns

In [None]:
df2.head()

In [None]:
# Perform hierarchical clustering
mergings = linkage(normalized, method='ward')

In [None]:
mergings[:5]

In [None]:
# Generate a dendrogram
plt.figure(figsize=(12,8))

dendrogram(mergings,
          leaf_rotation=90,
          leaf_font_size=5)

plt.show()

In [None]:
# Generate cluster labels with sklearn.cluster's AgglomerativeClustering
cluster = AgglomerativeClustering(n_clusters=2,
                                 affinity='euclidean',
                                 linkage='ward')
labels = cluster.fit_predict(df2)

In [None]:
# Create a scatter plot with two of the features
plt.scatter(df2['Grocery'], df2['Fresh'], c=labels)
plt.show()

In [None]:
plt.scatter(df2['Grocery'], df2['Milk'], c=labels)
plt.show()