# Clustering Crypto

In [40]:
# Initial imports
import pandas as pd
#import hvplot.pandas
from path import Path
#import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans


### Deliverable 1: Preprocessing the Data for PCA

In [41]:
# Load the crypto_data.csv dataset.
# YOUR CODE HERE
file_path = "resources/crypto_data.csv"
df_crypto = pd.read_csv(file_path)
df_crypto.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
1,365,365Coin,X11,True,PoW/PoS,,2300000000
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
3,611,SixEleven,SHA-256,True,PoW,,611000
4,808,808,SHA-256,True,PoW/PoS,0.0,0


In [None]:
# Keep all the cryptocurrencies that are being traded.
# YOUR CODE HERE

for trading in df_crypto["IsTrading"]:
    
#if (df_crypto["IsTrading"] == "True"):

    def change_string(trading):
        if trading == 'True':
            return 1
        else:
            return 0
        
        
df_crypto["IsTrading"] = df_crypto["IsTrading"].apply(change_string)
#df_crypto.dropna()
df_crypto.head()

In [None]:
# Keep all the cryptocurrencies that have a working algorithm.
# YOUR CODE HERE
df_crypto = df_crypto.drop(["Algorithm"].isnull())
df_crypto.head()

In [None]:
# Remove the "IsTrading" column. 
# YOUR CODE HERE
df_crypto.drop(columns=["IsTrading"], inplace=True)
df_crypto.head()

In [42]:
# Remove rows that have at least 1 null value.
# YOUR CODE HERE
df_crypto = df_crypto.dropna()
df_crypto.head()

Unnamed: 0.1,Unnamed: 0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
2,404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
4,808,808,SHA-256,True,PoW/PoS,0.0,0
5,1337,EliteCoin,X13,True,PoW/PoS,29279420000.0,314159265359
7,BTC,Bitcoin,SHA-256,True,PoW,17927180.0,21000000


In [None]:
# Keep the rows where coins are mined.
# YOUR CODE HERE
df_crypto = pd.DataFrame(df_crypto.dropna(["TotalCoinsMined"] == "NaN"))
df_crypto.head()

In [43]:
# Create a new DataFrame that holds only the cryptocurrencies names.
# YOUR CODE HERE
df_coinname = pd.DataFrame(df_crypto["CoinName"])
df_coinname.head()

Unnamed: 0,CoinName
0,42 Coin
2,404Coin
4,808
5,EliteCoin
7,Bitcoin


In [44]:
# Drop the 'CoinName' column since it's not going to be used on the clustering algorithm.
# YOUR CODE HERE
df_crypto.drop(columns=["CoinName"], inplace=True)
df_crypto.head()

Unnamed: 0.1,Unnamed: 0,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
0,42,Scrypt,True,PoW/PoS,41.99995,42
2,404,Scrypt,True,PoW/PoS,1055185000.0,532000000
4,808,SHA-256,True,PoW/PoS,0.0,0
5,1337,X13,True,PoW/PoS,29279420000.0,314159265359
7,BTC,SHA-256,True,PoW,17927180.0,21000000


In [None]:
# Use get_dummies() to create variables for text features.
# YOUR CODE HERE
pd.get_dummies(df_crypto["Algorithm"], df_crypto["ProofType"])
df_crypto.head()

In [None]:
# Standardize the data with StandardScaler().
# YOUR CODE HERE
#standardize w StandardScaler
crypto_scaled = StandardScaler().fit_transform(df_crypto)
print(crypto_scaled[0:5])

### Deliverable 2: Reducing Data Dimensions Using PCA

In [None]:
# Using PCA to reduce dimension to three principal components.
# YOUR CODE HERE
pca = PCA(n_components=2)

In [None]:
# Create a DataFrame with the three principal components.
# YOUR CODE HERE
crypto_pca = pca.fit_transform(crypto_scaled)


df_crypto_pca = pd.DataFrame(
     data=crypto_pca, columns=["principal component 1", "principal component 2", "principal component 3"]
)
df_crypto_pca.head()


### Deliverable 3: Clustering Crytocurrencies Using K-Means

#### Finding the Best Value for `k` Using the Elbow Curve

In [None]:
# Create an elbow curve to find the best value for K.
# YOUR CODE HERE


Running K-Means with `k=4`

In [None]:
# Initialize the K-Means model.
# YOUR CODE HERE
model = KMeans(n_clusters=3, random_state=0)

# Fit the model
# YOUR CODE HERE
model.fit(df_crypto_pca)

# Predict clusters
# YOUR CODE HERE
df_crypto_pca["class"] = model.labels_
df_crypto_pca.head()

In [None]:
# Create a new DataFrame including predicted clusters and cryptocurrencies features.
# Concatentate the crypto_df and pcs_df DataFrames on the same columns.
# YOUR CODE HERE
clustered_df = pd.concat(crypto_df, pcs_df) 

#  Add a new column, "CoinName" to the clustered_df DataFrame that holds the names of the cryptocurrencies. 
# YOUR CODE HERE
clustered_df = clustered_df.append(column=["CoinName"])
#  Add a new column, "Class" to the clustered_df DataFrame that holds the predictions.
# YOUR CODE HERE
clustered_df = clustered_df.append(column=["Class"])

# Print the shape of the clustered_df
print(clustered_df.shape)
clustered_df.head(10)

### Deliverable 4: Visualizing Cryptocurrencies Results

#### 3D-Scatter with Clusters

In [None]:
# Creating a 3D-Scatter with the PCA data and the clusters
# YOUR CODE HERE


In [None]:
# Create a table with tradable cryptocurrencies.
# YOUR CODE HERE

In [None]:
# Print the total number of tradable cryptocurrencies.
# YOUR CODE HERE

In [None]:
# Scaling data to create the scatter plot with tradable cryptocurrencies.
# YOUR CODE HERE

In [None]:
# Create a new DataFrame that has the scaled data with the clustered_df DataFrame index.
# YOUR CODE HERE

# Add the "CoinName" column from the clustered_df DataFrame to the new DataFrame.
# YOUR CODE HERE

# Add the "Class" column from the clustered_df DataFrame to the new DataFrame. 
# YOUR CODE HERE

plot_df.head(10)

In [None]:
# Create a hvplot.scatter plot using x="TotalCoinsMined" and y="TotalCoinSupply".
# YOUR CODE HERE
