In [34]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import hvplot.pandas
import plotly.express as px

In [35]:
# Loading data
file_path = "crypto_data.csv"
crypto_df = pd.read_csv(file_path)
crypto_df = crypto_df.set_index(['Unnamed: 0'])
crypto_df.head()

Unnamed: 0_level_0,CoinName,Algorithm,IsTrading,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
42,42 Coin,Scrypt,True,PoW/PoS,41.99995,42
365,365Coin,X11,True,PoW/PoS,,2300000000
404,404Coin,Scrypt,True,PoW/PoS,1055185000.0,532000000
611,SixEleven,SHA-256,True,PoW,,611000
808,808,SHA-256,True,PoW/PoS,0.0,0


In [36]:
#Remove all crypto that isnt trading
crypto_df = crypto_df.loc[crypto_df.IsTrading, :]

In [37]:
#Remove all crypto withput an algo defined
crypto_df = crypto_df.dropna(subset=['Algorithm'])

In [38]:
#Remove IsTrading Columns
crypto_df = crypto_df.drop(['IsTrading'], axis=1)

In [39]:
#Remove all rows with at least one element missing
crypto_df = crypto_df.dropna()

In [40]:
crypto_df['TotalCoinSupply'] = crypto_df['TotalCoinSupply'].astype(float)

In [41]:
#Remove all currencies without coins mined
crypto_df = crypto_df.loc[crypto_df.TotalCoinSupply>0, :]

In [42]:
#Create df with just coin names
coins_name = pd.DataFrame(crypto_df['CoinName'])
coins_name = coins_name.set_index(crypto_df.index)
coins_name.head()

Unnamed: 0_level_0,CoinName
Unnamed: 0,Unnamed: 1_level_1
42,42 Coin
404,404Coin
1337,EliteCoin
BTC,Bitcoin
LTC,Litecoin


In [43]:
#Drop CoinName
crypto_df = crypto_df.drop(['CoinName'], axis=1)
crypto_df2 = crypto_df.copy()
crypto_df2.head()

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,Scrypt,PoW/PoS,41.99995,42.0
404,Scrypt,PoW/PoS,1055185000.0,532000000.0
1337,X13,PoW/PoS,29279420000.0,314159300000.0
BTC,SHA-256,PoW,17927180.0,21000000.0
LTC,Scrypt,PoW,63039240.0,84000000.0


In [44]:
crypto_df.head()

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,Scrypt,PoW/PoS,41.99995,42.0
404,Scrypt,PoW/PoS,1055185000.0,532000000.0
1337,X13,PoW/PoS,29279420000.0,314159300000.0
BTC,SHA-256,PoW,17927180.0,21000000.0
LTC,Scrypt,PoW,63039240.0,84000000.0


In [45]:
crypto_df.dtypes

Algorithm           object
ProofType           object
TotalCoinsMined    float64
TotalCoinSupply    float64
dtype: object

In [46]:
#Get dummies for text
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
crypto_df['Algorithm'] = le.fit_transform(crypto_df['Algorithm']) 
crypto_df['ProofType'] = le.fit_transform(crypto_df['ProofType']) 
crypto_df.head()

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,57,15,41.99995,42.0
404,57,15,1055185000.0,532000000.0
1337,72,15,29279420000.0,314159300000.0
BTC,51,12,17927180.0,21000000.0
LTC,57,12,63039240.0,84000000.0


PCA

In [47]:
# Standardize data with StandardScaler
crypto_scaled = StandardScaler().fit_transform(crypto_df)
print(crypto_scaled[0:5])

[[ 0.35856665  0.95311726 -0.09782131 -0.03965512]
 [ 0.35856665  0.95311726 -0.07228807 -0.03965497]
 [ 1.19974129  0.95311726  0.61067897 -0.03956909]
 [ 0.02209679 -0.15682002 -0.09738751 -0.03965511]
 [ 0.35856665 -0.15682002 -0.0962959  -0.0396551 ]]


In [48]:
# Initialize PCA model
pca = PCA(n_components=3)

In [49]:
## Get three principal components for the crypto data.
crypto_pca = pca.fit_transform(crypto_scaled)

In [50]:
#Transform PCA data in a DataFrame
pca_df = pd.DataFrame(
    data=crypto_pca, columns=['PC1', 'PC2', 'PC3']
)
pca_df = pca_df.set_index(crypto_df.index)
pca_df.head()

Unnamed: 0_level_0,PC1,PC2,PC3
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
42,-0.908774,0.061375,-0.181022
404,-0.898369,0.081164,-0.186761
1337,-1.090641,1.03599,-0.625648
BTC,0.064587,-0.074469,-0.014353
LTC,-0.123194,0.096566,-0.128783


KMean and Elbow Curve

In [51]:
inertia = []
k = list(range(1, 11))

In [52]:
# Looking for the best K
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(pca_df)
    inertia.append(km.inertia_)

In [53]:
# Define a DataFrame to plot the Elbow Curve using hvPlot
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k)



In [54]:
# Initializing model w K=3 for 3 classes of iris plants
model = KMeans(n_clusters=5, random_state=5)
model

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=5, tol=0.0001, verbose=0)

In [55]:
# Fitting model
model.fit(pca_df)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=5, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=5, tol=0.0001, verbose=0)

In [56]:
#Get predictions
predictions = model.predict(pca_df)
print(predictions)

[4 4 4 0 0 4 3 3 0 0 4 4 0 0 4 4 0 0 4 0 0 4 0 0 0 0 0 0 0 3 0 4 4 0 4 0 4
 0 0 0 4 0 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4 0 0 4 0 4 4 0 0 0 0 4
 4 0 3 0 4 0 0 0 0 0 0 4 4 0 0 0 4 4 0 0 0 4 0 3 0 3 4 0 0 0 0 4 4 0 3 0 4
 4 0 4 4 3 3 0 3 0 4 0 0 0 0 4 0 0 0 0 4 4 0 4 0 0 0 0 4 0 0 0 4 0 0 0 4 4
 4 3 0 4 4 0 0 0 3 0 4 4 0 4 4 0 3 0 0 3 4 4 0 3 4 4 0 3 4 4 0 4 0 0 4 0 3
 0 4 3 4 4 0 0 4 0 3 0 4 4 4 4 3 4 0 4 0 4 4 0 4 4 0 3 4 4 4 0 4 0 4 4 0 4
 3 4 0 4 4 0 0 4 0 4 0 4 3 0 4 0 0 0 3 0 0 4 3 4 3 0 3 3 4 4 0 3 4 4 4 3 4
 4 4 4 4 3 4 4 4 4 4 4 4 3 4 0 4 4 0 4 0 3 4 3 4 3 4 4 4 4 4 4 4 4 0 3 0 4
 3 0 0 4 3 4 0 3 3 4 0 3 4 3 4 0 0 3 4 0 3 4 4 4 4 0 0 4 0 0 0 0 3 4 4 4 0
 0 3 3 3 3 3 3 0 4 0 4 0 4 0 4 4 4 4 3 4 4 0 3 0 4 4 0 4 0 0 4 0 3 0 0 0 0
 0 4 4 3 0 3 4 3 0 4 4 0 3 3 4 4 0 0 3 4 4 0 3 4 4 4 0 0 4 0 0 3 0 0 3 4 3
 0 4 0 4 3 3 3 3 0 4 0 3 4 4 0 4 4 3 4 4 0 0 0 0 4 4 0 4 0 3 4 1 3 3 4 3 0
 4 3 3 0 0 3 3 0 3 0 0 0 0 0 3 4 3 3 4 4 0 0 3 4 3 0 0 3 0 3 4 4 3 3 0 0 3
 3 3 4 4 3 4 4 4 3 3 0 4 

In [57]:
# Add a new class column to the df_iris
pca_df["class"] = model.labels_
pca_df.head()

Unnamed: 0_level_0,PC1,PC2,PC3,class
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,-0.908774,0.061375,-0.181022,4
404,-0.898369,0.081164,-0.186761,4
1337,-1.090641,1.03599,-0.625648,4
BTC,0.064587,-0.074469,-0.014353,0
LTC,-0.123194,0.096566,-0.128783,0


In [58]:
crypto_df2.head()

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
42,Scrypt,PoW/PoS,41.99995,42.0
404,Scrypt,PoW/PoS,1055185000.0,532000000.0
1337,X13,PoW/PoS,29279420000.0,314159300000.0
BTC,SHA-256,PoW,17927180.0,21000000.0
LTC,Scrypt,PoW,63039240.0,84000000.0


In [59]:
#Create clustered df data table
clustered_df = pd.merge(crypto_df2, pca_df, left_index=True, right_index=True)
clustered_df = pd.merge(clustered_df, coins_name, left_index=True, right_index=True)

In [60]:
clustered_df.head()

Unnamed: 0_level_0,Algorithm,ProofType,TotalCoinsMined,TotalCoinSupply,PC1,PC2,PC3,class,CoinName
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
42,Scrypt,PoW/PoS,41.99995,42.0,-0.908774,0.061375,-0.181022,4,42 Coin
404,Scrypt,PoW/PoS,1055185000.0,532000000.0,-0.898369,0.081164,-0.186761,4,404Coin
1337,X13,PoW/PoS,29279420000.0,314159300000.0,-1.090641,1.03599,-0.625648,4,EliteCoin
BTC,SHA-256,PoW,17927180.0,21000000.0,0.064587,-0.074469,-0.014353,0,Bitcoin
LTC,Scrypt,PoW,63039240.0,84000000.0,-0.123194,0.096566,-0.128783,0,Litecoin


Visualizing Results

In [66]:
#Unsure
import plotly.express as px
df = clustered_df
fig = px.scatter_3d(df, x='PC1', y='PC2', z='PC3',
              color='class', hover_name="CoinName", hover_data=["Algorithm"] )
fig.show()

In [62]:
#Create hvplot table
clustered_df.hvplot.table(columns=['CoinName','Algorithm','ProofType','TotalCoinSupply','TotalCoinsMined','class'],width=1000)

In [82]:
#Drop Outlier
clustered_df = clustered_df.drop(clustered_df['TotalCoinSupply'].idxmax())


In [83]:
#Drop outlier
clustered_df = clustered_df.drop(clustered_df['TotalCoinsMined'].idxmax())

In [84]:
#hvplot scatter
clustered_df.hvplot.scatter(x='TotalCoinsMined', y='TotalCoinSupply', by='class', hover_cols=["CoinName"])