In [None]:
# Dependencies
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

In [None]:
# Load the raw cryptocurrency dataset into a DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.
crypto_data_filepath = "crypto_data.csv"
crypto_data_df = pd.read_csv(filepath_or_buffer=crypto_data_filepath)
crypto_data_df

In [None]:
# Inspect the dtype pandas inferred for each column of the raw data
crypto_data_df.dtypes

In [None]:
# Work on an independent deep copy so the frame loaded from the CSV stays untouched
crypto_df_copy = crypto_data_df.copy(deep=True)

In [None]:
# Keep only the cryptocurrencies that are being traded, then drop the
# IsTrading flag, which is constant once the filter has been applied.
is_trading = crypto_df_copy["IsTrading"].eq(True)
crypto_df_copy = crypto_df_copy.loc[is_trading].drop(columns="IsTrading")
crypto_df_copy

In [None]:
# Count the missing values in each column.
# isnull() is an alias of isna(), so a single call is enough, and a per-column
# total is easier to read than the full boolean frame.
crypto_df_copy.isna().sum()

In [None]:
# Show every row that has at least one missing value.
# isnull() is an alias of isna(), so one lookup covers both.
crypto_df_copy[crypto_df_copy.isna().any(axis=1)]

In [None]:
# Remove every row that contains at least one null value
# (dropna works row-wise and uses how="any" by default).
crypto_df_copy = crypto_df_copy.dropna()
crypto_df_copy

In [None]:
# Summarise the frame after the null rows were removed: column dtypes and non-null counts
crypto_df_copy.info()

In [None]:
# Count how many cryptocurrencies have mined more than zero, exactly zero,
# or fewer than zero coins. A single summary shows all three counts; separate
# bare expressions in one cell would only display the last one.
coins_mined = crypto_df_copy["TotalCoinsMined"]
pd.Series(
    {
        "TotalCoinsMined > 0": (coins_mined > 0).sum(),
        "TotalCoinsMined == 0": (coins_mined == 0).sum(),
        "TotalCoinsMined < 0": (coins_mined < 0).sum(),
    },
    name="cryptocurrency_count",
)

In [None]:
# Keep only the cryptocurrencies with a strictly positive number of mined coins
has_mined_coins = crypto_df_copy["TotalCoinsMined"].gt(0)
crypto_df_copy = crypto_df_copy.loc[has_mined_coins]
crypto_df_copy

In [None]:
# Count the rows that exactly repeat an earlier row
duplicate_count = crypto_df_copy.duplicated().sum()
print(f"Duplicate entries: {duplicate_count}")

In [None]:
# Drop both the CoinName and "Unnamed: 0" columns from the working frame.
# NOTE(review): "Unnamed: 0" looks like the unnamed first column of the CSV;
# confirm it is only an identifier before relying on its removal.
crypto_df_copy = crypto_df_copy.drop(columns=["CoinName", "Unnamed: 0"])
crypto_df_copy

In [None]:
# Parse the TotalCoinSupply column into a numeric dtype.
# errors="raise" makes an unparseable value fail loudly instead of being coerced.
supply_numeric = pd.to_numeric(crypto_df_copy["TotalCoinSupply"], errors="raise")
crypto_df_copy["TotalCoinSupply"] = supply_numeric
crypto_df_copy

In [None]:
# Re-check the column dtypes and non-null counts after the TotalCoinSupply conversion
crypto_df_copy.info()

In [None]:
# One-hot encode the categorical Algorithm and ProofType columns into indicator columns
categorical_columns = ["Algorithm", "ProofType"]
crypto_df_copy = pd.get_dummies(data=crypto_df_copy, columns=categorical_columns)
crypto_df_copy

In [None]:
# Snapshot the encoded frame as an independent deep copy; this is the input to scaling
crypto_df_copy_copy = crypto_df_copy.copy(deep=True)
crypto_df_copy_copy

In [None]:
# Standardise every feature column: learn the scaling statistics first,
# then apply them to the same data.
scaler = StandardScaler()
scaler.fit(crypto_df_copy_copy)
X_scaled = scaler.transform(crypto_df_copy_copy)
X_scaled

In [None]:
## Dimensionality Reduction

In [None]:
# Build the PCA model. A fractional n_components asks scikit-learn to keep as
# many components as are needed to explain that share of the variance.
explained_variance_target = 0.9
pca = PCA(n_components=explained_variance_target)

# Project the scaled features onto the retained principal components
X_scaled_pca = pca.fit_transform(X_scaled)

# Share of the total variance captured by each retained component
pca.explained_variance_ratio_

In [None]:
# Build the t-SNE model. The embedding is stochastic, so random_state is pinned
# to give the same layout on every Restart & Run All.
tsne = TSNE(random_state=42)

# Embed the PCA-reduced features (t-SNE produces two dimensions by default)
X_scaled_pca_tsne = tsne.fit_transform(X_scaled_pca)

In [None]:
# Scatter the two t-SNE dimensions against each other, with a title and axis
# labels so the figure can be read on its own.
fig, ax = plt.subplots()
ax.scatter(X_scaled_pca_tsne[:, 0], X_scaled_pca_tsne[:, 1])
ax.set(
    title="t-SNE embedding of the PCA-reduced cryptocurrency features",
    xlabel="t-SNE dimension 1",
    ylabel="t-SNE dimension 2",
)
plt.show()