In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

In [None]:
# Read the raw cryptocurrency table from the CSV file in the working directory.
crypto_df = pd.read_csv(filepath_or_buffer='crypto_data.csv')
# Preview the first five rows.
crypto_df.head(n=5)

In [None]:
# Keep only the cryptocurrencies that are currently being traded,
# i.e. the rows whose IsTrading value equals True.
is_trading_mask = crypto_df['IsTrading'] == True
crypto_df_filtered = crypto_df.loc[is_trading_mask]
crypto_df_filtered.head(n=5)

In [None]:
# With the trading filter applied, the IsTrading column is no longer needed.
# drop() returns a new frame; crypto_df_filtered itself is left untouched.
new_crypto_df = crypto_df_filtered.drop(columns=["IsTrading"])
new_crypto_df.head(n=5)

In [None]:
# Remove all rows that have at least one null value.
# dropna() returns a new DataFrame instead of modifying the frame in place,
# so the result has to be assigned back for the rows to actually be removed.
new_crypto_df = new_crypto_df.dropna()
new_crypto_df.head()

In [None]:
# Keep only the cryptocurrencies that have been mined:
# TotalCoinsMined must be strictly greater than zero.
has_mined_coins = new_crypto_df['TotalCoinsMined'] > 0
crypto_df_filtered2 = new_crypto_df.loc[has_mined_coins]
crypto_df_filtered2.head(n=5)

In [None]:
# Drop the CoinName column. drop() builds a new frame, which is bound to
# new_crypto_df; crypto_df_filtered2 is not modified.
new_crypto_df = crypto_df_filtered2.drop(columns=["CoinName"])
new_crypto_df.head(n=5)

In [None]:
# Next data-preparation step: turn the remaining text-valued features,
# Algorithm and ProofType, into numerical data by creating dummy variables
# with pandas.
# NOTE(review): no `columns=` argument is passed, so get_dummies encodes every
# object/string/category column still present in the frame, not only those
# two -- confirm that no other text columns remain at this point.
new_crypto_df1 = pd.get_dummies(data=new_crypto_df)
new_crypto_df1.head(n=5)

In [None]:
# Standardize the dataset so that columns holding larger values do not
# unduly influence the outcome.
scaler = StandardScaler()
scaled_crypto_df = scaler.fit_transform(new_crypto_df1)
# Show the first five scaled rows.
print(scaled_crypto_df[:5])

In [None]:
# Dimensionality reduction with PCA.
# Instead of fixing the number of principal components up front, a fractional
# n_components states the share of explained variance to preserve. For a
# dataset with 100 features, PCA(n_components=0.99) keeps however many
# components are needed for roughly 99% of the explained variance -- whether
# that turns out to be 80 components or 3.
# This project preserves 90% of the explained variance.

variance_to_keep = .9
pca = PCA(n_components=variance_to_keep)
crypto_pca = pca.fit_transform(scaled_crypto_df)
print(crypto_pca)

In [None]:
# Create an elbow plot to identify the best number of clusters.
# K-means is fitted once for each k from 1 through 10 and the inertia of each
# fit is recorded; the elbow, if there is one, is the k at which the curve
# stops dropping sharply.
#
# The models are fitted on crypto_pca (the PCA-reduced data) rather than on
# scaled_crypto_df, so the dimensionality reduction performed earlier is
# actually used for clustering.

# Candidate cluster counts and the inertia measured for each of them
inertia = []
k = list(range(1, 11))

# Calculate the inertia for the range of k values
for i in k:
    # n_init is set explicitly so the number of restarts does not depend on
    # the installed scikit-learn version's default; random_state fixes the
    # centroid initialisation so reruns give the same curve.
    km = KMeans(n_clusters=i, n_init=10, random_state=0)
    km.fit(crypto_pca)
    inertia.append(km.inertia_)

# Collect the results in a DataFrame for plotting
elbow_data = {"k": k, "inertia": inertia}
df_elbow = pd.DataFrame(elbow_data)

# Draw the elbow curve
plt.plot(df_elbow['k'], df_elbow['inertia'])
# Tick only at the k values that were evaluated (1..10); there is no k = 0.
plt.xticks(k)
plt.title('Elbow Curve')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()