# Cryptocurrency Euclidean Distance Semantic Analysis

### Authors
|    Student Name                 |    Student Number  |
|---------------------------------|--------------------|
| Raj Sandhu                      | 101111960          |
| Akaash Kapoor                   | 101112895          |
| Ali Alvi                        | 101114940          |
| Hassan Jallad                   | 101109334          |
| Areeb Ul Haq                    | 101115337          |
| Ahmad Abuoudeh                  | 101072636          |



# Libraries to Import

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics.pairwise import euclidean_distances

# Read In Processed Coin Datasheet

In [2]:
# Locate the project folders relative to the notebook's working directory.
# parent_folder is the directory two levels above the current working directory.
parent_folder = os.path.dirname(os.path.dirname(os.getcwd()))
data_folder = "data"
model_folder = "models"
processed_folder = "processed"
model_data_file_path = os.path.join(parent_folder, model_folder)  # path to the models folder
processed_data_file_path = os.path.join(parent_folder, data_folder, processed_folder)  # path to the processed data folder

# Read the processed coin data. The file is opened in a context manager so the
# handle is closed as soon as pandas has finished parsing it (the previous inline
# open() call was never closed explicitly).
with open(os.path.join(processed_data_file_path, "coin-info.csv"), "r") as coin_info_file:
    coin_df = pd.read_csv(coin_info_file)
coin_df.head()  # preview the first rows of the processed data file

Unnamed: 0,Name,Volatility,Description
0,iota,0.388529,IOTA (IOTA or MIOTA) is a cryptocurrency token...
1,anchor-protocol,1.155277,Anchor Protocol is a yield stable and attracti...
2,compound,155.017778,COMP is an ERC-20 token built on the Ethereum ...
3,bitcoin-sv,64.927187,Bitcoin SV is a cryptocurrency that was create...
4,drep,0.48517,DREPis committed to building a performance-ori...


# Create and Display the Similarity Matrix

In [3]:
# Build a square matrix of pairwise Euclidean distances between coin volatilities
# using sklearn's euclidean_distances. reshape(-1, 1) turns the volatility column
# into a 2-D array with one feature per coin; rows and columns are labelled by coin name.
coin_similarity_matrix = pd.DataFrame(
    data=euclidean_distances(coin_df["Volatility"].to_numpy().reshape(-1, 1)),
    index=coin_df["Name"],
    columns=coin_df["Name"],
)
# Display the resulting distance matrix.
coin_similarity_matrix

Name,iota,anchor-protocol,compound,bitcoin-sv,drep,moonbeam,usd-coin,chainlink,basic-attention-token,bittorrent,...,gala,bitcoin-gold,render-token,unfoldu-group-coin-(new),maker,nexus-mutual,juno,okb,avalanche,compound-usd-coin
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
iota,0.000000,0.766749,154.629249,64.538658,0.096641,2.539415,0.388017,7.039895,0.116270,0.388529,...,0.217023,23.495870,1.269398,0.387051,782.227791,29.031454,8.676256,5.874878,32.929336,0.387886
anchor-protocol,0.766749,0.000000,153.862500,63.771909,0.670108,1.772667,1.154765,6.273146,0.883018,1.155277,...,0.983771,22.729121,0.502649,1.153799,781.461042,28.264705,7.909508,5.108129,32.162588,1.154634
compound,154.629249,153.862500,0.000000,90.090591,154.532608,152.089833,155.017265,147.589354,154.745519,155.017778,...,154.846272,131.133379,153.359851,155.016300,627.598542,125.597795,145.952993,148.754371,121.699913,155.017134
bitcoin-sv,64.538658,63.771909,90.090591,0.000000,64.442017,61.999242,64.926674,57.498763,64.654928,64.927186,...,64.755681,41.042788,63.269260,64.925709,717.689133,35.507204,55.862401,58.663780,31.609322,64.926543
drep,0.096641,0.670108,154.532608,64.442017,0.000000,2.442775,0.484657,6.943254,0.212911,0.485170,...,0.313664,23.399229,1.172757,0.483692,782.131150,28.934813,8.579615,5.778237,32.832695,0.484527
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
nexus-mutual,29.031454,28.264705,125.597795,35.507204,28.934813,26.492038,29.419470,21.991559,29.147724,29.419982,...,29.248477,5.535584,27.762056,29.418505,753.196337,0.000000,20.355197,23.156576,3.897882,29.419339
juno,8.676256,7.909508,145.952993,55.862401,8.579615,6.136841,9.064273,1.636362,8.792526,9.064785,...,8.893279,14.819613,7.406859,9.063307,773.551534,20.355197,0.000000,2.801379,24.253080,9.064142
okb,5.874878,5.108129,148.754371,58.663780,5.778237,3.335462,6.262894,1.165017,5.991148,6.263406,...,6.091901,17.620992,4.605480,6.261929,776.352913,23.156576,2.801379,0.000000,27.054458,6.262763
avalanche,32.929336,32.162588,121.699913,31.609322,32.832695,30.389921,33.317353,25.889441,33.045606,33.317865,...,33.146359,9.433466,31.659938,33.316387,749.298455,3.897882,24.253080,27.054458,0.000000,33.317222


# Sanity Check For Similarity Matrix

In [4]:
# Take the first two coins by position (.iloc) so the check does not depend on
# the DataFrame's index labels.
coin_volatility_1 = coin_df["Volatility"].iloc[0]  # volatility of the first coin
coin_volatility_2 = coin_df["Volatility"].iloc[1]  # volatility of the second coin
coin_name_1 = coin_df["Name"].iloc[0]  # name of the first coin
coin_name_2 = coin_df["Name"].iloc[1]  # name of the second coin

# Store each volatility in its own 1x1 (2-D) NumPy array, as euclidean_distances expects 2-D input.
volatility_array_1 = np.array([[coin_volatility_1]])
volatility_array_2 = np.array([[coin_volatility_2]])

# Calculate the euclidean distance between the two coins.
euclidean_array = euclidean_distances(volatility_array_1, volatility_array_2).reshape(-1,1)

# For a single feature the euclidean distance is the absolute difference.
expected_distance = np.abs(volatility_array_2 - volatility_array_1)

# Compare the values element-wise. The previous check compared `.all()` of each
# array, i.e. two truthiness booleans, so it passed for any pair of non-zero
# values regardless of whether the distance was actually correct.
assert np.allclose(euclidean_array, expected_distance), (
    "Euclidean distance " + np.array2string(euclidean_array)
    + " does not match the absolute volatility difference " + np.array2string(expected_distance)
)
print("Coins " + coin_name_1 + " and " + coin_name_2 + " pass the unit test. They have a euclidean distance of: " + np.array2string(euclidean_array))


Coins iota and anchor-protocol pass the unit test. They have a euclidean distance of: [[0.76674851]]


# Download Similarity Matrix as a CSV File

In [5]:
# Store the similarity matrix as a CSV file in the models folder.
# The file is opened in a context manager so it is flushed and closed
# deterministically, and with newline="" so that the line terminators pandas
# writes are not translated a second time (which yields blank rows on Windows).
with open(os.path.join(model_data_file_path, "coin-similarity-matrix-description-euclidean.csv"), "w", newline="") as similarity_csv_file:
    coin_similarity_matrix.to_csv(similarity_csv_file)