In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import tensorflow as tf

from sklearn.cluster import KMeans
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

In [2]:

# Отримання даних з API CoinGecko
response = requests.get('https://api.coingecko.com/api/v3/coins/bitcoin/market_chart?vs_currency=usd&days=365')
bitcoin_data = pd.DataFrame(response.json()['prices'], columns=['timestamp', 'price'])
bitcoin_data['price'] = bitcoin_data['price'].astype(float)

response = requests.get('https://api.coingecko.com/api/v3/coins/ethereum/market_chart?vs_currency=usd&days=365')
ethereum_data = pd.DataFrame(response.json()['prices'], columns=['timestamp', 'price'])
ethereum_data['price'] = ethereum_data['price'].astype(float)

response = requests.get('https://api.coingecko.com/api/v3/coins/litecoin/market_chart?vs_currency=usd&days=365')
litecoin_data = pd.DataFrame(response.json()['prices'], columns=['timestamp', 'price'])
litecoin_data['price'] = litecoin_data['price'].astype(float)

# Об'єднання даних в один DataFrame
data = pd.concat([bitcoin_data, ethereum_data, litecoin_data], axis=1)
data.columns = ['timestamp', 'bitcoin', 'timestamp', 'ethereum', 'timestamp', 'litecoin']
data = data[['bitcoin', 'ethereum', 'litecoin']]

# Нормалізація даних
data_norm = (data - data.mean()) / data.std()


In [10]:

# Побудова автоенкодера
input_layer = Input(shape=(3,))
encoded = Dense(2, activation='relu')(input_layer)
decoded = Dense(3, activation='linear')(encoded)

autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# Тренування автоенкодера
autoencoder.fit(data_norm, data_norm, epochs=1000, verbose=0)

# Отримання прихованих представлень криптовалют
encoder = Model(inputs=input_layer, outputs=encoded)
data_encoded = pd.DataFrame(encoder.predict(data_norm), columns=['x', 'y'])
data_encoded




Unnamed: 0,x,y
0,1.187437,0.0
1,0.870117,0.0
2,1.104256,0.0
3,1.127704,0.0
4,1.100353,0.0
...,...,...
361,2.618751,0.0
362,2.608413,0.0
363,2.644561,0.0
364,2.670502,0.0


In [11]:
# Кластеризація криптовалют за допомогою k-середніх
kmeans = KMeans(n_clusters=3)
kmeans.fit(data_encoded)

# Визначення кластерів для кожної криптовалюти
cluster_labels = pd.DataFrame({'cluster': kmeans.labels_}, index=data.index)

# Додавання кластерів до початкового DataFrame
data_with_clusters = pd.concat([data, data_encoded, cluster_labels], axis=1)

# Виведення результатів
print(data_with_clusters)


          bitcoin     ethereum    litecoin         x    y  cluster
0    38732.937013  2576.627154  100.447017  1.187437  0.0        1
1    41986.034446  2731.037685  106.650339  0.870117  0.0        1
2    39468.354773  2611.464723  102.621996  1.104256  0.0        1
3    38775.175588  2562.832354  104.876201  1.127704  0.0        1
4    38903.693548  2579.458136  105.713137  1.100353  0.0        1
..            ...          ...         ...       ...  ...      ...
361  22348.329915  1568.495601   89.292008  2.618751  0.0        2
362  22421.885806  1563.225662   89.981976  2.608413  0.0        2
363  22415.113660  1567.350147   87.410852  2.644561  0.0        2
364  22217.209885  1563.813182   86.361797  2.670502  0.0        2
365  22022.916390  1555.592623   85.011597  2.703445  0.0        2

[366 rows x 6 columns]


In [12]:
# import plotly and plot the data_with_clusters where x= index and y='bitcoin' and color='cluster'
import plotly.express as px
fig = px.scatter(data_with_clusters, x=data_with_clusters.index, y='bitcoin', color='cluster')
fig.show()


In [15]:
data_with_clusters['target'] = data_with_clusters['bitcoin'].pct_change().shift(-5)
data_with_clusters.groupby('cluster')['target'].agg(['mean', 'std', 'sum', 'count'])

Unnamed: 0_level_0,mean,std,sum,count
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.000186,0.029207,0.036777,198
1,-0.004062,0.031309,-0.251855,62
2,-0.001743,0.034626,-0.176066,101


# TODO
1. Increase the number of epochs
2. Increase the number of layers
3. Increase the number of neurons
4. Increase the number of features
5. Find optimal learning rate
6. Find optimal batch size
7. Find optimal optimizer
8. Find optimal activation function
9. Find optimal loss function
10. Find optimal dropout rate
11. Find optimal weight initialization
12. Find optimal clustering method (KMeans, DBSCAN, etc.)
13. Find optimal number of clusters
14. Find optimal number of clusters for each crypto
15. Find optimal time lag to hold position