In [None]:
import math
import os
from datetime import date

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2 as pg
from mpl_toolkits.axes_grid1 import make_axes_locatable
from sklearn.decomposition import PCA
from tslearn.clustering import TimeSeriesKMeans

In [None]:
# Notebook-wide settings, kept in one place so they can be tuned together.
SEED = 1990                # random_state passed to the clustering model
N_CLUSTERS = 8             # number of clusters requested from the clustering model
WINDOW_SIZE = 100          # number of consecutive observations in each rolling window
VAR_PERCENTILE_LEVEL = 5   # percentile handed to np.percentile for the VaR estimates
METRIC = "dtw"             # metric name passed to the clustering model

# Shared text styling passed as `fontdict` to axis labels and titles.
FONT = dict(
    family='serif',
    color='black',
    weight='normal',
    size=20,
)

In [None]:
# Open the PostgreSQL connection used to read the input table.
# The password is taken from the PGPASSWORD environment variable so that no
# credential is stored in (or committed with) the notebook; a missing variable
# fails immediately with a KeyError instead of a confusing authentication error.
connection = pg.connect(
    database="postgres",
    user="postgres",
    password=os.environ["PGPASSWORD"],
    host="localhost",
    port=5430,
)
cur = connection.cursor()

In [None]:
# Load the daily change table into a frame indexed by its "date" column.
# The database already returns the rows in ascending date order, so no second
# sort is needed in Python.
cur.execute("SELECT * FROM input.crypto_daily_changes ORDER BY date ASC")
data = cur.fetchall()
column_names = [desc[0] for desc in cur.description]

# Build the frame first and convert to float afterwards: the "date" column is
# not numeric, so requesting dtype="float64" for the whole frame cannot be
# honoured for every column (newer pandas versions raise instead of silently
# falling back). Selecting the index by name also avoids relying on "date"
# being the first column returned by SELECT *.
daily_changes = (
    pd.DataFrame(data=data, columns=column_names)
    .set_index("date")
    .rename_axis(None)
    .astype("float64")
)

# Keep only the study period; both bounds are inclusive.
in_period = (daily_changes.index >= date(2016, 1, 1)) & (daily_changes.index <= date(2018, 7, 31))
daily_changes = daily_changes[in_period]

# Shift every value down by 1.
# NOTE(review): presumably converts gross change ratios into simple returns - confirm against the table definition.
daily_changes = daily_changes - 1

# Transposed view: one row per table column, one column per date.
transpose_df = daily_changes.transpose()

In [None]:
# Heat map of the pairwise correlations between the columns of daily_changes.
corr_mat = daily_changes.corr()
cryptos = np.array(corr_mat.columns)

# Set the default figure size before any figure exists. There is deliberately
# no plt.clf() here: plt.matshow opens its own new figure, so clearing first
# only left an extra, empty figure behind in the cell output.
plt.rcParams['figure.figsize'] = [20, 15]
heatmap = plt.matshow(corr_mat)
ax = heatmap.axes

# Label both axes with the column names of the correlation matrix.
plt.xticks(range(len(cryptos)), cryptos, rotation="vertical", fontsize=24)
plt.yticks(range(len(cryptos)), cryptos, fontsize=24)

# Reserve a narrow axes to the right of the heat map for the colour bar.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

# Name the image and the target axes explicitly instead of relying on the
# pyplot "current image" / "current axes" state.
plt.colorbar(heatmap, cax=cax)
cax.tick_params(labelsize=24)
plt.show()

In [None]:
# Plain NumPy copy of the transposed frame (rows follow the rows of transpose_df).
transpose_array = transpose_df.to_numpy(copy=True)

In [None]:
# Scree plot: explained variance ratio per principal component (bars) and its
# running total (line) for a PCA that keeps as many components as possible.

# PCA cannot extract more components than min(n_samples, n_features), so the
# count is derived from the data instead of being hard-coded.
n_plot_components = min(transpose_array.shape)
pca_plot = PCA(n_components=n_plot_components)
pca_plot.fit(transpose_array)

cumsum_variance_ratio = np.cumsum(pca_plot.explained_variance_ratio_)
pc_names = ["PC" + str(i) for i in range(1, n_plot_components + 1)]
pc_positions = range(n_plot_components)

plt.clf()
plt.plot(cumsum_variance_ratio, color="darkorange")
plt.bar(pc_positions, pca_plot.explained_variance_ratio_, color="blue")
plt.yticks(fontsize=24)
plt.xticks(pc_positions, pc_names, rotation=40, fontsize=24)
plt.xlabel("Principal Component", fontdict=FONT, fontsize=28)
plt.ylabel("Explained Variance Percentage", fontdict=FONT, fontsize=28)
plt.show()

In [None]:
# Reduce every row of transpose_array to 26 principal-component scores.
pca = PCA(n_components=26)
pca_array = pca.fit_transform(transpose_array)

# Cluster the reduced rows with time-series k-means, using the metric, cluster
# count and seed from the configuration cell, and keep each row's cluster label.
dba_km = TimeSeriesKMeans(
    n_clusters=N_CLUSTERS,
    metric=METRIC,
    max_iter=100,
    max_iter_barycenter=10,
    random_state=SEED,
    verbose=True,
)
pred_clusters = dba_km.fit_predict(pca_array)

In [None]:
# Total of the explained variance ratios of the components kept by `pca`.
explained_total = sum(pca.explained_variance_ratio_)
print("Percentage of interpretation:", explained_total)

In [None]:
# List the members of every cluster; cluster numbers are printed 1-based.
for cluster_id in set(pred_clusters):
    members = cryptos[pred_clusters == cluster_id]
    print("Cluster", cluster_id + 1, ":", members)

In [None]:
# Plot every cluster in its own panel: member series as translucent black
# lines, the fitted cluster centre in red.

# The default size is set before the figure is created so it applies to it.
plt.rcParams['figure.figsize'] = [20, 15]
plt.clf()

n_cols = 2
# plt.subplot needs integer grid dimensions; rounding up gives exactly enough
# rows for N_CLUSTERS panels (also for odd counts) without an unused extra row.
n_rows = math.ceil(N_CLUSTERS / n_cols)

for yi in np.unique(pred_clusters):
    plt.subplot(n_rows, n_cols, int(yi) + 1)
    for xx in pca_array[pred_clusters == yi]:
        plt.plot(xx.ravel(), "k-", alpha=.2)
    plt.plot(dba_km.cluster_centers_[yi].ravel(), "r-")
    plt.xlim(0, pca_array.shape[1])
    plt.ylim(-4, 10)
    title = "Cluster : " + str(yi + 1)
    plt.title(title, fontdict=FONT, fontsize=32)
    plt.xticks(fontsize=24)
    plt.yticks(fontsize=24)
    # Only the left-hand column carries the y-axis label.
    if yi % n_cols == 0:
        plt.ylabel("Return", rotation="vertical", fontdict=FONT, fontsize=28)
    # Only the last panel of each column carries the x-axis label.
    if yi >= N_CLUSTERS - n_cols:
        plt.xlabel("Principal Component", fontdict=FONT, fontsize=28)

plt.tight_layout()
plt.show()

In [None]:
# Rolling percentile-based VaR estimates, one per row of transpose_array and
# per window start:
#   vars_arr_before - percentile of the row's own window
#   vars_arr_after  - percentile of the pooled windows of all rows in the same
#                     cluster, repeated for every member of that cluster
# Both results have shape (rows of transpose_array, number of windows).
n_assets, n_days = transpose_array.shape  # sizes come from the data, not a literal
n_windows = max(n_days - WINDOW_SIZE, 0)
cluster_ids = np.unique(pred_clusters)

vars_arr_before = np.empty((n_assets, n_windows))
vars_arr_after = np.empty((n_assets, n_windows))

for i in range(n_windows):
    window = transpose_array[:, i: i + WINDOW_SIZE]

    # Row-wise percentile over the window.
    vars_arr_before[:, i] = np.percentile(window, VAR_PERCENTILE_LEVEL, axis=1)

    for j in cluster_ids:
        members = pred_clusters == j
        # Without an axis argument the percentile is taken over the flattened
        # block, i.e. over all member windows pooled together.
        vars_arr_after[members, i] = np.percentile(window[members], VAR_PERCENTILE_LEVEL)

In [None]:
# Display the rolling VaR estimates computed from each row's own window.
vars_arr_before

In [None]:
# Display the rolling VaR estimates computed from the pooled windows of each cluster.
vars_arr_after

In [None]:
# Shape check of the cluster-based estimates (the array was transposed after the loop).
vars_arr_after.shape

In [None]:
# Shape check of the per-row estimates (the array was transposed after the loop).
vars_arr_before.shape

In [None]:
# Number of observations after the first window that fall below the per-row
# VaR estimate, for every row at once. The original loop used range(0, 28) and
# therefore skipped the last row while still dividing by the full row count.
violations_before = np.sum(transpose_array[:, WINDOW_SIZE:] < vars_arr_before, axis=1)

# Rows with at least 42 such breaches are counted.
# NOTE(review): 42 looks like the expected breach count at the 5% level for
# this sample length - confirm before changing the period or the level.
count = int(np.sum(violations_before >= 42))
p_value_1side_before = 1 - count / vars_arr_before.shape[0]

In [None]:
# Display the value derived from the breach count against the per-row VaR estimates.
p_value_1side_before

In [None]:
# Number of observations after the first window that fall below the
# cluster-based VaR estimate, for every row at once. The original loop used
# range(0, 28) and therefore skipped the last row while still dividing by the
# full row count.
violations_after = np.sum(transpose_array[:, WINDOW_SIZE:] < vars_arr_after, axis=1)

# Rows with at least 42 such breaches are counted.
# NOTE(review): 42 looks like the expected breach count at the 5% level for
# this sample length - confirm before changing the period or the level.
count = int(np.sum(violations_after >= 42))
p_value_1side_after = 1 - count / vars_arr_after.shape[0]

In [None]:
# Display the value derived from the breach count against the cluster-based VaR estimates.
p_value_1side_after

In [None]:
# For every row: share of the observations after the first window that lie
# strictly above the corresponding VaR estimate, computed for both estimates.
realised = transpose_array[:, WINDOW_SIZE:]
accuracies_before = (realised > vars_arr_before).sum(axis=1) / vars_arr_before.shape[1]
accuracies_after = (realised > vars_arr_after).sum(axis=1) / vars_arr_after.shape[1]

In [None]:
# Display the per-row share of observations above the per-row VaR estimate.
accuracies_before

In [None]:
# Display the per-row share of observations above the cluster-based VaR estimate.
accuracies_after

In [None]:
# Average of the per-row shares obtained with the per-row VaR estimates.
accuracies_before.mean()

In [None]:
# Average of the per-row shares obtained with the cluster-based VaR estimates.
accuracies_after.mean()