In [None]:
import seaborn as sns
import pandas as pd
import ta
import numpy as np
import matplotlib.pyplot as plt


In [None]:
def get_data(path, base_m=False, trade_metrics=False, trade_metrics_m=False, google=False, t=0, t_shift=0,
             keep_target=False):
    """Load a market-data CSV and optionally append technical-analysis indicator columns.

    Parameters
    ----------
    path : str or path-like
        CSV file to read. It must contain the base columns 'open', 'high', 'low', 'close',
        'volume', 'qav', 'num_trades', 'taker_base_vol' and 'taker_quote_vol'.
    base_m : bool
        When True, add the price/volume indicators 'macd', 'sma', 'ema', 'wma', 'trix', 'adx',
        'obv', 'adi', 'fi', 'mfi' and 'atr', computed with the `ta` package.
    trade_metrics : bool
        When True, also load 'sum_open_interest', 'sum_open_interest_value' and
        'count_long_short_ratio' from the CSV.
    trade_metrics_m : bool
        When True, add 'tm_obv', 'tm_adi', 'tm_fi' and 'tm_mfi', which are the volume indicators
        computed with 'sum_open_interest_value' in place of 'volume'. Requires `trade_metrics=True`
        because that is what loads the column.
    google : bool
        When True, also load the 'Scale_extracted_value' column from the CSV.
    t : int
        Window length passed to every windowed indicator.
    t_shift : int
        Look-ahead in rows for the target column; only used when `keep_target` is True.
    keep_target : bool
        When True, keep a 'return' column equal to the close `t_shift` rows ahead minus the
        current open. Rows whose look-ahead close does not exist are removed with the other NaN
        rows. The default (False) returns feature columns only.

    Returns
    -------
    pandas.DataFrame
        The requested columns with every row containing a NaN removed. Row labels are the
        original CSV row positions (the index is not reset).

    Raises
    ------
    ValueError
        If `trade_metrics_m` is True while `trade_metrics` is False.
    """
    if trade_metrics_m and not trade_metrics:
        # Fail early with a clear message instead of a KeyError deep inside the indicator code.
        raise ValueError("trade_metrics_m=True requires trade_metrics=True "
                         "(the 'sum_open_interest_value' column would not be loaded)")

    data_columns = ['open', 'high', 'low', 'close', 'volume', 'qav', 'num_trades', 'taker_base_vol', 'taker_quote_vol']
    if trade_metrics:
        data_columns += ['sum_open_interest', 'sum_open_interest_value', 'count_long_short_ratio']

    if google:
        data_columns += ['Scale_extracted_value']

    df = pd.read_csv(path, usecols=data_columns)

    # Indicators are left unfilled so that warm-up rows show up as NaN and are dropped below.
    fillna = False
    close = df['close']
    high = df['high']
    low = df['low']
    volume = df['volume']

    if base_m:
        # NOTE(review): only window_slow is set to t; window_fast stays at the library default,
        # so for small t the "slow" average may be the faster one - confirm this is intended.
        df['macd'] = ta.trend.macd(close=close, window_slow=t, fillna=fillna)
        df['sma'] = ta.trend.sma_indicator(close=close, window=t, fillna=fillna)
        df['ema'] = ta.trend.ema_indicator(close=close, window=t, fillna=fillna)
        df['wma'] = ta.trend.wma_indicator(close=close, window=t, fillna=fillna)
        df['trix'] = ta.trend.trix(close=close, window=t, fillna=fillna)
        # The next four calls previously received `fillna=t`, which silently enabled filling
        # whenever the window was non-zero; they now use the same flag as every other indicator.
        df['adx'] = ta.trend.adx(high=high, low=low, close=close, window=t, fillna=fillna)
        df['obv'] = ta.volume.on_balance_volume(close=close, volume=volume, fillna=fillna)
        df['adi'] = ta.volume.acc_dist_index(high=high, low=low, close=close, volume=volume, fillna=fillna)
        # The force index takes the close series; the high series was passed here before.
        df['fi'] = ta.volume.force_index(close=close, volume=volume, window=t, fillna=fillna)
        df['mfi'] = ta.volume.money_flow_index(high=high, low=low, close=close, volume=volume, window=t,
                                               fillna=fillna)
        df['atr'] = ta.volatility.average_true_range(high=high, low=low, close=close, window=t,
                                                     fillna=fillna)

    if trade_metrics_m:
        open_interest_value = df['sum_open_interest_value']
        df['tm_obv'] = ta.volume.on_balance_volume(close=close, volume=open_interest_value, fillna=fillna)
        df['tm_adi'] = ta.volume.acc_dist_index(high=high, low=low, close=close,
                                                volume=open_interest_value, fillna=fillna)
        # Same correction as 'fi': use the close series rather than the high series.
        df['tm_fi'] = ta.volume.force_index(close=close, volume=open_interest_value, window=t, fillna=fillna)
        df['tm_mfi'] = ta.volume.money_flow_index(high=high, low=low, close=close,
                                                  volume=open_interest_value, window=t, fillna=fillna)

    if keep_target:
        # shift(-t_shift) pulls the close from t_shift rows *ahead* onto the current row.
        df['return'] = df['close'].shift(-t_shift) - df['open']

    # dropna() keeps the original row labels, so callers can align results with the raw CSV.
    return df.dropna()

In [None]:
# Build the BTC frame with base indicators and the extra CSV columns, then show summary statistics.
df = get_data(
    path='../data/BTC/BTC-merged-data.csv',
    base_m=True,
    trade_metrics=True,
    trade_metrics_m=False,
    google=True,
    t=6,
    t_shift=4,
)
df.describe()

In [None]:
# Columns left out of the correlation plot.
excluded_columns = ['open', 'high', 'low', 'close', 'taker_base_vol', 'taker_quote_vol',
                    'sma', 'wma', 'volume', 'qav']

# `df` is rebound to the reduced frame, so a second run of this cell sees the columns already
# removed; errors='ignore' keeps the cell re-runnable instead of raising KeyError.
df = df.drop(columns=excluded_columns, errors='ignore')
correlation_matrix = df.corr()

# Draw on an explicit axes object rather than relying on pyplot's implicit current figure.
fig, ax = plt.subplots(figsize=(20, 16))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix')
plt.show()

In [None]:
train_percent = 0.9
validation_percent = 0.1

T = 6
T_shift = 6
path_BTC = '../data/BTC/BTC-merged-data.csv'
paths_ETH = '../data/ETH/ETH-merged-data.csv'
path_BNB = '../data/BNB/BNB-merged-data.csv'
path_DOG = '../data/DOGE/DOGE-merged-data.csv'


def _load_dataset(path):
    """Return (column names, feature array, forward-return array) for one CSV.

    get_data returns a single DataFrame without a target column, so unpacking its result into
    three names raises ValueError. The pieces are derived here instead: the target is the close
    T_shift rows ahead minus the current open, computed on the raw CSV and aligned to the rows
    that survived get_data's dropna via the preserved row labels.
    """
    features = get_data(path=path, base_m=True, trade_metrics=True,
                        trade_metrics_m=False,
                        google=False, t=T, t_shift=T_shift)

    prices = pd.read_csv(path, usecols=['open', 'close'])
    forward_return = prices['close'].shift(-T_shift) - prices['open']

    # Keep only rows present in the feature frame, then drop the trailing rows that have no
    # look-ahead close; otherwise NaN >= 0.0 would silently label them as the negative class.
    forward_return = forward_return.loc[features.index].dropna()
    features = features.loc[forward_return.index]

    # NOTE(review): arrays are returned on the assumption that later cells expect array-like
    # inputs with a separate column list - confirm against the code that consumes them.
    return list(features.columns), features.to_numpy(), forward_return.to_numpy()


columns_BTC, input_data_BTC, targets_BTC = _load_dataset(path_BTC)
columns_ETH, input_data_ETH, targets_ETH = _load_dataset(paths_ETH)
columns_BNB, input_data_BNB, targets_BNB = _load_dataset(path_BNB)
columns_DOG, input_data_DOG, targets_DOG = _load_dataset(path_DOG)

# Binarise: True when the forward return is non-negative, False otherwise.
targets_BTC = (targets_BTC >= 0.0)
targets_ETH = (targets_ETH >= 0.0)
targets_BNB = (targets_BNB >= 0.0)
targets_DOG = (targets_DOG >= 0.0)

In [None]:
# Boolean target arrays in the order they are plotted: BTC, ETH, BNB, DOGE.
data1, data2, data3, data4 = targets_BTC, targets_ETH, targets_BNB, targets_DOG


def plot_bar(ax, data, title):
    """Draw a two-bar chart of truthy vs. falsy entries in `data` on `ax`.

    The truthy count is shown as a green 'Long' bar and the remainder as a red 'Short' bar.
    Each bar is annotated above its top edge with its count and its percentage of len(data).

    Parameters
    ----------
    ax : axes-like object
        Must provide bar(), text() and set_title().
    data : sequence of booleans
        Values summed with np.sum; len(data) is used as the total.
    title : str
        Title set on the axes.
    """
    n_total = len(data)
    n_true = np.sum(data)
    n_false = n_total - n_true

    class_counts = [n_true, n_false]
    class_shares = [(n / n_total) * 100 for n in class_counts]

    rects = ax.bar(['Long', 'Short'], class_counts, color=['green', 'red'])

    for rect, n, share in zip(rects, class_counts, class_shares):
        top = rect.get_height()
        centre_x = rect.get_x() + rect.get_width() / 2
        # Anchor the label's bottom edge at the bar top so it sits just above the bar.
        ax.text(centre_x, top, f'{n} ({share:.2f}%)', ha='center', va='bottom')

    ax.set_title(title)


# One panel per coin on a 2x2 grid; axs.flat walks the grid row by row.
fig, axs = plt.subplots(2, 2, figsize=(14, 10))

panels = [
    (data1, 'Bitcoin'),
    (data2, 'Ethereum'),
    (data3, 'BNB'),
    (data4, 'Dogecoin'),
]
for panel_ax, (panel_data, panel_title) in zip(axs.flat, panels):
    plot_bar(panel_ax, panel_data, panel_title)

for ax in axs.flat:
    ax.set_ylabel('Liczba rekordów')

fig.suptitle('Rozkład klas w poszczególnych zbiorach', fontsize=16)
# The rect leaves room at the top of the figure for the suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()