In [None]:
# Prepend a modified copy of the first sys.path entry, with a trailing
# "/src/nb" removed, to the front of sys.path, then print the resulting
# search path for inspection.
# NOTE(review): presumably sys.path[0] is this notebook's directory and the
# stripped path is the project root that makes the `src.*` imports below
# resolvable — confirm against how the notebook is launched.
import sys
sys.path.insert(0, sys.path[0].removesuffix("/src/nb"))
print(sys.path)

from ta.trend import *
from ta.momentum import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import InstanceHardnessThreshold, RepeatedEditedNearestNeighbours, AllKNN
from keras.models import Sequential
from keras.layers import Dense

import src.utils as utils
import src.myenv as myenv
import src.calc_utils as calc_utils

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import warnings

# --- Experiment switches read by the cells below ---------------------------
scaled = True                # run the feature-scaling step
clustering = False           # add one-hot KMeans cluster features
imbalance = True             # resample the training split before fitting
shuffle = True               # forwarded to train_test_split
train_size = 0.9             # forwarded to train_test_split
categorical_features = None  # not referenced by any cell visible here

# Suppress every warning for the rest of the session. This also hides
# deprecation notices emitted by the libraries in use.
warnings.simplefilter("ignore")

In [None]:
# Columns returned by the loader that are discarded right after loading
# (timestamps, volume figures, trade counts and the "ignore" column).
unused_kline_columns = [
    "open_time",
    "close_time",
    "volume",
    "quote_asset_volume",
    "number_of_trades",
    "taker_buy_base_asset_volume",
    "taker_buy_quote_asset_volume",
    "ignore",
]

# Load the BTCUSDT / 1h dataset through the project helper.
df = utils.get_database(
    symbol="BTCUSDT",
    interval="1h",
    columns=myenv.all_klines_cols,
    parse_dates=False,
)
df.drop(columns=unused_kline_columns, inplace=True)
df.info()

In [None]:
# Feature engineering: append technical-indicator columns to `df`.

# Project helpers. NOTE(review): the columns they add are defined in
# src/calc_utils and are not visible from this notebook.
df = calc_utils.calc_ema_periods(
    df, [150, 200, 250, 300], close_price="close", diff_price=True
)
df = calc_utils.calc_RSI(
    df, close_price="close", window=14, fillna=False, last_one=False
)
df = calc_utils.calc_amplitude(df, column="close")

# MACD with the conventional (fast=12, slow=26, signal=9) windows.
# ta's MACD constructor declares `window_slow` BEFORE `window_fast`, so the
# windows are passed by keyword: positional 12, 26 would swap them and
# invert the sign of the MACD line, its signal line and their difference.
macd = MACD(df["close"], window_fast=12, window_slow=26, window_sign=9)
df["macd"] = macd.macd()
df["macd_diff"] = macd.macd_diff()
df["macd_signal"] = macd.macd_signal()

# Awesome Oscillator from the high/low series (library default windows).
aoi = AwesomeOscillatorIndicator(df["high"], df["low"])
df["aoi"] = aoi.awesome_oscillator()

# True Strength Index of the close (library default windows).
df["tsi"] = TSIIndicator(df["close"]).tsi()

# Drop every row that still contains a NaN in any column.
df.dropna(inplace=True)

df.info()

In [None]:
# Relative change of `close` versus the value 24 rows earlier, expressed as
# a fraction of the current close.
close = df["close"]
close_24_rows_back = close.shift(24)
df["variation"] = (close - close_24_rows_back) / close

# Binary label: 1 = "SOBE" (rises, variation above 0.015),
#               0 = "ESTAVEL" (stable, everything else).
rose = df["variation"] > 0.015
df["status"] = np.where(rose, 1, 0)

# The first 24 rows have no reference close, so their variation is NaN;
# they are removed here together with any other row containing a NaN.
df.dropna(inplace=True)
df.info()

In [None]:
# `variation` has served its purpose of deriving `status`; remove it so the
# remaining non-label columns are the features, then show the class counts.
df.drop("variation", axis=1, inplace=True)
df["status"].value_counts()

In [None]:
# Pick the frame the following cells work on: `df` itself when scaling is
# switched off, otherwise the output of the project scaler applied to every
# column except the label.
if not scaled:
    df_scaled = df
else:
    cols = df.columns.drop("status")
    df_scaled = calc_utils.standard_scaler(df, columns=cols)

In [None]:
from sklearn.cluster import KMeans

# Optional feature: one-hot encode a KMeans cluster id fitted on the feature
# columns (every column except the label).
if clustering:
    n_clusters = 4
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(
        df_scaled.drop(columns=["status"])
    )
    # Declaring the full category set makes get_dummies emit one
    # `cluster_<i>` column per cluster id even if some cluster received no
    # rows; `dtype` yields float32 indicator columns for any n_clusters.
    df_scaled["cluster"] = pd.Categorical(
        kmeans.labels_, categories=list(range(n_clusters))
    )
    df_scaled = pd.get_dummies(df_scaled, columns=["cluster"], dtype="float32")

df_scaled.head()

In [None]:
# Split the rows into a training part (`train_size` of the data) and a
# validation part (the remainder), reproducibly via a fixed random_state.
# NOTE(review): when `shuffle` is True the rows are shuffled before the
# split, while `status` is derived from closes 24 rows apart — confirm that
# mixing neighbouring rows across the two splits is intended.
train_data, validation_data = train_test_split(
    df_scaled,
    train_size=train_size,
    shuffle=shuffle,
    random_state=123,
)

# Separate the label column from the features in each split.
X_train, y_train = train_data.drop(columns=["status"]), train_data["status"]
X_validation, y_validation = (
    validation_data.drop(columns=["status"]),
    validation_data["status"],
)

In [None]:
# Optionally resample the training split with imblearn's AllKNN
# under-sampler; the validation split is never resampled.
if not imbalance:
    X_res, y_res = X_train, y_train
else:
    imb = AllKNN(
        sampling_strategy='auto',
        n_neighbors=3,
        kind_sel='all',
        allow_minority=False,
        n_jobs=-1,
    )
    X_res, y_res = imb.fit_resample(X_train, y_train)
    print(X_res.shape, y_res.shape)

In [None]:
# Seed TensorFlow's global RNG before any layer is created.
tf.random.set_seed(123)

# Fully connected binary classifier: one input per feature column, two ReLU
# hidden layers, and a single sigmoid output unit.
n_features = X_res.shape[1]
model_layers = [
    tf.keras.Input(shape=(n_features,)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(1, activation="sigmoid"),
]
basic_model = tf.keras.Sequential(model_layers)

In [None]:
# Print the layer-by-layer summary of `basic_model`.
basic_model.summary()

In [None]:
# Configure training: binary cross-entropy loss for the single sigmoid
# output, RMSprop optimizer, accuracy reported as the metric.
# Alternative optimizer kept for reference:
#   tf.keras.optimizers.Adam(learning_rate=0.001)
basic_model.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 50 epochs on the (optionally resampled) training data, with the
# held-out validation split passed as validation_data.
basic_model.fit(
    X_res,
    y_res,
    epochs=50,
    validation_data=(X_validation, y_validation),
    verbose=1,
)

In [None]:
# Evaluate on the same data the model was fitted on (X_res / y_res), so the
# numbers below are training metrics, not validation metrics.
loss_and_metrics = basic_model.evaluate(X_res, y_res)
print(loss_and_metrics)

# Index 0 is the loss; index 1 is the metric configured at compile time.
train_loss, train_accuracy = loss_and_metrics[0], loss_and_metrics[1]
print('Loss = ', train_loss)
print('Accuracy = ', train_accuracy)

In [None]:
# Run the model on the validation features. The output layer is a single
# sigmoid unit, so each prediction is one value between 0 and 1.
predicted = basic_model.predict(X_validation)

In [None]:
# Threshold the model outputs at 0.5 to obtain boolean class predictions
# (True counts as 1, False as 0).
predicted = np.greater_equal(predicted, 0.5)
actual = np.asarray(y_validation)

# Confusion matrix of validation labels against thresholded predictions.
conf_mat = confusion_matrix(y_true=actual, y_pred=predicted)
displ = ConfusionMatrixDisplay(confusion_matrix=conf_mat)
displ.plot()

In [None]:
# Per-row hit/miss table on the validation index.
# Thresholding here (instead of relying on `predicted` already being
# boolean) gives the same labels for both raw sigmoid outputs and an
# already-thresholded array, so this cell does not depend on which earlier
# cells were run.
predicted_label = np.asarray(predicted)[:, 0] >= 0.5
predictions = pd.DataFrame(
    {"prediction_label": predicted_label}, index=X_validation.index
)
predictions["status"] = y_validation
predictions["score"] = predictions["prediction_label"] == predictions["status"]

# `score` is boolean (True = correct prediction), so its mean is the fraction
# of correct predictions. Counting via value_counts() and indexing with 0/1
# would pick the largest/smallest count rather than the hit/miss count, and
# would fail when a group contains only hits or only misses.
is_estavel = predictions["status"] == 0
is_sobe = predictions["status"] == 1

se = predictions.loc[is_estavel, "score"].mean()  # hit rate on status == 0
ss = predictions.loc[is_sobe, "score"].mean()     # hit rate on status == 1
s = predictions["score"].mean()                   # overall hit rate

# A group with no rows yields NaN instead of raising.
print(f"Status: ESTAVEL: {se} - SOBE: {ss} - GERAL: {s}")