## Imports

In [24]:
import talib as tb
import pandas as pd
import numpy as np
import json
from sklearn.model_selection import train_test_split
from sklearn import svm
import yfinance as yf



## Constants


In [25]:
params_file="./parameters/params.json"
with open(params_file, 'r') as file:
        params = json.load(file)

## Data Preprocessing   

In [26]:
def calculate_stochastic_k(row, period):
    ll = np.min(row['Low'][-period:])
    hh = np.max(row['High'][-period:])
    k_percent = ((row['close'] - ll) / (hh - ll)) * 100
    return k_percent

def calculate_stochastic_d(row, n):
    k_sum = row['Stochastic_K_percent'].tail(n).sum()
    d_percent = k_sum / n
    return d_percent



In [27]:

# Define a function to determine the future trend
def calculate_future_trend(current_close, future_prices_mean):
    if future_prices_mean > 0:
        return 1  # Price is likely to go up
    else:
        return -1  # Price is likely to go down 

In [28]:

def resample(df, interval):
    # Set index
    df['time'] = pd.to_datetime(df['time'])
    df.set_index('time', inplace=True)
    # Resample the DataFrame to specified period and use the appropriate value within each column
    return df[['open', 'high', 'low', 'close', 'Volume ETH', 'unix']].resample(interval).agg(
        {'open': 'first', 'high': np.max, 'low': np.min, 'close': 'last', 'Volume ETH': np.sum, 'unix': 'first'})

def process(df):
    df.fillna(method='ffill', inplace=True)
    df["ma"] = tb.MA(df["close"], timeperiod=params['ma_period'])
    df["wma"] = tb.WMA(df["close"], timeperiod=params['wma_period'])
    df["mom"] = tb.MOM(df["close"], timeperiod=params['mom_period'])
    df["Stochastic_K_percent"] = tb.STOCH(df["high"], df["low"], df["close"], fastk_period=params["stochastic_k_period"])[0]
    df["Stochastic_D_percent"] = df["Stochastic_K_percent"].rolling(window=params["stochastic_d_period"]).mean()
    df["Williams_R_percent"] = tb.WILLR(df["high"], df["low"], df["close"], timeperiod=params["williams_period"])
    df["rsi"] = tb.RSI(df["close"], timeperiod=params["rsi_period"])
    df["macd"], df["macdsignal"], df["macdhist"] = tb.MACD(df["close"], fastperiod=12, slowperiod=26, signalperiod=9)

    df["ma_trend"] = df.apply(lambda row: 1 if row["close"] > row["ma"] else -1, axis=1) # OK
    df["wma_trend"] = df.apply(lambda row: 1 if row["close"] > row["wma"] else -1, axis=1) # OK
    df["mom_trend"] = df["mom"].apply(lambda x: 1 if x > 0 else -1) # OK
    df["stochastic_K_trend"] = np.where(df["Stochastic_K_percent"] > df["Stochastic_K_percent"].shift(1).fillna(-1), 1, -1) #OK
    df["stochastic_D_trend"] = np.where(df["Stochastic_D_percent"] > df["Stochastic_D_percent"].shift(1).fillna(-1), 1, -1) # Ok
    df["williams_R_trend"] = np.where(df["Williams_R_percent"] > df["Williams_R_percent"].shift(1).fillna(-1), 1, -1) # OK
    df["macd_trend"] = np.where(df["macd"] > df["macd"].shift(1).fillna(-1), 1, -1) # OK
    df["rsi_trend"] = np.where(df["rsi"] < 30, 1, np.where(df["rsi"] > 70, -1, np.where(df["rsi"].shift(1).fillna(df["rsi"].iloc[0]) < df["rsi"], 1, -1))) # OK
    #df["rsi_trend"] = df["rsi"].apply(lambda x: 1 if x < 30 else (-1 if x > 70 else (1 if x > df["rsi"].shift(1).iloc[-1] else -1)))

    # # Calculate the future trend and create the 'target' column
    # future_window = 10  # Consider the next 10 prices for trend prediction

    # # Calculate the mean of future prices within the window
    # df['future_prices_mean'] = df['close'].pct_change().rolling(future_window).mean().shift(-future_window)

    # # Apply the function to determine the trend
    # df['target'] = df.apply(lambda row: calculate_future_trend(row['close'], row['future_prices_mean']), axis=1)

    # # Drop the 'future_prices_mean' column
    # df.drop('future_prices_mean', axis=1, inplace=True)
    window = params["window"]
    df['moving_average'] = df['close'].rolling(window=window).mean().shift(-window)


    df['target'] = np.where(df['close'] * 1.05 < df['moving_average'], 1, -1)
    
    columns = [
               #"rsi_trend",
               "ma_trend",
               "wma_trend",
               "mom_trend",
               "stochastic_K_trend", 
               "stochastic_D_trend", 
               "williams_R_trend",
               "macd_trend",
               "target",
               #"close","moving_average"
               ]
    return df[columns]

## Create dataset

In [29]:
df = pd.read_csv("/home/kmajchrzak/Desktop/scpxd/data/binance_eth.csv")
df = resample(df, params["interval"])
#df = yf.download("AAPL", start="2015-01-01", end="2022-01-01")
#df.rename(columns={"Open": "open", "High": "high", "Low": "low""", "Close": "close"}, inplace=True)
df = process(df)


df.dropna(inplace=True)

features = df.drop('target', axis=1)  # Features (all columns except 'target')
target = df['target']  # Target labels

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=params["validation_size"],train_size=params["train_size"])


# Create and train the SVM model

In [30]:
label_counts = {}
total_samples = len(y_train)

for label in y_train:
    label_counts[label] = label_counts.get(label, 0) + 1

# Step 2: Calculate the percentage for each label
percentage_labels = {}

for label, count in label_counts.items():
    percentage = (count / total_samples) * 100
    percentage_labels[label] = percentage

# Display the results
for label, percentage in percentage_labels.items():
    print(f"Label {label}: {percentage:.2f}%")

Label 1: 52.14%
Label -1: 47.86%


In [31]:
import matplotlib.pyplot as plt

# Assuming df contains your DataFrame with 'close' and 'target' columns

# Plot the 'close' prices
plt.figure(figsize=(12, 6))
plt.plot(df['close'], label='Close Price', color='blue')

# Plot the trend (target)
upward_trend = df[df['target'] == 1]['close']
downward_trend = df[df['target'] == -1]['close']
plt.plot(upward_trend.index, upward_trend, 'g^', label='Upward Trend', markersize=10, alpha=0.7)
plt.plot(downward_trend.index, downward_trend, 'rv', label='Downward Trend', markersize=10, alpha=0.7)

plt.title('Close Price with Trend')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()


KeyError: 'close'

<Figure size 1200x600 with 0 Axes>

In [None]:
pd.set_option('display.max_rows', 15)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
df

Unnamed: 0_level_0,ma_trend,wma_trend,mom_trend,stochastic_K_trend,stochastic_D_trend,williams_R_trend,macd_trend,target,close,moving_average
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-01 00:00:00,-1,-1,-1,-1,-1,-1,-1,1,128.82,130.450000
2020-01-01 01:00:00,-1,-1,-1,-1,-1,-1,-1,-1,130.57,130.531667
2020-01-01 02:00:00,-1,-1,-1,-1,-1,-1,-1,-1,130.80,130.616667
2020-01-01 03:00:00,-1,-1,-1,-1,-1,-1,-1,1,130.09,130.773333
2020-01-01 04:00:00,-1,-1,-1,-1,-1,-1,-1,1,130.15,130.993333
...,...,...,...,...,...,...,...,...,...,...
2023-07-04 07:00:00,-1,-1,-1,-1,-1,-1,-1,1,1948.80,1953.085833
2023-07-04 08:00:00,-1,-1,-1,1,-1,1,-1,1,1950.52,1952.384167
2023-07-04 09:00:00,1,1,-1,1,-1,1,-1,-1,1954.87,1951.294167
2023-07-04 10:00:00,1,1,1,1,1,1,1,-1,1958.00,1949.925000


In [None]:
# Assuming df contains the DataFrame with the specified columns

# Calculate the total number of rows
total_rows = len(df)

# Initialize a dictionary to store the percentages
percentage_per_trend = {}

# Iterate through each trend column and calculate the percentage of matching rows
for trend_column in df.columns[:-2]:  # Exclude the last two columns: 'target' and 'close'
    matching_rows = df[df[trend_column] == df['target']]
    percentage = len(matching_rows) / total_rows * 100
    percentage_per_trend[trend_column] = percentage

# Display the percentages for each trend column
for trend_column, percentage in percentage_per_trend.items():
    print(f"Percentage of matching rows for {trend_column}: {percentage:.2f}%")


Percentage of matching rows for ma_trend: 48.83%
Percentage of matching rows for wma_trend: 48.66%
Percentage of matching rows for mom_trend: 49.53%
Percentage of matching rows for stochastic_K_trend: 49.13%
Percentage of matching rows for stochastic_D_trend: 49.65%
Percentage of matching rows for williams_R_trend: 48.99%
Percentage of matching rows for macd_trend: 49.53%
Percentage of matching rows for target: 100.00%


## Create model


In [32]:
clf = svm.SVC(C=100, kernel='poly', degree=2)
clf.fit(X_train, y_train)

accuracy = clf.score(X_test, y_test)
print("Accuracy:", accuracy)

Accuracy: 0.5197593104569849


In [23]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
clf = MLPClassifier(hidden_layer_sizes=(100, 50), activation='logistic', learning_rate_init=0.001, momentum=0.2, max_iter=500, random_state=42)
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
print("Accuracy:", accuracy)
# y_pred = clf.predict(X_test)
# accuracy = accuracy_score(y_test, y_pred)
# print("Accuracy:", accuracy)

Accuracy: 0.5156936087168645
