In [None]:
# Install yfinance (imported below as `yf`) via an IPython shell escape.
!pip install yfinance

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
from scipy.stats import kurtosis
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Download window and bar size passed to yfinance.
start = '2012-01-01'
end = '2023-12-31'
interval = '1d'
# Yahoo Finance writes share classes with a hyphen, so Berkshire class B is
# 'BRK-B' ('BRK.B' returns no data).
symbols = ['AAPL', 'NVDA', 'MSFT', 'AMZN', 'META', 'GOOGL', 'BRK-B','GOOG','AVGO', 'TSLA', 'LLY', 'JPM', 'XOM', 'UNH', 'V', 'MA', 'HD', 'PG', 'COST', 'JNJ', 'WMT', 'ABBV', 'NFLX', 'BAC', 'CRM']


def _add_features(frame):
    """Return a copy of one symbol's price frame with rolling features added.

    The frame must hold a single symbol in chronological order so that no
    rolling window or percentage change spans two different symbols.

    Adds the columns AnnReturn, v20, k20, vol10, vema12, vstd20, ar and br.
    """
    frame = frame.copy()

    # Despite its name this is the one-period (daily) return of 'Adj Close';
    # only the 20-day variance below is scaled by 252.
    frame['AnnReturn'] = frame['Adj Close'].pct_change()
    frame['v20'] = frame['AnnReturn'].rolling(window=20).var() * 252
    frame['k20'] = frame['AnnReturn'].rolling(window=20).apply(kurtosis, raw=True)

    # Volume features: 10-day mean, 12-span EMA, 20-day standard deviation.
    frame['vol10'] = frame['Volume'].rolling(window=10).mean()
    frame['vema12'] = frame['Volume'].ewm(span=12, adjust=False).mean()
    frame['vstd20'] = frame['Volume'].rolling(window=20).std()

    # 26-day rolling sums shared by the AR and BR ratios.
    high_sum = frame['High'].rolling(window=26).sum()
    open_sum = frame['Open'].rolling(window=26).sum()
    low_sum = frame['Low'].rolling(window=26).sum()
    prev_close_sum = frame['Close'].shift(1).rolling(window=26).sum()

    frame['ar'] = (high_sum - open_sum) / (open_sum - low_sum) * 100
    frame['br'] = (high_sum - prev_close_sum) / (prev_close_sum - low_sum) * 100
    return frame


# Collect per-symbol frames and concatenate once instead of growing a
# DataFrame inside the loop.
frames = []
for symbol in symbols:
    # auto_adjust=False keeps the separate 'Adj Close' column that the return
    # calculation reads.
    current_data = yf.download(symbol, start=start, end=end, interval=interval, auto_adjust=False)
    if current_data.empty:
        print(f"No data returned for {symbol}; skipping")
        continue
    # Keep only the first column level (works for flat and multi-level columns).
    current_data.columns = current_data.columns.get_level_values(0)
    current_data.reset_index(inplace=True)
    current_data['Date'] = current_data['Date'].dt.date
    current_data['Symbol'] = symbol
    # Features are computed per symbol so windows never straddle two tickers.
    frames.append(_add_features(current_data))

if not frames:
    raise RuntimeError("No price data could be downloaded for any symbol")

data = pd.concat(frames, ignore_index=True)

# A zero denominator in ar/br yields +/-inf, which dropna() alone would keep.
data = data.replace([np.inf, -np.inf], np.nan).dropna()
data = data.reset_index(drop=True)
data.columns.name = None
data['close_change_pct'] = data.groupby('Symbol')['Close'].pct_change()

# Label each row against its own symbol's distribution of close changes:
#   2 -> change >= mean + std
#   1 -> 0 < change < mean + std
#   0 -> everything else (including the NaN first row per symbol, dropped below)
pct_by_symbol = data.groupby('Symbol')['close_change_pct']
upper_threshold = pct_by_symbol.transform('mean') + pct_by_symbol.transform('std')
pct = data['close_change_pct']
data['Label'] = np.select([pct >= upper_threshold, pct > 0], [2, 1], default=0)

data = data.dropna()

In [None]:
# Build the model inputs: drop the identifier columns, then separate the
# target from the features.
# NOTE(review): 'Label' is derived from 'close_change_pct' earlier in this
# file, and that column (plus the near-identical 'AnnReturn') is still part of
# the features, so the classifier can read the answer off its inputs. Confirm
# whether the target should instead be a future-period label.
dbn_input = data.drop(columns=['Symbol', 'Date'])
x = dbn_input.drop(columns=['Label'])
y = dbn_input['Label']

# Split features and target together so the rows stay aligned.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.15, random_state = 1)

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows.
# NOTE(review): BernoulliRBM models binary visible units and is normally fed
# inputs in [0, 1]; standardized inputs fall outside that range. Consider a
# [0, 1] scaling instead.
sclr = StandardScaler()
x_train = sclr.fit_transform(x_train)
x_test = sclr.transform(x_test)

# Stack of three RBMs trained greedily layer by layer. Each layer is fitted on
# the training representation only; the test data is merely transformed so it
# passes through the same learned weights.
layer_1 = BernoulliRBM(n_components = 100, learning_rate = 0.3, n_iter = 1500, verbose = True)
l1_x_train = layer_1.fit_transform(x_train)
l1_x_test = layer_1.transform(x_test)

layer_2 = BernoulliRBM(n_components = 100, learning_rate = 0.3, n_iter = 1500, verbose = True)
l2_x_train = layer_2.fit_transform(l1_x_train)
l2_x_test = layer_2.transform(l1_x_test)

layer_3 = BernoulliRBM(n_components = 40, learning_rate = 0.3, n_iter = 1500, verbose = True)
l3_x_train = layer_3.fit_transform(l2_x_train)
l3_x_test = layer_3.transform(l2_x_test)

# Logistic regression on the top-layer hidden activations.
log_model = LogisticRegression(max_iter = 100)
log_model.fit(l3_x_train, y_train)
preds = log_model.predict(l3_x_test)

print(classification_report(y_test, preds))