In [None]:
!pip install yfinance
import numpy as np
import pandas as pd
import yfinance as yf
from scipy.stats import kurtosis

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.neural_network import BernoulliRBM

In [None]:
# Download window and bar interval handed to yf.download for every ticker.
start = '2012-01-01'
end = '2023-12-31'
interval = '1d'

# Tickers to download. Yahoo Finance spells share classes with a dash
# ('BRK-B'); the dotted form 'BRK.B' does not resolve and yields no data.
symbols = [
    'AAPL', 'NVDA', 'MSFT', 'AMZN', 'META', 'GOOGL', 'BRK-B', 'GOOG', 'AVGO',
    'TSLA', 'LLY', 'JPM', 'XOM', 'UNH', 'V', 'MA', 'HD', 'PG', 'COST', 'JNJ',
    'WMT', 'ABBV', 'NFLX', 'BAC', 'CRM',
]

# Empty frame that the download cell fills with one row per symbol per day.
data = pd.DataFrame()

In [None]:
# Download the price history of every ticker and stack the results into one
# long frame: one row per (Symbol, Date).
frames = []
for x in symbols:
    # auto_adjust=False keeps the separate 'Adj Close' column that the feature
    # cell reads; newer yfinance releases default to auto_adjust=True, which
    # removes that column.
    current_data = yf.download(x, start=start, end=end, interval=interval, auto_adjust=False)
    if current_data.empty:
        # Nothing came back for this ticker (unknown symbol, network issue...).
        # Skip it rather than failing on the column handling below.
        print(f"No data returned for {x}; skipping")
        continue
    # Keep only the first column level (field names) in case yfinance returned
    # MultiIndex columns; on a flat index this is a no-op.
    current_data.columns = current_data.columns.get_level_values(0)
    current_data = current_data.reset_index()
    current_data['Date'] = current_data['Date'].dt.date
    current_data['Symbol'] = x
    frames.append(current_data)

# Concatenate once: growing `data` inside the loop copies the whole frame on
# every iteration, and re-running the cell would append duplicate rows.
data = pd.concat(frames, ignore_index=True)

In [None]:
# --- Feature engineering -----------------------------------------------------
# `data` stacks all tickers on top of each other, so every differencing or
# rolling statistic has to be evaluated inside one symbol at a time. Computing
# them on the whole column would mix the last rows of one ticker with the
# first rows of the next.
symbol_key = data['Symbol']


def _by_symbol(series, func):
    """Apply `func` to `series` separately per symbol, keeping row alignment."""
    return series.groupby(symbol_key, sort=False).transform(func)


def _sum26(series):
    """26-row rolling sum of `series`, restarted for every symbol."""
    return _by_symbol(series, lambda s: s.rolling(window=26).sum())


# Despite the column name this is the one-period (row-to-row) return of Adj Close.
data["AnnReturn"] = _by_symbol(data['Adj Close'], lambda s: s.pct_change())
# 20-row variance of returns scaled by 252, and 20-row kurtosis of returns.
data['v20'] = _by_symbol(data['AnnReturn'], lambda s: s.rolling(window=20).var()) * 252
data['k20'] = _by_symbol(data['AnnReturn'], lambda s: s.rolling(window=20).apply(kurtosis, raw=True))
# Volume statistics: 10-row mean, span-12 EMA, 20-row standard deviation.
data['vol10'] = _by_symbol(data['Volume'], lambda s: s.rolling(window=10).mean())
data['vema12'] = _by_symbol(data['Volume'], lambda s: s.ewm(span=12, adjust=False).mean())
data['vstd20'] = _by_symbol(data['Volume'], lambda s: s.rolling(window=20).std())

# AR / BR ratios built from 26-row sums of High, Low, Open and the previous Close.
high_26 = _sum26(data['High'])
low_26 = _sum26(data['Low'])
open_26 = _sum26(data['Open'])
prev_close_26 = _sum26(_by_symbol(data['Close'], lambda s: s.shift(1)))
data['ar'] = (high_26 - open_26) / (open_26 - low_26) * 100
data['br'] = (high_26 - prev_close_26) / (prev_close_26 - low_26) * 100

# Drop the warm-up rows of each symbol (incomplete rolling windows).
data = data.dropna().reset_index(drop=True)
data.columns.name = None
data['close_change_pct'] = data.groupby('Symbol')['Close'].pct_change()

# --- Labels --------------------------------------------------------------------
# Per symbol, with upper = mean + std of close_change_pct:
#   2 -> change >= upper
#   1 -> 0 < change < upper
#   0 -> everything else (including the NaN first row, removed just below)
# NOTE(review): 'AnnReturn' and the same-row price columns remain in `data`
# alongside a label derived from the same row's Close change; confirm this is
# intended, since it may leak the target into the features.
change = data['close_change_pct']
change_by_symbol = change.groupby(data['Symbol'])
upper = change_by_symbol.transform('mean') + change_by_symbol.transform('std')
data['Label'] = 0
data.loc[(change > 0) & (change < upper), 'Label'] = 1
data.loc[change >= upper, 'Label'] = 2

data = data.dropna()
print(data.shape)
# describe must be *called*; printing the bare attribute shows the bound method.
print(data.describe())

In [None]:
# Show the first rows of `data` (head() defaults to five) as the cell output.
data.head()

In [None]:
# Draw a label-stratified random sample of 10,000 rows to train on.
data = data.reset_index(drop=True)
# Calendar year of each row, extracted in one vectorised pass instead of a
# Python loop over every row (the column is now integer-typed, not object).
data['year'] = pd.to_datetime(data['Date']).dt.year

# An integer train_size requests exactly that many rows; deriving a float
# test_size from 1 - 10000/len(data) is subject to rounding and can yield
# 9,999 training rows.
train, _ = train_test_split(data, train_size=10000, stratify=data['Label'], random_state=1)
# Build a new frame rather than dropping in place on the split result.
train = train.drop(columns='year').reset_index(drop=True)

In [None]:
# Print how many rows of `train` belong to each ticker symbol.
print(train['Symbol'].value_counts())

In [None]:
# Pull the target out as a NumPy array, then remove it from the feature table
# together with the raw percentage change it was derived from.
target_columns = ['Label', 'close_change_pct']
y = train['Label'].to_numpy()
train = train.drop(target_columns, axis=1)
# Print the dtype of every remaining column.
print(train.dtypes)

In [None]:
# Keep the identifying columns aside as arrays and leave the remaining
# columns in `train` as the feature table.
sym = train['Symbol'].values
dt = train['Date'].values
train = train.drop(columns=['Symbol', 'Date'])

# BernoulliRBM is documented to expect binary inputs or values in [0, 1].
# Standardised features are unbounded and partly negative, so they are
# rescaled to the unit interval before being handed to the first RBM layer.
# `standard_scaler` is still fitted and kept under its original name.
standard_scaler = StandardScaler()
standardized = standard_scaler.fit_transform(train.values)
unit_scaler = MinMaxScaler()
# NOTE: `input` shadows the Python builtin; the name is kept because the
# following cells read it.
input = unit_scaler.fit_transform(standardized)

In [None]:
# Print the scaled feature array. `input` here is the array assigned in the
# scaling cell, which shadows the Python builtin input().
print(input)

In [None]:
# Seed shared by all RBM layers so that Restart & Run All reproduces the same
# weights and features (RBM training is stochastic).
RBM_SEED = 1


def _fit_rbm(visible, n_components):
    """Fit one BernoulliRBM on `visible`; return (fitted model, transformed output)."""
    rbm = BernoulliRBM(
        n_components=n_components,
        learning_rate=0.3,
        n_iter=1500,
        verbose=True,
        random_state=RBM_SEED,
    )
    return rbm, rbm.fit_transform(visible)


# Greedy layer-wise stack: each layer is trained on the output of the previous
# one (100 -> 100 -> 40 hidden units).
layer1, l1 = _fit_rbm(input, 100)
layer2, l2 = _fit_rbm(l1, 100)
layer3, l3 = _fit_rbm(l2, 40)

In [None]:
# Display the shape of `l3`, the output of the third RBM layer's fit_transform.
l3.shape

In [None]:
# Column names x_1 .. x_n, one for each column of the last RBM layer's output.
x = [f"x_{i+1}" for i in range(l3.shape[1])]

l3_df = pd.DataFrame(l3, columns=x)
l3_df['y'] = y
# The ticker symbol goes into the column right after the last feature. With 40
# components this is still 'x_41', but deriving the name from the width means
# a wider last layer can no longer have one of its features overwritten.
l3_df[f"x_{l3.shape[1] + 1}"] = sym

# Rows whose label is not 2, re-indexed from zero.
filtered_data = l3_df[l3_df['y'] != 2].reset_index(drop=True)
# The cell displays the unfiltered frame.
l3_df.head()