In [108]:
import yfinance as yf
import pandas as pd
import pandas_ta as ta
import numpy as np
from functools import reduce
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [79]:
tickers = ["AMJ","XLY","XLB","^GSPC","^DJI","^IXIC","^NYA","^RUT"]
start_date = "2018-08-19"
end_date = "2023-09-30"
etf_data = {}
for ticker in tickers:
    data = yf.download(ticker, start=start_date, end=end_date)
    
    etf_data[ticker] = data

    
# ETF data frames
amj_df = etf_data["AMJ"]
xly_df = etf_data["XLY"]
xlb_df = etf_data["XLB"]

# Index data frames
gspc_df = etf_data["^GSPC"]
dji_df = etf_data["^DJI"]
ixic_df = etf_data["^IXIC"]
nya_df = etf_data["^NYA"]
rut_df = etf_data["^RUT"]

dji_df = dji_df.rename(columns={'Adj Close': 'DJI_Adj Close', 'Close': 'DJI_Close', 'High': 'DJI_High', 'Low': 'DJI_Low', 'Open': 'DJI_Open','Volume': 'DJI_Volume'})
gspc_df = gspc_df.rename(columns={'Adj Close': 'GSPC_Adj Close', 'Close': 'GSPC_Close', 'High': 'GSPC_High', 'Low': 'GSPC_Low', 'Open': 'GSPC_Open','Volume': 'GSPC_Volume'})
ixic_df = ixic_df.rename(columns={'Adj Close': 'IXIC_Adj Close', 'Close': 'IXIC_Close', 'High': 'IXIC_High', 'Low': 'IXIC_Low', 'Open': 'IXIC_Open','Volume': 'IXIC_Volume'})
nya_df = nya_df.rename(columns={'Adj Close': 'NYA_Adj Close', 'Close': 'NYA_Close', 'High': 'NYA_High', 'Low': 'NYA_Low', 'Open': 'NYA_Open','Volume': 'NYA_Volume'})
rut_df = rut_df.rename(columns={'Adj Close': 'RUT_Adj Close', 'Close': 'RUT_Close', 'High': 'RUT_High', 'Low': 'RUT_Low', 'Open': 'RUT_Open','Volume': 'RUT_Volume'})

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [80]:
etf_data

{'AMJ':                  Open       High        Low      Close  Adj Close   Volume
 Date                                                                      
 2018-08-20  29.200001  29.549999  29.190001  29.510000  19.672085  1523300
 2018-08-21  29.510000  29.610001  29.230000  29.230000  19.485430  1050600
 2018-08-22  29.430000  29.559999  28.809999  29.520000  19.678753  1565700
 2018-08-23  29.410000  29.540001  29.320000  29.540001  19.692085   742500
 2018-08-24  29.469999  29.490000  29.260000  29.410000  19.605423  1288600
 ...               ...        ...        ...        ...        ...      ...
 2023-09-25  24.580000  24.870001  24.370001  24.719999  24.719999   710800
 2023-09-26  24.559999  24.600000  24.270000  24.299999  24.299999   181400
 2023-09-27  24.389999  24.660000  24.389999  24.600000  24.600000   384100
 2023-09-28  24.400000  24.879999  24.400000  24.870001  24.870001   604400
 2023-09-29  24.950001  24.950001  24.639999  24.700001  24.700001   275000
 
 [1

In [82]:
def analytical_inferences(etf_data):
    '''
    Function calculates all inferences possible like RSI, EMA, SMA. 
    Input: 1 data frame containing ETF data 
    Output: The data frame with all calculated values for the particular ETF.
    '''
    rsi_period = 14
    # Calculate RSI
    etf_data['RSI'] = ta.rsi(etf_data['Adj Close'], length=rsi_period)

    # Calculate overbought/oversold conditions
    etf_data['Overbought'] = (etf_data['RSI'] > 70).astype(int)
    etf_data['Oversold'] = (etf_data['RSI'] < 30).astype(int)
    
    # Calculate divergence between price and RSI
    etf_data['Price_RSI_Divergence'] = etf_data['Close'].diff() - etf_data['RSI'].diff()
    
    # Calculate rate of change of RSI
    etf_data['ROC_RSI'] = etf_data['RSI'].pct_change() * 100
    
    # Calculate RSI trend confirmation
    etf_data['RSI_Trend_Confirmation'] = (etf_data['RSI'] > etf_data['RSI'].shift(1)).astype(int)
    
    # Assuming 'Close' is the column containing closing prices
    etf_data['EMA'] = ta.ema(etf_data['Close'], length=14)  # Adjust the period as needed
    
    # Feature 1: EMA over a specific period
    # Already calculated and stored in 'EMA' column
    
    # Feature 2: Difference between current price and EMA
    etf_data['Price_EMA_Difference'] = etf_data['Close'] - etf_data['EMA']
    
    # Feature 3: Slope of EMA
    etf_data['Slope_EMA'] = ta.slope(etf_data['EMA'])
    
    # Feature 4: EMA convergence or divergence
    etf_data['EMA_Convergence'] = (etf_data['Close'] > etf_data['EMA']).astype(int)
    etf_data['EMA_Divergence'] = (etf_data['Close'] < etf_data['EMA']).astype(int)
    
    # Feature 5: Rate of change of EMA
    etf_data['ROC_EMA'] = etf_data['EMA'].pct_change() * 100
    
    # Assuming 'Close' is the column containing closing prices
    etf_data['SMA'] = ta.sma(etf_data['Close'], length=14)  # Adjust the period as needed
    
    # Feature 1: SMA over a specific period
    # Already calculated and stored in 'SMA' column
    
    # Feature 2: Difference between current price and SMA
    etf_data['Price_SMA_Difference'] = etf_data['Close'] - etf_data['SMA']
    
    # Feature 3: Slope of SMA
    etf_data['Slope_SMA'] = ta.slope(etf_data['SMA'])
    
    # Feature 4: SMA convergence or divergence
    etf_data['SMA_Convergence'] = (etf_data['Close'] > etf_data['SMA']).astype(int)
    etf_data['SMA_Divergence'] = (etf_data['Close'] < etf_data['SMA']).astype(int)
    
    # Feature 5: Rate of change of SMA
    etf_data['ROC_SMA'] = etf_data['SMA'].pct_change() * 100
    
    dmi = ta.adx(etf_data.High, etf_data.Low, etf_data.Close)
    etf_data['ADX']=dmi['ADX_14']
    etf_data['DMI+']=dmi['DMP_14']
    etf_data['DMI-']=dmi['DMN_14']
    # Calculate ADX trend strength
    etf_data['ADX_Trend_Strength'] = etf_data['ADX'].rolling(window=3).mean()  # Adjust the rolling window parameter
    
    # Calculate DI convergence or divergence
    etf_data['DI_Convergence_Divergence'] = etf_data['DMI+'] - etf_data['DMI-']  # Adjust the length parameter
    return etf_data

In [83]:
amj_df = analytical_inferences(amj_df)
xly_df = analytical_inferences(xly_df)
xlb_df = analytical_inferences(xlb_df)

In [84]:
amj_combined_df = pd.DataFrame(reduce(lambda left, right: pd.merge(left, right, on='Date', how='outer'), [amj_df, dji_df, gspc_df, ixic_df, nya_df, rut_df]))
xly_combined_df = pd.DataFrame(reduce(lambda left, right: pd.merge(left, right, on='Date', how='outer'), [xly_df, dji_df, gspc_df, ixic_df, nya_df, rut_df]))
xlb_combined_df = pd.DataFrame(reduce(lambda left, right: pd.merge(left, right, on='Date', how='outer'), [xlb_df, dji_df, gspc_df, ixic_df, nya_df, rut_df]))

In [85]:
amj_combined_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1287 entries, 2018-08-20 to 2023-09-29
Data columns (total 59 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Open                       1287 non-null   float64
 1   High                       1287 non-null   float64
 2   Low                        1287 non-null   float64
 3   Close                      1287 non-null   float64
 4   Adj Close                  1287 non-null   float64
 5   Volume                     1287 non-null   int64  
 6   RSI                        1273 non-null   float64
 7   Overbought                 1287 non-null   int32  
 8   Oversold                   1287 non-null   int32  
 9   Price_RSI_Divergence       1272 non-null   float64
 10  ROC_RSI                    1272 non-null   float64
 11  RSI_Trend_Confirmation     1287 non-null   int32  
 12  EMA                        1274 non-null   float64
 13  Price_EMA_Difference       127

In [113]:
from sklearn.model_selection import train_test_split

amj_combined_df = amj_combined_df.dropna()
features = amj_combined_df['Adj Close'].values.reshape(-1, 1)
X = amj_combined_df.drop('Adj Close', axis =1)
y = features

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size = 0.2, random_state = 42)


In [114]:
y

array([[19.01957893],
       [18.93143272],
       [19.0670433 ],
       ...,
       [24.60000038],
       [24.87000084],
       [24.70000076]])

In [115]:
def create_dbn():
    
    model = Sequential()
    
    model.add(Dense(units = 100, activation = 'relu', input_dim = X_train.shape[1]))
    model.add(Dense(units = 80, activation = 'relu'))
    model.add(Dense(units = 60, activation = 'relu'))
    
    
    model.add(Dense(units = 1, activation = 'linear'))
    
    return model


    

In [116]:
dbn_model = create_dbn()


dbn_model.compile(optimizer = 'adam', loss = 'mean_squared_error')

dbn_model.fit(X_train, y_train, epochs = 10, batch_size = 32)




loss = dbn_model.evaluate(X_test, y_test)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [117]:
loss

1.2238080501556396

In [118]:
dbn_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 100)               5900      
                                                                 
 dense_9 (Dense)             (None, 80)                8080      
                                                                 
 dense_10 (Dense)            (None, 60)                4860      
                                                                 
 dense_11 (Dense)            (None, 1)                 61        
                                                                 
Total params: 18901 (73.83 KB)
Trainable params: 18901 (73.83 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [119]:
for layer in dbn_model.layers:
    weights, biases = layer.get_weights()
    print(f"Layer: {layer.name}")
    print("Weights shape:", weights.shape)
    print("Biases shape:", biases.shape)
    print()

Layer: dense_8
Weights shape: (58, 100)
Biases shape: (100,)

Layer: dense_9
Weights shape: (100, 80)
Biases shape: (80,)

Layer: dense_10
Weights shape: (80, 60)
Biases shape: (60,)

Layer: dense_11
Weights shape: (60, 1)
Biases shape: (1,)



In [120]:
predictions = dbn_model.predict(X_test)

predictions = predictions.flatten()
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)



In [122]:
mse

1.2238079611507062

In [123]:
mae

0.8450415342573135

In [124]:
r2

0.9181528359533399