In [1]:
!pip install yfinance



In [3]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

# Define the tickers for the indexes
tickers = ['^GSPC', '^DJI', '^IXIC']  # S&P 500, Dow Jones, NASDAQ

# Define the start date as 10 years ago from today
start_date = (datetime.now() - timedelta(days=10*365)).strftime('%Y-%m-%d')

# Initialize a dictionary to store the data for each index
data_dict = {}

# Download the data for each index and store it in the dictionary
for ticker in tickers:
    data_dict[ticker] = yf.download(ticker, start=start_date)

# Save the data to an Excel file, with each index in a different sheet
with pd.ExcelWriter('index_data.xlsx') as writer:
    for ticker, data in data_dict.items():
        data.to_excel(writer, sheet_name=ticker)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [4]:
# Import necessary libraries
import pandas as pd

# Define the tickers for the indexes
tickers = ['^GSPC', '^DJI', '^IXIC']  # S&P 500, Dow Jones, NASDAQ

# Initialize a dictionary to store the data for each index
data_dict = {}

# Read the data for each index from the Excel file and store it in the dictionary
with pd.ExcelFile('index_data.xlsx') as xls:
    for ticker in tickers:
        data_dict[ticker] = pd.read_excel(xls, sheet_name=ticker, index_col=0)

# Now, data_dict contains a DataFrame for each index

In [7]:
data_dict['^GSPC']

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-06-17,1630.640015,1646.500000,1630.339966,1639.040039,1639.040039,3137080000
2013-06-18,1639.770020,1654.189941,1639.770020,1651.810059,1651.810059,3120980000
2013-06-19,1651.829956,1652.449951,1628.910034,1628.930054,1628.930054,3545060000
2013-06-20,1624.619995,1624.619995,1584.319946,1588.189941,1588.189941,4858850000
2013-06-21,1588.619995,1599.189941,1577.699951,1592.430054,1592.430054,5797280000
...,...,...,...,...,...,...
2023-06-07,4285.470215,4299.189941,4263.959961,4267.520020,4267.520020,4537800000
2023-06-08,4268.689941,4298.009766,4261.069824,4293.930176,4293.930176,3826740000
2023-06-09,4304.879883,4322.620117,4291.700195,4298.859863,4298.859863,3786510000
2023-06-12,4308.319824,4340.129883,4304.370117,4338.930176,4338.930176,3945670000


In [8]:
import numpy as np

# Define a function to calculate the DC events
def calculate_dc_events(prices, threshold):
     # Initialize the DC events series with the first price
    dc_events = [pd.Series(prices.iloc[0], index=[prices.index[0]])]

    # Initialize the last event price to the first price
    last_event_price = prices.iloc[0]

    # For each price in the series
    for i in range(1, len(prices)):
        # If the price change since the last event is greater than the threshold
        if np.abs(prices.iloc[i] - last_event_price) > threshold:
            # Record a new event
            dc_events.append(pd.Series(prices.iloc[i], index=[prices.index[i]]))
            # Update the last event price
            last_event_price = prices.iloc[i]

    return pd.concat(dc_events)

# Apply the DC method to the closing prices of each index
dc_dict = {}
for ticker, data in data_dict.items():
    # Calculate the threshold as the standard deviation of the price changes
    threshold = data['Close'].diff().std()
    # Calculate the DC events
    dc_dict[ticker] = calculate_dc_events(data['Close'], threshold)

In [22]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

# Define the tickers for the indexes
tickers = ['^GSPC', '^DJI', '^IXIC']  # S&P 500, Dow Jones, NASDAQ

# Define the start date as 60 days ago from today
start_date = (datetime.now() - timedelta(days=729)).strftime('%Y-%m-%d')

# Initialize a dictionary to store the data for each index
data_dict = {}

# Download the data for each index and store it in the dictionary
for ticker in tickers:
    data_dict[ticker] = yf.download(ticker, start=start_date, interval='60m')

# Save the data to an Excel file, with each index in a different sheet
with pd.ExcelWriter('index_data_2.xlsx') as writer:
    for ticker, data in data_dict.items():
        data.to_excel(writer, sheet_name=ticker)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [11]:
dc_dict['^GSPC']
snp_dc_events = dc_dict['^GSPC']

In [12]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Define a function to create sequences of previous prices
def create_sequences(data, seq_length):
    xs = []
    ys = []

    for i in range(len(data) - seq_length - 1):
        x = data[i:(i + seq_length)]
        y = data[i + seq_length] > data[i + seq_length - 1]
        xs.append(x)
        ys.append(y)

    return np.array(xs), np.array(ys)

# Define the sequence length and create the sequences
seq_length = 10
X, y = create_sequences(snp_dc_events.values, seq_length)

# Normalize the input data
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape the input data to the format required by LSTM layers
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Define the RNN model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(50))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

2023-06-13 15:19:16.673796: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-13 15:19:27.216666: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f78aee29f70>

In [14]:
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)

print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {accuracy:.4f}')


Test loss: 0.6663
Test accuracy: 0.6190
