In [29]:
#import libraries
import pandas as pd
import numpy as np
import yfinance as yf
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import datetime
import warnings
warnings.filterwarnings("ignore")


# Run configuration.
# Price history is downloaded from a fixed early date (see fetch_ticker_data);
# `start_date` is the boundary used further down to separate training rows from
# test rows, and `end_date` closes the download range.
symbol_to_fetch = "AAPL"
start_date, end_date = "2020-01-01", "2024-05-01"

def fetch_ticker_data(symbol, start_date, end_date, history_start='1980-01-01'):
    """Download price history for ``symbol`` through yfinance.

    Args:
        symbol: Ticker symbol handed to ``yf.Ticker``.
        start_date: Accepted for backward compatibility but not used for the
            download. The notebook uses this date later as the train/test
            boundary, so the history has to begin earlier than it.
        end_date: End of the requested range, forwarded to ``Ticker.history``.
        history_start: First date to request. Defaults to ``'1980-01-01'``, the
            value that used to be hard-coded, so existing calls are unaffected.

    Returns:
        Whatever ``Ticker.history`` returns for the requested range.
    """
    return yf.Ticker(symbol).history(start=history_start, end=end_date)

def label_data(data):
    """Return a copy of ``data`` with the next-day return of ``Close`` attached.

    The new ``'Percentage Change'`` column holds, for row *t*,
    ``Close[t+1] / Close[t] - 1``. The final row has no following row, so its
    label is NaN; the caller decides how to treat it.

    The input frame is left untouched. Labelling a copy avoids writing into a
    slice of another DataFrame, which pandas treats as chained assignment.

    Args:
        data: DataFrame containing a ``'Close'`` column.

    Returns:
        A new DataFrame with ``'Percentage Change'`` appended as the last column.
    """
    labelled = data.copy()
    # pct_change() gives the return *into* each row; shifting by -1 turns it into
    # the return *out of* each row, i.e. the value to predict.
    labelled['Percentage Change'] = labelled['Close'].pct_change().shift(-1)
    return labelled
stock = fetch_ticker_data(symbol_to_fetch, start_date, end_date)

# Lagged differences plus rolling mean / standard deviation of every OHLC price,
# for each horizon from 1 to 29 rows.
price_columns = ("Open", "High", "Low", "Close")
lag_features = {}
for i in range(1, 30):
    for price in price_columns:
        lag_features[f"{price.lower()}_delta_{i}day"] = stock[price].diff(periods=i)
    for price in price_columns:
        lag_features[f"rolling_mean_{price.lower()}_{i}day"] = stock[price].rolling(window=i).mean()
    # A one-row window has no sample standard deviation: with the default ddof=1
    # the column is NaN on every row, which no forward/backward fill can repair.
    # Use the population form (ddof=0, i.e. 0.0) for that single horizon.
    ddof = 0 if i == 1 else 1
    for price in price_columns:
        lag_features[f"rolling_std_{price.lower()}_{i}day"] = stock[price].rolling(window=i).std(ddof=ddof)

# Attach all generated columns with a single concat; inserting hundreds of columns
# one at a time fragments the frame. Column names and order are unchanged.
stock = pd.concat([stock, pd.DataFrame(lag_features, index=stock.index)], axis=1)

# Trend, band and momentum indicators, all derived from the closing price.
close_px = stock['Close']
sma_20 = close_px.rolling(window=20).mean()
std_20 = close_px.rolling(window=20).std()
ema_12 = close_px.ewm(span=12, adjust=False).mean()
ema_26 = close_px.ewm(span=26, adjust=False).mean()

# Simple moving averages (20-row "fast", 50-row "slow").
stock['fast_ma'] = sma_20
stock['slow_ma'] = close_px.rolling(window=50).mean()
# Bollinger bands: 20-row mean plus/minus two rolling standard deviations.
stock['bollinger_high'] = sma_20 + (2 * std_20)
stock['bollinger_low'] = sma_20 - (2 * std_20)
stock['ema'] = close_px.ewm(span=20, adjust=False).mean()
# Fixed 5% envelope around the 20-row mean.
stock['envelope_high'] = sma_20 * (1 + 0.05)
stock['envelope_low'] = sma_20 * (1 - 0.05)
# MACD: 12-span EMA minus 26-span EMA, with a 9-span EMA of that line as signal.
stock['macd_line'] = ema_12 - ema_26
stock['macd_signal'] = stock['macd_line'].ewm(span=9, adjust=False).mean()

# RSI calculation
def calculate_rsi(data, rsi_period):
    """Relative Strength Index of ``data['Close']`` based on simple rolling means.

    Args:
        data: DataFrame with a ``'Close'`` column.
        rsi_period: Number of rows in the rolling window.

    Returns:
        Series of RSI values indexed like the non-NaN price changes, so the first
        row of ``data`` is absent and the first ``rsi_period - 1`` entries are NaN.
    """
    change = data['Close'].diff().dropna()
    # Split each change into its upward part and the magnitude of its downward part.
    ups = change.mask(change <= 0, 0)
    downs = -change.mask(change >= 0, 0)
    mean_up = ups.rolling(window=rsi_period).mean()
    mean_down = downs.rolling(window=rsi_period).mean()
    relative_strength = mean_up / mean_down
    return 100 - (100 / (1 + relative_strength))

stock['rsi'] = calculate_rsi(stock, 14)

# Calendar features taken from the row index.
trading_days = pd.to_datetime(stock.index)
stock['day'] = trading_days.day
stock['month'] = trading_days.month
stock['year'] = trading_days.year
stock['weekday'] = trading_days.weekday
stock['dayofyear'] = trading_days.dayofyear

# Close the gaps left by the rolling/diff warm-up: carry values forward first, then
# backward for rows that precede the first valid value. DataFrame.fillna(method=...)
# is deprecated; ffill()/bfill() are the direct replacements.
stock = stock.ffill().bfill()

# Index by plain calendar dates. Deriving them through pd.to_datetime keeps this
# statement valid when the cell is executed again on an already converted index.
stock.index = pd.to_datetime(stock.index).date
# Split the data into training and test sets: rows dated before `start_date` are
# used for training, the remainder is held out.
train_data_index = np.searchsorted(stock.index.values, np.datetime64(start_date))
# .copy() makes each split an independent frame, so attaching the label column
# never writes into a slice of `stock`.
train_data = label_data(stock.iloc[:train_data_index].copy())
test_data = label_data(stock.iloc[train_data_index:].copy())
# The last row of each split has no following close, so its label is NaN and is
# filled with the previous row's label. DataFrame.ffill() replaces the deprecated
# fillna(method='ffill', inplace=True).
train_data = train_data.ffill()
test_data = test_data.ffill()

# Train & test data: the label ('Percentage Change') is the last column.
X_train_data = train_data.iloc[:, :-1]
y_train_data = train_data.iloc[:, -1]
X_test_data = test_data.iloc[:, :-1]
y_test_data = test_data.iloc[:, -1]
print(len(X_test_data))

# Normalize the data; the scaler is fitted on the training rows only.
normalizer = MinMaxScaler()
X_train_data_normalizer = normalizer.fit_transform(X_train_data)
X_test_data_normalizer = normalizer.transform(X_test_data)

# Variants with a trailing singleton axis: (rows, features, 1).
X_train_reshaped = X_train_data_normalizer.reshape(X_train_data_normalizer.shape[0], X_train_data_normalizer.shape[1], 1)
X_test_reshaped = X_test_data_normalizer.reshape(X_test_data_normalizer.shape[0], X_test_data_normalizer.shape[1], 1)

data = tf.cast(X_train_data_normalizer, tf.float32)
targets = tf.cast(y_train_data, tf.float32)
sample_length = 22
# timeseries_dataset_from_array pairs the window that STARTS at row i with
# targets[i]. targets[j] is the return from row j to row j + 1, so the value to
# predict for the window i .. i + sample_length - 1 (the return following its last
# row) sits at j = i + sample_length - 1. Offsetting the targets lines the two up;
# without the offset each window would be paired with a return it already contains.
input_dataset = tf.keras.utils.timeseries_dataset_from_array(
    data,
    targets[sample_length - 1:],
    sequence_length=sample_length,
    batch_size=256,
    sequence_stride=sample_length,
)


1089


In [33]:
# Rebuild the windowed training dataset (non-overlapping windows of
# `sample_length` rows). The targets are offset by sample_length - 1 because the
# window starting at row i is paired with the i-th target passed in, and the value
# to predict is the return that follows the window's LAST row.
input_dataset = tf.keras.utils.timeseries_dataset_from_array(
    data,
    targets[sample_length - 1:],
    sequence_length=sample_length,
    batch_size=256,
    sequence_stride=sample_length,
)


In [36]:
# A batched tf.data.Dataset cannot be indexed with []; inspect the structure of its
# elements through element_spec (or pull one batch with input_dataset.take(1)).
input_dataset.element_spec

In [27]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define the LSTM model.
# Every sample is a window of `sample_length` consecutive rows and each row carries
# all scaled feature columns, so the input shape is (timesteps, features).
n_features = X_train_data_normalizer.shape[1]
lstm_model = Sequential()
lstm_model.add(LSTM(256, input_shape=(sample_length, n_features), return_sequences=True))
lstm_model.add(Dropout(0.3))
for units in (128, 64, 32, 16, 8):
    lstm_model.add(LSTM(units, return_sequences=True))
    lstm_model.add(Dropout(0.3))
# The last recurrent layer returns only its final state, so the network emits one
# value per window (matching the scalar target) instead of one per timestep.
lstm_model.add(LSTM(4))
lstm_model.add(Dropout(0.3))
lstm_model.add(Dense(64, activation='relu'))
lstm_model.add(Dropout(0.3))
lstm_model.add(Dense(1))

# Compile the model
lstm_model.compile(loss='mean_squared_error', optimizer='adam')

# Define early stopping to prevent overfitting
early_stopping = EarlyStopping(patience=10, monitor='val_loss', mode='min', restore_best_weights=True)

# Define model checkpoint to save the best model during training
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss', mode='min')


def make_windows(features, labels=None, shuffle=False):
    """Build a batched dataset of overlapping `sample_length`-row windows.

    Windows are produced lazily by tf.data rather than materialised as one large
    array. When `labels` is given, each window is paired with the label of its
    last row; otherwise the dataset yields windows only.
    """
    # Guard against NaNs that survived scaling: a single NaN input would turn the
    # loss into NaN.
    features = np.nan_to_num(np.asarray(features, dtype='float32'), nan=0.0)
    if labels is not None:
        # The window starting at row i is paired with the i-th label passed in, so
        # drop the first sample_length - 1 labels to align on the window's last row.
        labels = np.asarray(labels, dtype='float32')[sample_length - 1:]
    return tf.keras.utils.timeseries_dataset_from_array(
        features,
        labels,
        sequence_length=sample_length,
        batch_size=64,
        shuffle=shuffle,
    )


# Split the training rows chronologically: first 80% to fit, last 20% to validate.
train_rows = int(0.8 * len(X_train_data_normalizer))
y_train_values = y_train_data.to_numpy()
train_windows = make_windows(X_train_data_normalizer[:train_rows], y_train_values[:train_rows], shuffle=True)
val_windows = make_windows(X_train_data_normalizer[train_rows:], y_train_values[train_rows:])

# Train the model on the training data
history = lstm_model.fit(
    train_windows,
    epochs=100,
    validation_data=val_windows,
    callbacks=[early_stopping, model_checkpoint],
)

# Load the best saved model
best_model = tf.keras.models.load_model('best_model.keras')

# Predict the label of every test window using the best model
test_windows = make_windows(X_test_data_normalizer)
y_pred = best_model.predict(test_windows).flatten()

# The target column was never scaled (the MinMaxScaler was fitted on the feature
# columns only), so the predictions are already in the label's units and must not
# be passed through normalizer.inverse_transform.
y_true = y_test_data.iloc[sample_length - 1:]

# Calculate the root mean squared error (RMSE) between the actual and predicted values
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print("RMSE:", rmse)

# Plot the predictions against the date of each window's last row
plt.figure(figsize=(12, 6))
plt.plot(y_true.index, y_true.to_numpy(), label='Actual')
plt.plot(y_true.index, y_pred, label='Predicted')
plt.legend()
plt.show()

In [3]:
from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerModel

# Build a Time Series Transformer with random weights from a configuration that
# predicts 12 time steps, then keep the configuration object held by the model.
model = TimeSeriesTransformerModel(TimeSeriesTransformerConfig(prediction_length=12))
configuration = model.config

In [4]:
from huggingface_hub import hf_hub_download
import torch
from transformers import TimeSeriesTransformerModel

# Fetch a ready-made training batch from the Hugging Face Hub.
file = hf_hub_download(
    repo_id="hf-internal-testing/tourism-monthly-batch",
    filename="train-batch.pt",
    repo_type="dataset",
)
# NOTE(review): torch.load unpickles the downloaded file - only load files from a
# source you trust.
batch = torch.load(file)

model = TimeSeriesTransformerModel.from_pretrained("huggingface/time-series-transformer-tourism-monthly")

# During training the model receives both past and future values, together with
# the additional time/static features stored in the batch.
training_inputs = (
    "past_values",
    "past_time_features",
    "past_observed_mask",
    "static_categorical_features",
    "static_real_features",
    "future_values",
    "future_time_features",
)
outputs = model(**{name: batch[name] for name in training_inputs})

last_hidden_state = outputs.last_hidden_state

In [6]:
from huggingface_hub import hf_hub_download
import torch
from transformers import TimeSeriesTransformerForPrediction

# Fetch a ready-made training batch from the Hugging Face Hub.
file = hf_hub_download(
    repo_id="hf-internal-testing/tourism-monthly-batch",
    filename="train-batch.pt",
    repo_type="dataset",
)
# NOTE(review): torch.load unpickles the downloaded file - only load files from a
# source you trust.
batch = torch.load(file)

model = TimeSeriesTransformerForPrediction.from_pretrained(
    "huggingface/time-series-transformer-tourism-monthly"
)

# Training step: past and future values plus the additional features go in, the
# loss comes out and is back-propagated.
training_inputs = (
    "past_values",
    "past_time_features",
    "past_observed_mask",
    "static_categorical_features",
    "static_real_features",
    "future_values",
    "future_time_features",
)
outputs = model(**{name: batch[name] for name in training_inputs})

loss = outputs.loss
loss.backward()

# Inference: the same inputs without the future values; the model generates the
# future values autoregressively.
inference_inputs = tuple(name for name in training_inputs if name != "future_values")
outputs = model.generate(**{name: batch[name] for name in inference_inputs})

mean_prediction = outputs.sequences.mean(dim=1)