In [1]:
import csv

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras

#### Load the dataset

In [5]:
file_path = "data/AAPL.csv"
raw_data = tf.data.experimental.make_csv_dataset(
    file_path,
    batch_size=1,
    column_names=["Date", "Low", "Open", "Volume", "High", "Close", "Adjusted Close"],
    column_defaults=[tf.string, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32],
    header=True,
    shuffle=False
)

In [6]:
# Function to process rows
def process_row(row):
    # Extract the "Close" column as the target and normalize the features
    target = row["Close"]
    features = tf.stack([row["Low"], row["Open"], row["High"], row["Volume"], row["Adjusted Close"]], axis=0)
    features = tf.math.divide_no_nan(features - tf.reduce_mean(features), tf.math.reduce_std(features))
    return features, target

In [7]:
# Apply processing
processed_data = raw_data.map(process_row)

In [8]:
# Create sequences with a lookback window
def create_sequences(data, lookback=30):
    sequences = []
    targets = []
    buffer = []
    buffer_targets = []
    for features, target in data:
        buffer.append(features)
        buffer_targets.append(target)
        if len(buffer) >= lookback:
            sequences.append(tf.stack(buffer[-lookback:]))
            targets.append(buffer_targets[-1])
    return tf.data.Dataset.from_tensor_slices((sequences, targets))

In [9]:
lookback_window = 30
processed_data = create_sequences(processed_data, lookback=lookback_window)

# Split data into training, validation, and test sets
total_samples = sum(1 for _ in processed_data)
train_size = int(total_samples * 0.7)
val_size = int(total_samples * 0.2)

train_data = processed_data.take(train_size).batch(32).cache().shuffle(1000).prefetch(tf.data.AUTOTUNE)
validation_data = processed_data.skip(train_size).take(val_size).batch(32).cache().prefetch(tf.data.AUTOTUNE)
test_data = processed_data.skip(train_size + val_size).batch(32).cache().prefetch(tf.data.AUTOTUNE)

KeyboardInterrupt: 