In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader

In [3]:
# Load the SPY daily price CSV into a DataFrame and display it as the cell output
csv_path = 'SPY-daily-2022-2023.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2022-03-15,419.769989,426.839996,418.420013,426.170013,419.366486,106219100
1,2022-03-16,429.890015,435.679993,424.799988,435.619995,428.665619,144954800
2,2022-03-17,433.589996,441.070007,433.190002,441.070007,434.028625,102676900
3,2022-03-18,438.000000,444.859985,437.220001,444.519989,438.782471,106345500
4,2022-03-21,444.339996,446.459991,440.679993,444.390015,438.654144,88349800
...,...,...,...,...,...,...,...
247,2023-03-09,399.739990,401.480011,390.529999,391.559998,391.559998,111945300
248,2023-03-10,390.989990,393.160004,384.320007,385.910004,385.910004,189105300
249,2023-03-13,381.809998,390.390015,380.649994,385.359985,385.359985,157790000
250,2023-03-14,390.500000,393.450012,387.049988,391.730011,391.730011,149504500


In [None]:
import matplotlib.pyplot as plt

# Parse the 'Date' strings into datetime values so the x-axis is a real time axis
df['Date'] = pd.to_datetime(df['Date'])

# Draw the 'Open' column against the date
trading_dates = df['Date']
opening_prices = df['Open']
plt.plot(trading_dates, opening_prices)
plt.title('Stock Price Over Time')
plt.xlabel('Date')
plt.ylabel('Open')
plt.show()

Alternatively, you can use Seaborn to create more complex plots with less code. For example, the following code will create a pairplot showing the relationships between the different columns in the data:

In [None]:
import seaborn as sns

# Make sure 'Date' holds datetime values before plotting
df['Date'] = pd.to_datetime(df['Date'])

# Draw the seaborn pairplot for the DataFrame.
# NOTE: `plt` is not imported in this cell; it relies on the matplotlib import
# made in the earlier plotting cell.
pair_grid = sns.pairplot(df)
plt.show()

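Pre-processing and cleaning the data
Next, we will pre-process the data: split the date into numeric year/month/day features, scale every column to the [0, 1] range, split the rows into training and test sets, and cut each set into fixed-length sequences: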


In [None]:
# Replace the 'Date' column with numeric Year / Month / Day columns.
# Guarded so that re-running this cell (after 'Date' has already been dropped)
# does not raise a KeyError.
if 'Date' in df.columns:
    dates = pd.to_datetime(df['Date'])
    df = df.assign(Year=dates.dt.year, Month=dates.dt.month, Day=dates.dt.day)
    df = df.drop(columns=['Date'])

# Split by row order: the first 80% of rows are used for training,
# the remaining rows for testing
train_size = int(len(df) * 0.8)
test_size = len(df) - train_size

# Scale every column to [0, 1].
# The scaler is fitted on the training rows only, so the min/max of the test
# period do not leak into training; the same fitted transform is then applied
# to all rows. Test values can therefore fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df.iloc[:train_size])
df_scaled = scaler.transform(df)

train, test = df_scaled[:train_size, :], df_scaled[train_size:, :]

# Convert the data into a 3D array (a sequence with t timesteps and d dimensions)
def create_sequences(data, t, d=None):
    """Cut a 2-D array into overlapping windows and their next-row targets.

    Parameters
    ----------
    data : 2-D numpy array, one row per timestep and one column per feature.
    t : int
        Number of consecutive rows in each input window.
    d : optional
        Not used by the function; the feature count comes from ``data`` itself.
        Kept (now optional) so existing three-argument calls keep working.

    Returns
    -------
    X : numpy array with one entry per window; ``X[i]`` is ``data[i:i + t, :]``.
    y : numpy array with one entry per window; ``y[i]`` is ``data[i + t, :]``,
        i.e. the row that immediately follows window ``i``.

    Both arrays are empty when ``data`` has ``t`` rows or fewer.
    """
    # The last usable window starts at len(data) - t - 1, because its target
    # row i + t must still exist; range() is exclusive, so the bound is
    # len(data) - t (the previous "- t - 1" bound dropped that final pair).
    n_windows = len(data) - t
    X = [data[i:i + t, :] for i in range(n_windows)]
    y = [data[i + t, :] for i in range(n_windows)]
    return np.array(X), np.array(y)

# Create sequences of t timesteps with d dimensions
t = 10  # timesteps in each input window
# Number of feature columns, read from the scaled training array instead of
# being hard-coded (previously fixed at 9, "including year, month, and day"),
# so it always matches the columns actually present in the data.
d = train.shape[1]
X_train, y_train = create_sequences(train, t, d)
X_test, y_test = create_sequences(test, t, d)
Building model
Then, we will build and train the deep learning model:

# Build the model: a 50-unit LSTM layer that reads windows of shape (t, d),
# followed by a Dense layer with d outputs.
# NOTE(review): Sequential, LSTM and Dense are not imported in the visible
# cells (the import cell only brings in torch) — presumably these are the
# Keras classes; confirm the import exists before running.
model = Sequential([
    LSTM(50, input_shape=(t, d)),
    Dense(d),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model, keeping the returned history object for the loss plot
history = model.fit(X_train, y_train, epochs=50, batch_size=1, verbose=1)

To visualize how training progressed, we can plot the values recorded during training.

Because the model is compiled with only a mean-squared-error loss (no accuracy metric), the training loss per epoch is the curve we plot with Matplotlib:

In [None]:
import matplotlib.pyplot as plt

# Plot the per-epoch training loss stored in the history returned by model.fit.
# The model is compiled with a mean-squared-error loss and no accuracy metric,
# so the y-axis is labelled as loss only (it used to read "Accuracy/Loss").
plt.plot(history.history['loss'])
plt.title('Model Loss')
plt.ylabel('Loss (MSE)')
plt.xlabel('Epoch')
plt.legend(['Loss'], loc='upper left')
plt.show()

Testing the model
Finally, we will evaluate the trained model on the held-out test set:

In [None]:
# Test the model on the held-out test sequences.
# The model is compiled with loss='mean_squared_error' and no extra metrics,
# so the value returned here is a mean squared error on the scaled features —
# an error measure, not an accuracy. "1 - MSE" is not a meaningful percentage
# for a regression model, so the root-mean-squared error is reported instead.
test_error = model.evaluate(X_test, y_test, verbose=2)
print(f'Test error: {test_error}')
print(f'Test RMSE (scaled units): {np.sqrt(test_error)}')