In [15]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

file_path = "../starbucks_open_7year.csv"  
data = pd.read_csv(file_path)

print(data.head())

data['Date'] = pd.to_datetime(data['Date'])
print(data['Date'][0:5])
data = data.sort_values(by='Date').reset_index(drop=True)

X = data['Time'].values.reshape(-1, 1).astype(float)
y = data['Open'].values.reshape(-1, 1).astype(float)

X_min, X_max = X.min(), X.max()
y_min, y_max = y.min(), y.max()

X_normalized = (X - X_min) / (X_max - X_min)
y_normalized = (y - y_min) / (y_max - y_min)

split_index = int(len(X_normalized) * 0.8)
X_train, X_test = X_normalized[:split_index], X_normalized[split_index:]
y_train, y_test = y_normalized[:split_index], y_normalized[split_index:]

   Unnamed: 0        Date   Open          Time
0           0  11/13/2017  56.81  1.510531e+09
1           1  11/14/2017  56.47  1.510618e+09
2           2  11/15/2017  56.82  1.510704e+09
3           3  11/16/2017  56.92  1.510790e+09
4           4  11/17/2017  57.24  1.510877e+09
0   2017-11-13
1   2017-11-14
2   2017-11-15
3   2017-11-16
4   2017-11-17
Name: Date, dtype: datetime64[ns]


In [6]:
model = Sequential([
    Dense(64, activation='relu', input_dim=1),
    Dense(32, activation='relu'),
    Dense(1) 
])

model.compile(optimizer='adam', loss='mse', metrics=['mae'])

history = model.fit(X_train, y_train, 
                    epochs=50, batch_size=32, 
                    validation_data=(X_test, y_test),
                    verbose=1)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2715 - mae: 0.4342 - val_loss: 0.0906 - val_mae: 0.2773
Epoch 2/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 989us/step - loss: 0.0308 - mae: 0.1481 - val_loss: 0.0737 - val_mae: 0.2449
Epoch 3/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 781us/step - loss: 0.0284 - mae: 0.1407 - val_loss: 0.0825 - val_mae: 0.2617
Epoch 4/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 805us/step - loss: 0.0289 - mae: 0.1413 - val_loss: 0.0953 - val_mae: 0.2846
Epoch 5/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 811us/step - loss: 0.0268 - mae: 0.1361 - val_loss: 0.0783 - val_mae: 0.2531
Epoch 6/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 817us/step - loss: 0.0260 - mae: 0.1326 - val_loss: 0.0958 - val_mae: 0.2854
Epoch 7/50
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 827us/step - loss: 0.0254 

In [None]:
loss, mae = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}, Test MAE: {mae}")

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0270 - mae: 0.1289




Test Loss: 0.04102861508727074, Test MAE: 0.16966617107391357


## Conclusion

For this project, my individual contribution focused on building a neural network model that processes the time series data by treating each time instance independently. To achieve this, I used a feedforward neural network (FNN), where each timestamp was treated as an isolated data point, disregarding any temporal dependencies.

The process began with data preprocessing, where I converted the Date column into datetime format and sorted the data chronologically to maintain consistency:

In [None]:
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values(by='Date').reset_index(drop=True)

I then extracted the Time column as the feature and Open prices as the target, reshaping and normalizing the data to improve model stability during training.

For the model, I built a simple sequential neural network with two hidden layers:

In [None]:
model = Sequential([
    Dense(64, activation='relu', input_dim=1),
    Dense(32, activation='relu'),
    Dense(1)
])

I compiled the model using the Adam optimizer and trained it over 50 epochs, evaluating its performance using Mean Absolute Error (MAE). The final test loss and MAE metrics provided a clear indication of the model's accuracy on unseen data.

## Reflections on my work

Through this task, I gained practical experience in preprocessing time series data, applying normalization techniques, and building a basic neural network for regression tasks using Keras. By splitting the data into training and test sets, I ensured the model was evaluated on unseen data, providing a realistic measure of its generalization ability.

While this approach does not capture the temporal dependencies inherent in time series data, it serves as a baseline model for comparison with more complex architectures. The expected outcome was to create a straightforward yet effective model that establishes a reference point for evaluating the performance of time series-specific models later in the project.