In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# 2. Load & Preprocess Data
# Read the raw CSV and forward-fill in one expression, so each missing cell takes the
# most recent non-missing value above it in the same column.
df = pd.read_csv('starbucks_open_7year.csv').ffill()

FileNotFoundError: [Errno 2] No such file or directory: 'starbucks_open_7year.csv'

In [None]:
# Smooth the 'Open' column using a 5-row trailing moving average.
# Where the rolling mean is NaN (e.g. the first rows, which lack a full window),
# fall back to the raw 'Open' value for that row.
open_prices = df['Open']
rolling_mean = open_prices.rolling(window=5).mean()
df['Smoothed_Open'] = rolling_mean.fillna(open_prices)

In [None]:
# Convert 'Date' (day-first) to datetimes; values that cannot be parsed become NaT,
# those rows are dropped, and the remaining dates become the index.
parsed_dates = pd.to_datetime(df['Date'], dayfirst=True, errors='coerce')
df = (
    df.assign(Date=parsed_dates)
      .dropna(subset=['Date'])
      .set_index('Date')
)

In [None]:
# Plot the smoothed open price against the date index.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['Smoothed_Open'], label='Smoothed Open Price', color='green')
ax.set_title('Starbucks Open Price Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.show()

In [None]:
# 3. Scale the data
# Fit the min/max on the earliest rows only, then apply that mapping to the whole series.
# Fitting on the full series would let the validation/test price range leak into the
# preprocessing used for training. The split further down is chronological with 60% of
# the windows used for training, so the first 60% of rows lies inside the training data.
# Scaled values in the later rows may therefore fall outside [0, 1]; that is expected.
SCALER_FIT_FRACTION = 0.6
n_fit_rows = max(1, int(len(df) * SCALER_FIT_FRACTION))
smoothed_open = df[['Smoothed_Open']]

scaler = MinMaxScaler()
scaler.fit(smoothed_open.iloc[:n_fit_rows])
scaled = scaler.transform(smoothed_open)

In [None]:
# 4. Create lag features (3D) with window size 60
def create_lag_features_3d(data, window=60):
    """Turn a univariate series into sliding-window samples and next-step targets.

    Sample ``k`` holds ``data[k : k + window]`` and its target is ``data[k + window]``.

    Parameters
    ----------
    data : array-like of shape (n,) or (n, 1)
        The series to window, in order.
    window : int, default 60
        Number of consecutive past values in each sample.

    Returns
    -------
    X : numpy.ndarray of shape (max(n - window, 0), window, 1)
        The lagged input windows.
    y : numpy.ndarray of shape (max(n - window, 0),)
        The value that immediately follows each window.

    Raises
    ------
    ValueError
        If ``window`` is negative.
    """
    if window < 0:
        raise ValueError(f"window must be non-negative, got {window}")

    series = np.asarray(data)
    n_samples = len(series) - window

    # Too few observations for even one window: return correctly shaped empty arrays
    # instead of failing on the shape lookup of an empty array.
    if n_samples <= 0:
        return (
            np.empty((0, window, 1), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    # Row k of the index grid is [k, k + 1, ..., k + window - 1]; one fancy-indexing
    # gather then builds every window without a Python-level loop.
    window_index = np.arange(n_samples)[:, None] + np.arange(window)[None, :]
    X = series[window_index]
    # Copy so the targets do not alias the caller's input array.
    y = series[window:].reshape(-1).copy()
    return X.reshape((X.shape[0], X.shape[1], 1)), y

# Build the supervised dataset from the scaled series: each sample is a run of 60
# consecutive scaled values and its target is the value that follows that run.
X, y = create_lag_features_3d(scaled, window=60)

In [None]:
# 5. Train/Validation/Test split
# Order-preserving split (no shuffling): first hold out 40% of the samples, then cut that
# hold-out in half to get the validation and test sets.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.4, shuffle=False
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, shuffle=False
)

# Report the shape of each partition.
for split_name, split_X, split_y in (
    ("Training Data", X_train, y_train),
    ("Validation Data", X_val, y_val),
    ("Testing Data", X_test, y_test),
):
    print(split_name, split_X.shape, split_y.shape)

In [None]:
# 6. Flatten X for DNN model (DNNs need 2D input)
# Collapse every axis after the sample axis, giving one row of features per sample.
X_train_flat, X_val_flat, X_test_flat = (
    windows.reshape(windows.shape[0], -1)
    for windows in (X_train, X_val, X_test)
)

In [None]:
# 7. Build the DNN Regression Model
# Fully connected stack that narrows from 512 to 32 units, with 20% dropout after the
# 128- and 64-unit layers, ending in a single linear output unit.
n_inputs = X_train_flat.shape[1]

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(n_inputs,)))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

# Mean squared error is the training loss; mean absolute error is tracked alongside it.
model.compile(
    loss='mse',
    optimizer=Adam(learning_rate=0.001),
    metrics=['mae'],
)
model.summary()

In [None]:
# 8. Train the model
# Fit on the training rows and evaluate on the validation rows after each epoch;
# the returned object is kept in `history`.
training_options = {"epochs": 500, "batch_size": 16, "verbose": 1}
history = model.fit(
    x=X_train_flat,
    y=y_train,
    validation_data=(X_val_flat, y_val),
    **training_options,
)

In [None]:
# 9. Predict & Evaluate on Scaled Test Set
y_pred_scaled = model.predict(X_test_flat)

# Score the predictions against the scaled targets with each metric in turn.
mae, mse, r2, mape = (
    metric(y_test, y_pred_scaled)
    for metric in (
        mean_absolute_error,
        mean_squared_error,
        r2_score,
        mean_absolute_percentage_error,
    )
)

print(
    "\n[Scaled Evaluation]",
    f"MAE:  {mae:.4f}",
    f"MSE:  {mse:.4f}",
    f"R²:   {r2:.4f}",
    f"MAPE: {mape:.4f}",
    sep="\n",
)

In [None]:
# 10. Inverse Transform to Actual Prices
# Undo the scaler on both the targets and the predictions (it expects a column vector),
# then flatten each result back to one dimension.
y_test_actual, y_pred_actual = (
    scaler.inverse_transform(values.reshape(-1, 1)).flatten()
    for values in (y_test, y_pred_scaled)
)

# Same four metrics as before, now computed on the inverse-transformed values.
mae_act, mse_act, r2_act, mape_act = (
    metric(y_test_actual, y_pred_actual)
    for metric in (
        mean_absolute_error,
        mean_squared_error,
        r2_score,
        mean_absolute_percentage_error,
    )
)

print(
    "\n[Actual Price Evaluation]",
    f"MAE:  {mae_act:.4f}",
    f"MSE:  {mse_act:.4f}",
    f"R²:   {r2_act:.4f}",
    f"MAPE: {mape_act:.4f}",
    sep="\n",
)

In [None]:
# 11. Plot Predictions
# Overlay the inverse-transformed test targets and model predictions; the x-axis is the
# position within the test set.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_actual, label='Actual Price', color='blue')
ax.plot(y_pred_actual, label='Predicted Price', color='orange')
ax.set_title('Starbucks Price Prediction')
ax.set_xlabel('Days')
ax.set_ylabel('Price')
ax.legend()
fig.tight_layout()
plt.show()