<a href="https://colab.research.google.com/github/tennissta99660/Linear_regression_apple_stock/blob/main/Linear_regression_apple_stock.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# load the dataset
data_path = "Apple_stock_data_10years.csv"
apple_data = pd.read_csv(data_path)

# select features/column and target
X = apple_data[['open', 'high', 'low', 'volume']].values
y = apple_data[['close']].values

# scale the features to range [0, 1], each column is being scaled to [0,1] with highest to 1 and lowest to 0 and everything else between.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# initialize random weights and bias
num_features = X_train.shape[1]
weights = {
    'W': np.random.randn(num_features, 1),
    'B': np.random.randn(1, 1)
}

def forward_linear_regression(X_batch: np.ndarray, y_batch: np.ndarray, weights: dict) -> tuple:
    N = np.dot(X_batch, weights['W'])
    P = N + weights['B']
    loss = np.mean(np.power(y_batch - P, 2))
    forward_info = {'X': X_batch, 'N': N, 'P': P, 'y': y_batch}
    return loss, forward_info

def loss_gradients(forward_info: dict, weights: dict) -> dict:
    batch_size = forward_info['X'].shape[0]
    dLdP = -2 * (forward_info['y'] - forward_info['P'])
    dLdW = np.dot(forward_info['X'].T, dLdP) / batch_size
    dLdB = np.sum(dLdP) / batch_size
    loss_grads = {'W': dLdW, 'B': dLdB}
    return loss_grads

# training parameters
learning_rate = 0.001
batch_size = 32
num_epochs = 1000
num_samples = X_train.shape[0]

# training loop
for epoch in range(num_epochs):
    indices = np.random.permutation(num_samples)
    X_shuffled, y_shuffled = X_train[indices], y_train[indices]

    for i in range(0, num_samples, batch_size):
        X_batch = X_shuffled[i:i + batch_size]
        y_batch = y_shuffled[i:i + batch_size]

        # forward pass
        loss, forward_info = forward_linear_regression(X_batch, y_batch, weights)

        # backward pass
        loss_grads = loss_gradients(forward_info, weights)

        # update weights
        for key in weights.keys():
            weights[key] -= learning_rate * loss_grads[key]

    # printng loss occasionaly
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss:.4f}')

# learned weights and bias
print("Trained Weights:", weights['W'].flatten())
print("Trained Bias:", weights['B'][0][0])


Epoch 0, Loss: 10443.7396
Epoch 100, Loss: 36.1153
Epoch 200, Loss: 5.3681
Epoch 300, Loss: 2.9662
Epoch 400, Loss: 3.6499
Epoch 500, Loss: 2.6566
Epoch 600, Loss: 1.1115
Epoch 700, Loss: 1.9118
Epoch 800, Loss: 1.1863
Epoch 900, Loss: 1.5255
Trained Weights: [69.36920752 70.43892782 69.70180016 -6.84185706]
Trained Bias: 21.333502120670182


In [5]:
#  model on the test set
test_loss, test_info = forward_linear_regression(X_test, y_test, weights)

# compute MSE and MAE
y_pred = test_info['P']
mse = np.mean(np.square(y_test - y_pred))
mae = np.mean(np.abs(y_test - y_pred))

print(f'Test MSE: {mse:.4f}')
print(f'Test MAE: {mae:.4f}')


Test MSE: 1.0353
Test MAE: 0.7506
