In [69]:
import numpy as np
import pandas as pd

In [70]:
data = pd.read_csv("Smarket.csv")
# data

In [71]:
data['Direction'] = data['Direction'].map({'Up': 1, 'Down': 0})
# data

**Train-Test Split**

In [73]:
split_index = int(len(data) * 0.75)

data = data.sample(frac=1, random_state=42).reset_index(drop=True)

train_data = data.iloc[:split_index]
test_data = data.iloc[split_index:]

In [74]:
X_train = train_data.drop(['Year', 'Direction'], axis=1)
y_train = train_data['Direction']

X_test = test_data.drop(['Year', 'Direction'], axis=1)
y_test = test_data['Direction']

**Feature Scaling**

In [76]:
def min_max_normalization(df, columns):
    normalized_df = df.copy()
    for column in columns:
        col_min = df[column].min()
        col_max = df[column].max()
        normalized_df[column] = (df[column] - col_min) / (col_max - col_min)
    return normalized_df

columns_to_scale = ['Lag1', 'Lag2', 'Lag3', 'Lag4', 'Lag5', 'Volume', 'Today']

X_train = min_max_normalization(X_train, columns_to_scale)
X_test = min_max_normalization(X_test, columns_to_scale)

In [77]:
# print(X_train.describe())
# print(X_test.describe())

In [78]:
import numpy as np

# Initialize weights and bias
num_features = X_train.shape[1]
weights = np.zeros(num_features)
bias = 0
learning_rate = 0.02
num_epochs = 2500


In [79]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


In [80]:
def compute_loss(y_true, y_pred):
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))


In [81]:
for epoch in range(num_epochs):
    # Compute predictions
    linear_model = np.dot(X_train, weights) + bias
    predictions = sigmoid(linear_model)
    
    # Compute gradients
    dw = np.dot(X_train.T, (predictions - y_train)) / len(y_train)
    db = np.sum(predictions - y_train) / len(y_train)
    
    # Update weights and bias
    weights -= learning_rate * dw
    bias -= learning_rate * db

    # Optional: Print loss every 100 epochs
    if epoch % 100 == 0:
        loss = compute_loss(y_train, predictions)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


Epoch 0, Loss: 0.6931
Epoch 100, Loss: 0.6900
Epoch 200, Loss: 0.6873
Epoch 300, Loss: 0.6845
Epoch 400, Loss: 0.6819
Epoch 500, Loss: 0.6792
Epoch 600, Loss: 0.6766
Epoch 700, Loss: 0.6740
Epoch 800, Loss: 0.6715
Epoch 900, Loss: 0.6690
Epoch 1000, Loss: 0.6665
Epoch 1100, Loss: 0.6640
Epoch 1200, Loss: 0.6616
Epoch 1300, Loss: 0.6591
Epoch 1400, Loss: 0.6568
Epoch 1500, Loss: 0.6544
Epoch 1600, Loss: 0.6521
Epoch 1700, Loss: 0.6497
Epoch 1800, Loss: 0.6475
Epoch 1900, Loss: 0.6452
Epoch 2000, Loss: 0.6430
Epoch 2100, Loss: 0.6408
Epoch 2200, Loss: 0.6386
Epoch 2300, Loss: 0.6364
Epoch 2400, Loss: 0.6343


In [82]:
y_pred = sigmoid(np.dot(X_test, weights) + bias)
y_pred_labels = (y_pred >= 0.5).astype(int)  # Convert probabilities to binary labels


In [83]:
accuracy = np.mean(y_pred_labels == y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 92.33%


In [84]:
lambda_reg = 0.05  # Regularization strength

for epoch in range(num_epochs):
    # Compute predictions
    linear_model = np.dot(X_train, weights) + bias
    predictions = sigmoid(linear_model)
    
    # Compute gradients with L2 regularization
    dw = np.dot(X_train.T, (predictions - y_train)) / len(y_train) + lambda_reg * weights
    db = np.sum(predictions - y_train) / len(y_train)
    
    # Update weights and bias
    weights -= learning_rate * dw
    bias -= learning_rate * db

    # Optional: Print loss every 100 epochs
    if epoch % 100 == 0:
        loss = compute_loss(y_train, predictions)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")



Epoch 0, Loss: 0.6321
Epoch 100, Loss: 0.6354
Epoch 200, Loss: 0.6383
Epoch 300, Loss: 0.6410
Epoch 400, Loss: 0.6434
Epoch 500, Loss: 0.6455
Epoch 600, Loss: 0.6475
Epoch 700, Loss: 0.6493
Epoch 800, Loss: 0.6509
Epoch 900, Loss: 0.6523
Epoch 1000, Loss: 0.6536
Epoch 1100, Loss: 0.6548
Epoch 1200, Loss: 0.6559
Epoch 1300, Loss: 0.6568
Epoch 1400, Loss: 0.6577
Epoch 1500, Loss: 0.6585
Epoch 1600, Loss: 0.6592
Epoch 1700, Loss: 0.6598
Epoch 1800, Loss: 0.6604
Epoch 1900, Loss: 0.6609
Epoch 2000, Loss: 0.6614
Epoch 2100, Loss: 0.6618
Epoch 2200, Loss: 0.6622
Epoch 2300, Loss: 0.6625
Epoch 2400, Loss: 0.6628


In [85]:
y_pred = sigmoid(np.dot(X_test, weights) + bias)
y_pred_labels = (y_pred >= 0.5).astype(int)  # Convert probabilities to binary labels


In [86]:
accuracy = np.mean(y_pred_labels == y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 97.12%
