In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# Predict next-day price direction (1 = up, 0 = not up) for one ticker with a
# small feed-forward network trained on daily Open/High/Low/Close/Volume.
#
# Names left in the namespace for later cells: ticker, df, features, scaler,
# X, y, X_train, X_test, y_train, y_test, model, y_pred, accuracy, conf_matrix.

# Step 1: Load stock data
ticker = 'AAPL'
# auto_adjust is passed explicitly so the meaning of 'Close' does not depend on
# the installed yfinance version's default.
df = yf.download(ticker, start='2020-01-01', end='2023-01-01', auto_adjust=True)
if df.empty:
    # Fail here with a clear message instead of deep inside the scaler/model.
    raise ValueError(f"No price data returned for {ticker!r}")

# Some yfinance releases return two-level (field, ticker) columns even for a
# single ticker; keep only the field level so df['Close'] is a plain Series.
# Relies on the default column grouping (field on level 0).
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Label each day by whether the NEXT day's close is higher than today's.
next_close = df['Close'].shift(-1)
df['Target'] = np.where(next_close > df['Close'], 1, 0)
# The final row has no next day (next_close is NaN), so its label would be an
# arbitrary 0. Drop it rather than train or evaluate on a made-up label.
df = df.iloc[:-1]

# Step 2: Feature selection
features = df[['Open', 'High', 'Low', 'Close', 'Volume']]
y = df['Target'].values

# Step 3: Train-test split (chronological: shuffle=False keeps the test set
# strictly after the training set in time)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features.values, y, test_size=0.2, shuffle=False
)

# Scaling happens AFTER the split: the scaler learns min/max from the training
# period only, so no information from the test period leaks into training.
# Test-period values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full scaled matrix, kept under its original name for any later cell using it.
X = np.vstack([X_train, X_test])

# Step 4: Build ANN model
model = Sequential()
# Explicit Input layer instead of the deprecated input_dim argument; the width
# is taken from the data so it follows the feature list above.
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)

# Step 5: Evaluate model
# Sigmoid output is thresholded at 0.5 to get hard 0/1 predictions.
y_pred = (model.predict(X_test) > 0.5).astype("int32")
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:")
print(conf_matrix)

# ============================================================================
# STOCK MARKET PREDICTION USING ARTIFICIAL NEURAL NETWORK (ANN)
# ============================================================================
#
# OVERVIEW:
# This program predicts whether a stock price will go UP or DOWN the next day
# using historical price data and an Artificial Neural Network.
#
# APPROACH:
# 1. Download historical stock data (3 years of Apple stock)
# 2. Create binary labels: 1 = price rises tomorrow, 0 = price falls tomorrow
# 3. Use 5 features: Open, High, Low, Close, Volume
# 4. Normalize all features to 0-1 range for better neural network performance
# 5. Train a neural network with 2 hidden layers
# 6. Predict future price direction and evaluate accuracy
#
# ============================================================================
# STEP 1: LOAD STOCK DATA
# ============================================================================
#
# ticker = 'AAPL'
# - This selects Apple Inc. as our stock to analyze
# - You can change to any valid ticker symbol (e.g., 'GOOGL', 'TSLA', 'MSFT')
#
# df = yf.download(ticker, start='2020-01-01', end='2023-01-01')
# - Downloads 3 years of historical stock data from Yahoo Finance
# - Returns a DataFrame with columns:
#   * Date (index): Trading date
#   * Open: Price when market opened
#   * High: Highest price during the day
#   * Low: Lowest price during the day
#   * Close: Price when market closed
#   * Volume: Number of shares traded
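#   * Adj Close: Adjusted closing price (accounts for splits/dividends).
#     Only present when auto_adjust=False; with auto_adjust=True (the default
#     in recent yfinance versions) the adjustment is applied to Open/High/Low/
#     Close directly and there is no separate Adj Close column.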
#
# Example of downloaded data:
#            Open    High     Low   Close    Volume
# 2020-01-02  74.06  75.15   73.80  75.09  135480400
# 2020-01-03  74.29  75.14   74.13  74.36  146322800
# 2020-01-06  73.45  74.99   73.19  74.95  118387200
#
# df['Target'] = np.where(df['Close'].shift(-1) > df['Close'], 1, 0)
# - Creates the TARGET variable (what we want to predict)
# - shift(-1): Shifts the Close column UP by 1 row to get NEXT DAY's price
# - np.where(): If next day's close > today's close, assign 1, else 0
#
# How shift(-1) works:
#   Original Close | Shifted Close | Compare      | Target
#   100            | 102           | 102 > 100    | 1 (UP)
#   102            | 98            | 98 > 102     | 0 (DOWN)
#   98             | 105           | 105 > 98     | 1 (UP)
#   105            | 103           | 103 > 105    | 0 (DOWN)
#
# WHY BINARY CLASSIFICATION?
# - Simpler than predicting exact price (regression)
# - More practical: traders care about direction (buy/sell/hold)
# - Easier to evaluate: either correct or wrong
#
# ============================================================================
# STEP 2: FEATURE SELECTION AND SCALING
# ============================================================================
#
# features = df[['Open', 'High', 'Low', 'Close', 'Volume']]
# - Selects 5 columns as INPUT FEATURES for the neural network
# - These are the X variables (independent variables)
#
# WHAT EACH FEATURE MEANS:
# 1. Open: Starting price - shows opening sentiment
# 2. High: Peak price - shows maximum buying interest
# 3. Low: Bottom price - shows maximum selling pressure
# 4. Close: Ending price - most important, reflects final consensus
# 5. Volume: Trading activity - high volume = strong conviction
#
# WHY THESE FEATURES?
# - They capture price movement patterns throughout the day
# - Volume indicates strength of price movements
# - Combination helps predict momentum and reversals
#
# scaler = MinMaxScaler()
# - Creates a scaler object that will normalize data
# - MinMaxScaler transforms data to range [0, 1]
# - Formula: scaled_value = (value - min) / (max - min)
#
# X = scaler.fit_transform(features)
# - fit(): Learns the min and max values from the data
# - transform(): Applies the scaling formula to all values
# - fit_transform(): Does both in one step
#
# EXAMPLE OF SCALING:
# Original data:
#   Open=150, High=155, Low=148, Close=152, Volume=1000000
# After MinMaxScaler (0-1 range):
#   Open=0.42, High=0.51, Low=0.38, Close=0.48, Volume=0.65
#
# WHY SCALE DATA?
# 1. Neural networks learn better when inputs are similar magnitude
# 2. Without scaling: Volume (millions) would dominate Price (hundreds)
# 3. Prevents certain features from having unfair influence
# 4. Speeds up training convergence
# 5. Improves numerical stability
#
# y = df['Target'].values
# - Extracts the target column as a NumPy array
# - These are the OUTPUT labels we want to predict (0 or 1)
# - .values converts pandas Series to numpy array
#
# ============================================================================
# STEP 3: TRAIN-TEST SPLIT
# ============================================================================
#
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
#
# PURPOSE: Split data into training and testing sets
#
# PARAMETERS:
# - X: Input features (scaled stock data)
# - y: Output labels (0 or 1)
# - test_size=0.2: Use 20% for testing, 80% for training
# - shuffle=False: CRITICAL! Keep chronological order
#
# WHY shuffle=False?
# - Stock data is TIME SERIES (order matters!)
# - shuffle=True would mix past and future → CHEATING (data leakage)
# - We must train on OLD data and test on NEW data
# - Mimics real trading: predict future based on past
#
# EXAMPLE SPLIT:
# Total data: 756 days (3 years)
# Training: First 605 days (80%) → Days 1-605
# Testing: Last 151 days (20%) → Days 606-756
#
# WHAT EACH VARIABLE CONTAINS:
# X_train: 605 rows × 5 features (training inputs)
# y_train: 605 labels (training outputs)
# X_test: 151 rows × 5 features (testing inputs)
# y_test: 151 labels (testing outputs - ground truth)
#
# ============================================================================
# STEP 4: BUILD ANN MODEL
# ============================================================================
#
# model = Sequential()
# - Creates an empty neural network model
# - Sequential: Layers are stacked one after another (linear stack)
# - Alternative would be Functional API (for complex architectures)
#
# model.add(Dense(64, input_dim=5, activation='relu'))
# - Adds FIRST HIDDEN LAYER to the network
#
# PARAMETERS EXPLAINED:
# - Dense: Fully connected layer (every neuron receives every output of the previous layer)
# - 64: Number of neurons in this layer
# - input_dim=5: Specifies 5 input features (only needed for first layer)
# - activation='relu': Rectified Linear Unit activation function
#
# WHAT IS ReLU?
# - Formula: f(x) = max(0, x)
# - If input is positive: output = input
# - If input is negative: output = 0
# - Why use it? Helps network learn non-linear patterns
# - Example: ReLU(-2) = 0, ReLU(0) = 0, ReLU(3) = 3
#
# WHAT HAPPENS IN THIS LAYER?
# - Input: 5 values (scaled stock features)
# - Each of 64 neurons computes: output = ReLU(w1*x1 + w2*x2 + ... + w5*x5 + bias)
# - Weights (w) and biases are learned during training
# - Output: 64 values (passed to next layer)
#
# model.add(Dense(32, activation='relu'))
# - Adds SECOND HIDDEN LAYER
# - 32 neurons (fewer than previous layer - funnel architecture)
# - Also uses ReLU activation
# - Takes 64 inputs from previous layer
# - Outputs 32 values
#
# WHY 64 → 32 NEURONS?
# - First layer extracts basic patterns (more neurons needed)
# - Second layer refines patterns (fewer neurons sufficient)
# - This is called "funnel architecture" or "pyramid architecture"
# - Helps prevent overfitting and reduces computation
#
# model.add(Dense(1, activation='sigmoid'))
# - Adds OUTPUT LAYER (final layer)
# - 1 neuron: Single binary prediction (UP or DOWN)
# - activation='sigmoid': Converts output to probability (0 to 1)
#
# WHAT IS SIGMOID?
# - Formula: f(x) = 1 / (1 + e^(-x))
# - Converts any number to range [0, 1]
# - Interpretation: Probability that stock goes UP
# - Example: sigmoid(-2) = 0.12, sigmoid(0) = 0.5, sigmoid(2) = 0.88
#
# COMPLETE NETWORK ARCHITECTURE:
#
#   Input Layer (5 neurons)
#        ↓
#   [Open, High, Low, Close, Volume]
#        ↓
#   Hidden Layer 1 (64 neurons, ReLU)
#        ↓
#   [Learns basic patterns: trends, volatility, etc.]
#        ↓
#   Hidden Layer 2 (32 neurons, ReLU)
#        ↓
#   [Refines patterns: complex relationships]
#        ↓
#   Output Layer (1 neuron, Sigmoid)
#        ↓
#   [Probability: 0.0 to 1.0]
#        ↓
#   Decision: >0.5 = UP (1), ≤0.5 = DOWN (0)
#
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# - Configures the model for training
#
# PARAMETERS:
# - loss='binary_crossentropy': Loss function for binary classification
#   * Measures how wrong predictions are
#   * Lower loss = better predictions
#   * Formula penalizes confident wrong predictions heavily
#
# - optimizer='adam': Algorithm to update weights during training
#   * Adam = Adaptive Moment Estimation
#   * Automatically adjusts learning rate
#   * Generally works well without tuning
#   * Alternatives: SGD, RMSprop
#
# - metrics=['accuracy']: What to display during training
#   * Accuracy = (correct predictions) / (total predictions)
#   * Easy to understand performance metric
#
# model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)
# - TRAINS THE NEURAL NETWORK (learning phase)
#
# PARAMETERS:
# - X_train: Input features (605 samples)
# - y_train: True labels (605 labels)
# - epochs=50: Go through entire dataset 50 times
# - batch_size=32: Update weights after every 32 samples
# - verbose=1: Print progress for each epoch
#
# WHAT HAPPENS DURING TRAINING?
# 1. Forward Pass:
#    - Input data flows through network
#    - Each layer applies weights and activation
#    - Final layer outputs prediction
#
# 2. Calculate Loss:
#    - Compare prediction with true label
#    - Calculate error using binary crossentropy
#
# 3. Backward Pass (Backpropagation):
#    - Calculate how each weight contributed to error
#    - Use calculus (gradient descent) to find direction to adjust weights
#
# 4. Update Weights:
#    - Adjust all weights slightly to reduce error
#    - Adam optimizer determines how much to adjust
#
# 5. Repeat:
#    - Do this for all batches in one epoch
#    - Repeat for 50 epochs
#    - Network gradually learns patterns
#
# WHY BATCH_SIZE=32?
# - Too small (e.g., 1): Noisy updates, slower training
# - Too large (e.g., 1000): Less frequent updates, may miss patterns
# - 32 is a good balance: Common default value
#
# WHY EPOCHS=50?
# - Too few: Network doesn't learn enough (underfitting)
# - Too many: Network memorizes training data (overfitting)
# - 50 is typically enough for this dataset size
#
# ============================================================================
# STEP 5: EVALUATE MODEL
# ============================================================================
#
# y_pred = (model.predict(X_test) > 0.5).astype("int32")
# - Makes predictions on test data and converts to binary labels
#
# BREAKDOWN:
# 1. model.predict(X_test):
#    - Feeds test data through trained network
#    - Returns probabilities (e.g., [0.23, 0.78, 0.91, 0.12, ...])
#    - Each value is probability that stock goes UP
#
# 2. > 0.5:
#    - Applies threshold: if probability > 0.5, predict UP
#    - Returns boolean array: [False, True, True, False, ...]
#    - 0.5 is standard threshold for binary classification
#
# 3. .astype("int32"):
#    - Converts boolean to integers: True→1, False→0
#    - Final predictions: [0, 1, 1, 0, ...]
#
# EXAMPLE:
# Probabilities from model.predict: [0.23, 0.78, 0.91, 0.12, 0.67]
# After thresholding (>0.5):       [False, True, True, False, True]
# After converting to int:          [0, 1, 1, 0, 1]
#
# accuracy = accuracy_score(y_test, y_pred)
# - Compares predictions with true labels
# - Calculates percentage of correct predictions
# - Formula: accuracy = (number of correct predictions) / (total predictions)
#
# EXAMPLE:
# y_test (true):      [1, 0, 1, 1, 0, 1, 0, 0]
# y_pred (predicted): [1, 0, 1, 0, 0, 1, 1, 0]
# Correct:            [✓, ✓, ✓, ✗, ✓, ✓, ✗, ✓]
# Accuracy = 6/8 = 0.75 (75%)
#
# conf_matrix = confusion_matrix(y_test, y_pred)
# - Creates a 2×2 matrix showing detailed prediction results
#
# CONFUSION MATRIX STRUCTURE:
#                   Predicted DOWN | Predicted UP
# Actual DOWN            TN        |      FP
# Actual UP              FN        |      TP
#
# WHERE:
# - TN (True Negative): Correctly predicted DOWN
# - FP (False Positive): Predicted UP, actually DOWN (Type I error)
# - FN (False Negative): Predicted DOWN, actually UP (Type II error)
# - TP (True Positive): Correctly predicted UP
#
# EXAMPLE OUTPUT:
# [[45  20]
#  [21  65]]
#
# INTERPRETATION:
# - 45: Correctly predicted stock would fall (True Negatives)
# - 20: Wrongly predicted stock would rise (False Positives)
# - 21: Wrongly predicted stock would fall (False Negatives)
# - 65: Correctly predicted stock would rise (True Positives)
#
# CALCULATING METRICS FROM CONFUSION MATRIX:
# - Accuracy = (TN + TP) / Total = (45 + 65) / 151 = 0.73 (73%)
# - Precision = TP / (TP + FP) = 65 / (65 + 20) = 0.76 (76%)
# - Recall = TP / (TP + FN) = 65 / (65 + 21) = 0.76 (76%)
#
# WHAT THESE METRICS MEAN FOR TRADING:
# - Accuracy: Overall correctness of predictions
# - Precision: When we predict UP, how often are we right?
# - Recall: Of all actual UPs, how many did we catch?
#
# print(f"Accuracy: {accuracy:.2f}")
# - Displays accuracy as decimal (e.g., 0.73 means 73%)
# - .2f formats to 2 decimal places
#
# print("Confusion Matrix:")
# print(conf_matrix)
# - Displays the 2×2 confusion matrix
# - Helps understand types of errors the model makes
#
# ============================================================================
# COMPLETE FLOW EXAMPLE
# ============================================================================
#
# Let's trace one prediction from start to finish:
#
# 1. RAW INPUT (Today's stock data):
#    Open=$150, High=$155, Low=$148, Close=$152, Volume=1,000,000
#
# 2. AFTER SCALING (MinMaxScaler):
#    [0.42, 0.51, 0.38, 0.48, 0.65]
#
# 3. THROUGH NEURAL NETWORK:
#    Input Layer: [0.42, 0.51, 0.38, 0.48, 0.65]
#         ↓ (64 neurons, ReLU)
#    Hidden Layer 1: [0.12, 0.87, 0.00, 0.34, ..., 0.91] (64 values)
#         ↓ (32 neurons, ReLU)
#    Hidden Layer 2: [0.23, 0.56, 0.00, ..., 0.78] (32 values)
#         ↓ (1 neuron, Sigmoid)
#    Output Layer: [0.78]
#
# 4. THRESHOLD:
#    0.78 > 0.5 → Predict 1 (stock will RISE tomorrow)
#
# 5. NEXT DAY:
#    Actual close = $154 (was $152)
#    Actual result: 1 (RISE)
#    Prediction: 1 (RISE)
#    Result: ✓ CORRECT!
#
# ============================================================================
# KEY CONCEPTS SUMMARY
# ============================================================================
#
# NEURAL NETWORK:
# - Mimics brain structure with interconnected neurons
# - Learns patterns through adjusting weights
# - Can model complex non-linear relationships
# - Better than simple rules for pattern recognition
#
# LAYERS:
# - Input: Receives raw (scaled) data
# - Hidden: Extracts and refines patterns
# - Output: Makes final prediction
#
# ACTIVATION FUNCTIONS:
# - ReLU: Allows non-linear learning in hidden layers
# - Sigmoid: Converts output to probability
#
# TRAINING:
# - Forward pass: Make predictions
# - Calculate loss: How wrong were we?
# - Backpropagation: How to improve?
# - Update weights: Make adjustments
# - Repeat many times until network learns
#
# EVALUATION:
# - Accuracy: Overall correctness
# - Confusion Matrix: Detailed breakdown of errors
#
# ============================================================================
# LIMITATIONS AND CONSIDERATIONS
# ============================================================================
#
# MODEL LIMITATIONS:
# 1. Expect accuracy near 50% in practice (next-day direction is close to a
#    coin flip); the 73% used in the examples above is illustrative only
# 2. Past patterns don't guarantee future results
# 3. Only uses price/volume data (ignores news, sentiment, economics)
# 4. May overfit to training data
# 5. Doesn't account for sudden market shocks
#
# NOT SUITABLE FOR:
# - Direct real-money trading (too risky: real accuracy is unproven and usually near chance)
# - High-frequency trading (too slow)
# - Large position sizing (losses would be significant)
#
# BETTER USED FOR:
# - Educational purposes (learning ML and finance)
# - Part of larger trading system (combine with other signals)
# - Research and backtesting (understanding patterns)
# - Risk management (as one of many indicators)
#
# IMPROVEMENTS TO CONSIDER:
# 1. Add more features (technical indicators, moving averages)
# 2. Use LSTM networks (better for time series)
# 3. Include sentiment analysis from news/social media
# 4. Ensemble methods (combine multiple models)
# 5. Better feature engineering (create derived features)
# 6. Longer training history (5-10 years)
# 7. Cross-validation for multiple time periods
#
# ============================================================================
# END OF EXPLANATION
# ============================================================================

  df = yf.download(ticker, start='2020-01-01', end='2023-01-01')
[*********************100%***********************]  1 of 1 completed
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5050 - loss: 0.6957
Epoch 2/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5322 - loss: 0.6925 
Epoch 3/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5381 - loss: 0.6905 
Epoch 4/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5046 - loss: 0.6931 
Epoch 5/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5209 - loss: 0.6915 
Epoch 6/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5159 - loss: 0.6905 
Epoch 7/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5262 - loss: 0.6902 
Epoch 8/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5341 - loss: 0.6893 
Epoch 9/50
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━