<a href="https://colab.research.google.com/github/submarinejuice/CP322-Final-Project-Group-9/blob/main/cp322_FINAL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Downloading dependencies
!pip install --upgrade pip
!pip install yfinance pandas numpy scikit-learn matplotlib seaborn shap tensorflow



In [7]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import shap
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Attention, Input
from tensorflow.keras.optimizers import Adam

# Project Overview
#
# Neuro + Fintech + Financial Text Sentiment Predictor
#
# Goal: Predict Buy / Sell / Hold decisions by integrating multiple data modalities:
# - Stock price data: historical OHLC, returns, moving averages, volatility indicators
# - Financial news sentiment: daily sentiment scores derived from news headlines or articles
# - Simulated cognitive features: attention, stress, risk appetite, confidence (used until a real dataset is available)
#
# This project demonstrates multi-modal machine learning by combining:
# 1. Market numeric data (stocks)
# 2. Textual data (financial news sentiment)
# 3. Neuro-inspired cognitive signals
#
# Key Features of the Project:
# - Temporal modeling: cognitive features and lagged news sentiment are sequence-dependent
# - Multi-modal integration: numeric, textual, and simulated cognitive features feed into a single model
# - Evaluation & Explainability: model performance measured via accuracy and F1-score, with feature importance explored using SHAP
#
# Objectives:
# 1. Build a sequence-aware model (LSTM/GRU with attention) to predict trading actions
# 2. Demonstrate non-obvious patterns by including temporal and multi-modal dependencies
# 3. Perform ablation studies to quantify the contribution of cognitive and sentiment features
# 4. Provide interpretable insights into feature importance and model behavior

def get_stock_data(tickers, start_date, end_date):
    """Fetch price data for ``tickers`` by delegating to ``yf.download``.

    Args:
        tickers: Ticker symbol(s); forwarded unchanged as the first positional
            argument of ``yf.download``.
        start_date: Forwarded as the ``start`` keyword.
        end_date: Forwarded as the ``end`` keyword.

    Returns:
        Whatever ``yf.download`` returns for this request; ``auto_adjust=True``
        is always passed.
    """
    return yf.download(
        tickers,
        start=start_date,
        end=end_date,
        auto_adjust=True,
    )

# --- Download configuration -------------------------------------------------
TICKERS = ['AAPL', 'TSLA', 'GOOGL']
START_DATE = '2018-01-01'
END_DATE = '2024-01-01'

# --- Fetch prices for every configured ticker --------------------------------
price_data = get_stock_data(
    tickers=TICKERS,
    start_date=START_DATE,
    end_date=END_DATE,
)

# --- Quick summary of what came back: shape, column labels, first/last index --
print(f"Data shape: {price_data.shape}")
print(f"Columns: {price_data.columns.tolist()}")
print(f"Date range: {price_data.index[0]} to {price_data.index[-1]}")

[*********************100%***********************]  3 of 3 completed

Data shape: (1509, 15)
Columns: [('Close', 'AAPL'), ('Close', 'GOOGL'), ('Close', 'TSLA'), ('High', 'AAPL'), ('High', 'GOOGL'), ('High', 'TSLA'), ('Low', 'AAPL'), ('Low', 'GOOGL'), ('Low', 'TSLA'), ('Open', 'AAPL'), ('Open', 'GOOGL'), ('Open', 'TSLA'), ('Volume', 'AAPL'), ('Volume', 'GOOGL'), ('Volume', 'TSLA')]
Date range: 2018-01-02 00:00:00 to 2023-12-29 00:00:00



