In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Analysis inputs, gathered here so the ticker and date range can be changed
# in one place (replace "AAPL" with any ticker symbol).
TICKER = "AAPL"
START_DATE = "2020-01-01"
END_DATE = "2024-01-01"

# Download daily price history for the chosen ticker.
stock = yf.download(TICKER, start=START_DATE, end=END_DATE)

# Fail early with a clear message: an empty frame (e.g. a failed download)
# would otherwise only surface later as an opaque scikit-learn error.
if stock.empty:
    raise RuntimeError(f"No price data returned for {TICKER!r}")

# Some yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker, in which case stock['Close'] is a one-column DataFrame
# instead of a Series. Keep only the field level so the column lookups below
# behave the same regardless of the installed version.
if isinstance(stock.columns, pd.MultiIndex):
    stock.columns = stock.columns.get_level_values(0)

# Technical indicators: 50-day and 200-day simple moving averages of the close.
stock['SMA_50'] = stock['Close'].rolling(50).mean()
stock['SMA_200'] = stock['Close'].rolling(200).mean()

# Regime flag: 1 on every day the 50-day SMA is above the 200-day SMA, else 0.
# This marks the whole "above" regime, not only the day of the crossover.
stock['Signal'] = np.where(stock['SMA_50'] > stock['SMA_200'], 1, 0)

# Lagged return as feature: the previous day's close-to-close return.
# shift(1) moves each return one row forward in time so a row only contains
# information available before that day; shift(-1) would instead place the
# NEXT day's return on the row (look-ahead).
stock['Return'] = stock['Close'].pct_change().shift(1)

# Drop rows with missing values (the SMA warm-up period and the first returns).
data = stock.dropna()
if data.empty:
    raise RuntimeError(
        "No complete rows left after dropping missing values; "
        "the 200-day SMA needs at least 200 rows of price history"
    )

# Features (both SMAs and the lagged return); the regime flag is the label
# used only to colour the plot.
X = data[['SMA_50', 'SMA_200', 'Return']]
y = data['Signal']

# Standardise features so PCA is not dominated by the price-scale SMA columns.
X_scaled = StandardScaler().fit_transform(X)

# Project onto the first two principal components to visualise separability.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Scatter of the two components, coloured by regime flag.
plt.figure(figsize=(8, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='bwr', alpha=0.6)
plt.xlabel('PCA 1')
plt.ylabel('PCA 2')
plt.title('PCA of 50-day vs. 200-day SMA Crossover and Lagged Return')
plt.grid(True)
plt.show()
