In [None]:
# Financial Trend Prediction Analysis

This notebook demonstrates how to build a machine learning model to predict stock price movements using technical indicators.

## 1. Setup and Data Acquisition

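First, let's import the necessary libraries and fetch historical stock data. (In this copy of the notebook, the setup cell containing the imports and the `aapl_data` download appears near the end of the document, after the Conclusion; run it before the cells below. The code block that immediately follows belongs to the exploratory analysis of Section 2.)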

```python
def _finish_plot(title, xlabel, ylabel):
    """Label the current axes, enable the grid, then lay out and show the figure."""
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# --- Closing price over time ---
plt.figure(figsize=(14, 7))
plt.plot(aapl_data['Date'], aapl_data['Close'])
_finish_plot('AAPL Stock Price History', 'Date', 'Close Price ($)')

# --- Daily returns ---
# Row-over-row percentage change of the close, stored on the frame itself
# (so it is also present in anything later derived from aapl_data).
aapl_data['Daily_Return'] = aapl_data['Close'].pct_change().mul(100)

plt.figure(figsize=(14, 7))
plt.plot(aapl_data['Date'], aapl_data['Daily_Return'])
_finish_plot('AAPL Daily Returns', 'Date', 'Daily Return (%)')

# --- Distribution of daily returns ---
# The first return is NaN (no previous row), hence the dropna().
plt.figure(figsize=(10, 6))
sns.histplot(aapl_data['Daily_Return'].dropna(), kde=True, bins=50)
_finish_plot('Distribution of Daily Returns', 'Daily Return (%)', 'Frequency')

# --- Trading volume ---
plt.figure(figsize=(14, 7))
plt.bar(aapl_data['Date'], aapl_data['Volume'], alpha=0.5)
_finish_plot('AAPL Trading Volume', 'Date', 'Volume')

# --- Monthly averages ---
# NOTE(review): the 'M' alias is reported as deprecated in recent pandas in
# favour of 'ME' -- confirm against the pandas version this notebook targets.
monthly_data = (
    aapl_data
    .set_index('Date')
    .resample('M')
    .mean()
)

plt.figure(figsize=(14, 7))
plt.plot(monthly_data.index, monthly_data['Close'])
_finish_plot('AAPL Monthly Average Close Price', 'Date', 'Average Close Price ($)')
```

## 3. Feature Engineering

Now let's calculate various technical indicators to use as features for our model.

```python
def calculate_technical_indicators(df):
    """Calculate various technical indicators for stock price data.

    Args:
        df: DataFrame with at least a 'Close' and a 'Volume' column, one row
            per period in chronological order. It is not modified.

    Returns:
        A copy of ``df`` with the indicator columns appended (moving averages,
        Bollinger Bands, MACD, RSI, momentum, rate of change and volume
        features). Rows at the start of the frame contain NaN wherever a
        rolling window or a shifted value is not yet available.
    """
    # Work on a copy so the caller's frame is left untouched
    data = df.copy()
    close = data['Close']
    volume = data['Volume']

    # Simple Moving Averages
    for window in (5, 10, 20, 50):
        data[f'SMA_{window}'] = close.rolling(window=window).mean()

    # Exponential Moving Averages
    for span in (5, 10, 20):
        data[f'EMA_{span}'] = close.ewm(span=span, adjust=False).mean()

    # Bollinger Bands (20-day, 2 standard deviations)
    rolling_20 = close.rolling(window=20)
    data['BB_middle'] = rolling_20.mean()
    data['BB_std'] = rolling_20.std()
    data['BB_upper'] = data['BB_middle'] + 2 * data['BB_std']
    data['BB_lower'] = data['BB_middle'] - 2 * data['BB_std']
    data['BB_width'] = (data['BB_upper'] - data['BB_lower']) / data['BB_middle']

    # MACD (Moving Average Convergence Divergence)
    data['EMA_12'] = close.ewm(span=12, adjust=False).mean()
    data['EMA_26'] = close.ewm(span=26, adjust=False).mean()
    data['MACD'] = data['EMA_12'] - data['EMA_26']
    data['MACD_signal'] = data['MACD'].ewm(span=9, adjust=False).mean()
    data['MACD_hist'] = data['MACD'] - data['MACD_signal']

    # Relative Strength Index (RSI) from 14-period simple averages of the
    # positive and negative close-to-close moves
    delta = close.diff()
    gain = delta.where(delta > 0, 0)
    loss = -delta.where(delta < 0, 0)

    avg_gain = gain.rolling(window=14).mean()
    avg_loss = loss.rolling(window=14).mean()

    # Avoid division by zero when there were no losses in the window
    rs = avg_gain / avg_loss.replace(0, np.finfo(float).eps)
    data['RSI'] = 100 - (100 / (1 + rs))

    # Momentum: fractional change of the close over n periods
    for lag in (5, 10, 20):
        data[f'Momentum_{lag}'] = close / close.shift(lag) - 1

    # Price Rate of Change: percentage change of the close over n periods
    for lag in (5, 10):
        lagged = close.shift(lag)
        data[f'ROC_{lag}'] = (close - lagged) / lagged * 100

    # Volume-based indicators
    data['Volume_SMA_5'] = volume.rolling(window=5).mean()
    data['Volume_SMA_10'] = volume.rolling(window=10).mean()

    # A zero volume on the previous row makes the ratio +/-inf. Infinite values
    # are not removed by dropna() and are rejected by scalers/estimators, so
    # they are turned into NaN (i.e. "undefined") here.
    volume_change = volume / volume.shift(1) - 1
    data['Volume_Change'] = volume_change.replace([np.inf, -np.inf], np.nan)

    # Price to Volume Ratio (the +1 keeps the denominator non-zero)
    data['Price_Volume_Ratio'] = close / (volume + 1)

    return data

# Calculate technical indicators
aapl_features = calculate_technical_indicators(aapl_data)

# Create target variable: 1 if the next row's close is higher, 0 otherwise.
# The final row has no "next day": shift(-1) yields NaN there and the
# comparison would silently label it 0 (a fabricated "down" day). Rows whose
# next close is unknown are therefore removed instead of being mislabelled.
next_close = aapl_features['Close'].shift(-1)
aapl_features['Target'] = (next_close > aapl_features['Close']).astype(int)
aapl_features = aapl_features[next_close.notna()].copy()

# Display features with technical indicators.
# A bare expression in the middle of a notebook cell is not rendered (only a
# cell's last expression is, by default), so print it explicitly.
print(aapl_features.tail())

# Plot some technical indicators in three stacked panels
plt.figure(figsize=(14, 10))

# Panel 1: closing price with moving averages
plt.subplot(3, 1, 1)
plt.plot(aapl_features['Date'], aapl_features['Close'], label='Close')
plt.plot(aapl_features['Date'], aapl_features['SMA_20'], label='SMA 20')
plt.plot(aapl_features['Date'], aapl_features['SMA_50'], label='SMA 50')
plt.title('Closing Price with Moving Averages')
plt.legend()

# Panel 2: RSI with reference lines at 70 and 30
plt.subplot(3, 1, 2)
plt.plot(aapl_features['Date'], aapl_features['RSI'])
plt.axhline(y=70, color='r', linestyle='-', alpha=0.3)
plt.axhline(y=30, color='g', linestyle='-', alpha=0.3)
plt.title('Relative Strength Index (RSI)')

# Panel 3: MACD line, signal line and their difference as bars
plt.subplot(3, 1, 3)
plt.plot(aapl_features['Date'], aapl_features['MACD'], label='MACD')
plt.plot(aapl_features['Date'], aapl_features['MACD_signal'], label='Signal')
plt.bar(aapl_features['Date'], aapl_features['MACD_hist'], label='Histogram', alpha=0.3)
plt.title('MACD')
plt.legend()

plt.tight_layout()
plt.show()
```

## 4. Data Preparation

Let's prepare the data for model training.

```python
# Drop rows containing NaN (e.g. the leading rows where rolling windows are
# not yet full)
aapl_features_clean = aapl_features.dropna()

# Columns that shouldn't be used as features. 'Adj Close' may be absent
# depending on the yfinance version/settings, so only drop the columns that
# actually exist instead of raising a KeyError.
features_to_drop = ['Date', 'Target', 'Adj Close']
X = aapl_features_clean.drop(
    columns=[col for col in features_to_drop if col in aapl_features_clean.columns]
)

# Target variable
y = aapl_features_clean['Target']

# Get feature names
feature_names = X.columns.tolist()

# Split chronologically: the first 80% of rows train, the last 20% test.
# The features are rolling-window statistics of neighbouring days, so a
# shuffled split would place near-duplicates of test rows (and future
# information) in the training set.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Fit the scaler on the training rows only so that test-set statistics do not
# leak into the features, then apply it to both parts. The original index is
# kept so features and targets stay aligned by label.
scaler = StandardScaler()
scaler.fit(X_train_raw)
X_train = pd.DataFrame(
    scaler.transform(X_train_raw), columns=feature_names, index=X_train_raw.index
)
X_test = pd.DataFrame(
    scaler.transform(X_test_raw), columns=feature_names, index=X_test_raw.index
)

# Full scaled feature matrix (train rows followed by test rows), kept under
# its original name for any code that still refers to it
X_scaled = pd.concat([X_train, X_test])

print(f"Training features shape: {X_train.shape}")
print(f"Testing features shape: {X_test.shape}")
print(f"Training target shape: {y_train.shape}")
print(f"Testing target shape: {y_test.shape}")
```

## 5. Model Training and Evaluation

Now we'll train a Gradient Boosting Classifier and evaluate its performance.

```python
# Baseline gradient-boosting classifier; fit() returns the estimator itself
gb_model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

# Hard class predictions and the probability assigned to class 1
y_pred = gb_model.predict(X_test)
y_prob = gb_model.predict_proba(X_test)[:, 1]

# Headline metrics on the held-out rows
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

for metric_label, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{metric_label}: {metric_value:.4f}")

# Confusion matrix rendered as an annotated heatmap
cm = confusion_matrix(y_test, y_pred)
cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=cm_ax)
cm_ax.set_title('Confusion Matrix')
cm_ax.set_ylabel('True Label')
cm_ax.set_xlabel('Predicted Label')
plt.show()

# Per-class precision / recall / F1 table
print("Classification Report:")
print(classification_report(y_test, y_pred))

# ROC curve and the area under it, from the class-1 probabilities
fpr, tpr, _ = roc_curve(y_test, y_prob)
roc_auc = auc(fpr, tpr)

roc_fig, roc_ax = plt.subplots(figsize=(8, 6))
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
# Diagonal reference line from (0, 0) to (1, 1)
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic')
roc_ax.legend(loc="lower right")
plt.show()
```

## 6. Feature Importance

Let's examine which features are most important for our prediction.

```python
# Get feature importances and the feature order from most to least important
importances = gb_model.feature_importances_
indices = np.argsort(importances)[::-1]
ranked_names = [feature_names[i] for i in indices]

# Plot feature importances, highest first
plt.figure(figsize=(12, 8))
plt.bar(range(len(importances)), importances[indices])
plt.xticks(range(len(importances)), ranked_names, rotation=90)
plt.title('Feature Importances')
plt.tight_layout()
plt.show()

# Print top 10 features. Slicing (rather than indexing 0..9) keeps this from
# raising IndexError when the model was trained on fewer than ten features.
print("Top 10 most important features:")
for rank, idx in enumerate(indices[:10], start=1):
    print(f"{rank}. {feature_names[idx]}: {importances[idx]:.4f}")
```

## 7. Hyperparameter Tuning

Let's optimize our model using grid search.

```python
# Define parameter grid: 3 values for each of 5 parameters = 243 combinations,
# each evaluated with 5-fold cross-validation
param_grid = {
    'learning_rate': [0.01, 0.05, 0.1],
    'n_estimators': [50, 100, 200],
    'max_depth': [2, 3, 4],
    'min_samples_split': [2, 5, 10],
    'subsample': [0.8, 0.9, 1.0]
}

# Setup grid search, selecting the combination with the best mean F1 score
grid_search = GridSearchCV(
    estimator=GradientBoostingClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    scoring='f1',
    verbose=1
)

# Fit grid search
print("Performing grid search for hyperparameter optimization...")
grid_search.fit(X_train, y_train)

# Print results
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best F1 score: {grid_search.best_score_:.4f}")

# Model with the best parameters. GridSearchCV is left at its default
# refit=True, so best_estimator_ has already been refit on all of
# X_train / y_train; fitting it again here would only repeat that work.
best_model = grid_search.best_estimator_

# Evaluate optimized model on the held-out rows
y_pred_best = best_model.predict(X_test)
print("\nOptimized Model Evaluation:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_best):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_best):.4f}")
print(f"Recall: {recall_score(y_test, y_pred_best):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred_best):.4f}")
```

## 8. Model Visualization and Interpretation

Let's visualize our model's predictions over time.

```python
# Get predictions for the entire dataset.
# NOTE: X contains the rows the model was trained on as well as the held-out
# rows, so the strategy curve below is largely in-sample and will look better
# than it would on unseen data.
X_full = pd.DataFrame(scaler.transform(X), columns=feature_names)
y_full_pred = best_model.predict(X_full)
y_full_prob = best_model.predict_proba(X_full)[:, 1]

# Add predictions to the cleaned dataframe. Work on an explicit copy: the
# frame was derived via dropna(), and adding columns to such a derived frame
# may trigger pandas' chained-assignment warnings.
aapl_features_clean = aapl_features_clean.copy()
aapl_features_clean['Predicted_Target'] = y_full_pred
aapl_features_clean['Predicted_Probability'] = y_full_prob

# Calculate cumulative returns of simply holding the stock
aapl_features_clean['Actual_Return'] = aapl_features_clean['Close'].pct_change()
aapl_features_clean['Actual_Cumulative_Return'] = (1 + aapl_features_clean['Actual_Return']).cumprod() - 1

# Simulated trading strategy: +1 (long) when an up day is predicted, -1
# (short) otherwise. The prediction made on one row is about the NEXT row's
# move, so the position is shifted forward one row and multiplied by the
# return realised on that row. This books each strategy return on the date it
# is earned, keeping the strategy curve date-aligned with buy-and-hold; the
# set of returns taken (and the final cumulative figure) is unchanged.
position = aapl_features_clean['Predicted_Target'] * 2 - 1
aapl_features_clean['Strategy_Return'] = aapl_features_clean['Actual_Return'] * position.shift(1)
aapl_features_clean['Strategy_Cumulative_Return'] = (1 + aapl_features_clean['Strategy_Return'].fillna(0)).cumprod() - 1

# Plot cumulative returns
plt.figure(figsize=(14, 7))
plt.plot(aapl_features_clean['Date'], aapl_features_clean['Actual_Cumulative_Return'], label='Buy and Hold')
plt.plot(aapl_features_clean['Date'], aapl_features_clean['Strategy_Cumulative_Return'], label='ML Strategy')
plt.title('Cumulative Returns: Buy and Hold vs ML Strategy')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Calculate performance metrics from the final cumulative values
buy_hold_return = aapl_features_clean['Actual_Cumulative_Return'].iloc[-1]
strategy_return = aapl_features_clean['Strategy_Cumulative_Return'].iloc[-1]

print(f"Buy and Hold Return: {buy_hold_return:.2%}")
print(f"ML Strategy Return: {strategy_return:.2%}")
print(f"Outperformance: {strategy_return - buy_hold_return:.2%}")
```

## 9. Conclusion and Next Steps

In this notebook, we've built a machine learning model to predict next-day price movements for Apple stock using technical indicators. The model's accuracy is printed in Sections 5 and 7; the exact figure depends on the five-year data window downloaded at run time, so it is not hard-coded here.

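Key findings (fill these in from your own run, since the results depend on the downloaded data):
1. The most predictive technical indicators are given by the feature-importance ranking in Section 6
2. The optimized model parameters are printed by the grid search in Section 7
3. Whether our trading strategy outperformed or underperformed a simple buy-and-hold approach is reported at the end of Section 8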

Next steps for improvement:
1. Incorporate more data sources (e.g., sentiment analysis, macroeconomic indicators)
2. Experiment with different machine learning algorithms
3. Implement more sophisticated trading strategies
4. Test the model on different stocks and time periods
5. Consider time-series specific approaches like LSTM networks

Remember that past performance does not guarantee future results, and this model should be considered for educational purposes only.
```python
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_curve, auc
)

# Set visualization style
# NOTE(review): both calls change global plotting defaults, and the seaborn
# theme is applied second -- presumably its "whitegrid" settings take
# precedence wherever the two overlap; confirm this is the intended look.
plt.style.use('ggplot')
sns.set_theme(style="whitegrid")

# Fetch historical stock data
def fetch_stock_data(ticker='AAPL', period='5y', interval='1d'):
    """Download price history for one ticker with yfinance.

    Args:
        ticker: Symbol passed to ``yf.download``.
        period: Look-back period passed to ``yf.download``.
        interval: Bar interval passed to ``yf.download``.

    Returns:
        DataFrame with the download's index moved into a regular column by
        ``reset_index()`` and flat, single-level column names.

    Raises:
        ValueError: If the download returned no rows.
    """
    print(f"Fetching data for {ticker}...")
    data = yf.download(ticker, period=period, interval=interval)

    # Fail here with a clear message rather than letting an empty frame flow
    # into the plotting and modelling steps.
    if data is None or data.empty:
        raise ValueError(
            f"No data returned for ticker {ticker!r} "
            f"(period={period!r}, interval={interval!r})"
        )

    # Some yfinance versions return two-level (field, ticker) columns even for
    # a single ticker, which would make data['Close'] a DataFrame instead of a
    # Series. Collapse to the field names when only one ticker is present.
    columns = data.columns
    if (
        isinstance(columns, pd.MultiIndex)
        and columns.nlevels == 2
        and columns.get_level_values(1).nunique() == 1
    ):
        data.columns = columns.get_level_values(0).rename(None)

    return data.reset_index()

# Example: download AAPL history ('5y' period, '1d' interval)
aapl_data = fetch_stock_data(ticker='AAPL', period='5y', interval='1d')

# Preview the leading rows (rendered because it is the cell's last expression)
aapl_data.head()
```

## 2. Exploratory Data Analysis

Let's explore the data to understand the stock's price history.

```