# Step 1: Data Acquisition

In [11]:
import yfinance as yf
import pandas as pd

# Download historical stock data for Apple
stock_data = yf.download('AAPL', start='2020-01-01', end='2025-01-01')

# Inspect the first few rows
print(stock_data.head())

[*********************100%***********************]  1 of 1 completed

Price           Close       High        Low       Open     Volume
Ticker           AAPL       AAPL       AAPL       AAPL       AAPL
Date                                                             
2020-01-02  72.716087  72.776613  71.466827  71.721034  135480400
2020-01-03  72.009125  72.771752  71.783969  71.941336  146322800
2020-01-06  72.582924  72.621661  70.876090  71.127881  118387200
2020-01-07  72.241524  72.849201  72.021208  72.592571  108872000
2020-01-08  73.403648  73.706279  71.943759  71.943759  132079200





# Step 2: Feature Engineering 

In [25]:
# Extract correct column name dynamically from 'Close AAPL' style
close_column = [col for col in stock_data.columns if 'Close' in col][0]

# Calculate daily returns
stock_data['Daily Return'] = stock_data[close_column].pct_change()

# Calculate moving averages
stock_data['MA20'] = stock_data[close_column].rolling(window=20).mean()
stock_data['MA50'] = stock_data[close_column].rolling(window=50).mean()

# Calculate volatility (standard deviation of returns)
stock_data['Volatility'] = stock_data['Daily Return'].rolling(window=20).std()

# Drop rows with missing values
stock_data.dropna(inplace=True)

# Display the enriched dataset
print(stock_data.tail())


Price            Close        High         Low        Open    Volume  \
Ticker            AAPL        AAPL        AAPL        AAPL      AAPL   
Date                                                                   
2024-12-23  254.989655  255.369227  253.171646  254.490204  40858800   
2024-12-24  257.916443  257.926411  255.009620  255.209412  23234700   
2024-12-26  258.735504  259.814335  257.347047  257.906429  27237100   
2024-12-27  255.309296  258.415896  252.782075  257.546826  42355300   
2024-12-30  251.923019  253.221595  250.474615  251.952985  35557500   

Price      Daily Return        MA20        MA50 Volatility    Tomorrow Trend  
Ticker                                                                        
Date                                                                          
2024-12-23     0.003065  244.757892  235.021529   0.008893  257.916443     1  
2024-12-24     0.011478  246.023002  235.564016   0.008828  258.735504     1  
2024-12-26     0.003176  247

# Step 3: Preparing Data for Modeling

In [27]:
import numpy as np

# Extract close column name
close_column = [col for col in stock_data.columns if 'Close' in col][0]

# Create the target variable based on next-day price movement
stock_data['Tomorrow'] = stock_data[close_column].shift(-1)
stock_data['Trend'] = np.where(stock_data['Tomorrow'] > stock_data[close_column], 1, 0)

# Define features and target
features = ['MA20', 'MA50', 'Volatility', 'Daily Return']
X = stock_data[features]
y = stock_data['Trend']

# Split data into training and testing datasets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=False)


# Step 4: Building and Training the Model

In [29]:
from sklearn.ensemble import RandomForestClassifier

# Initialize and train the Random Forest model
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

# Predict market trends
predictions = model.predict(X_test)

# Step 5: Model Optimisation (Hyperparameter Tuning)

In [31]:
from sklearn.model_selection import GridSearchCV

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# Conduct grid search for optimization
grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Extract and utilize the best model
optimized_model = grid_search.best_estimator_
print("Best parameters found:", grid_search.best_params_)

Best parameters found: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}


# Step 6: Model Evaluation

In [40]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Evaluate the optimized model
optimized_predictions = optimized_model.predict(X_test)

accuracy = accuracy_score(y_test, optimized_predictions)
conf_matrix = confusion_matrix(y_test, optimized_predictions)
class_report = classification_report(y_test, optimized_predictions)

print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

Accuracy: 0.47
Confusion Matrix:
 [[114  37]
 [149  48]]
Classification Report:
               precision    recall  f1-score   support

           0       0.43      0.75      0.55       151
           1       0.56      0.24      0.34       197

    accuracy                           0.47       348
   macro avg       0.50      0.50      0.45       348
weighted avg       0.51      0.47      0.43       348



# Step 7: Implementing the Model in Trading

In [23]:
# Integrate predictions into the test dataset
X_test = X_test.copy()
X_test['Actual'] = y_test
X_test['Predicted'] = optimized_predictions

# Generate trade signals based on predictions
X_test['Signal'] = np.where(X_test['Predicted'] == 1, 'Buy', 'Sell')

print(X_test.tail(10))

Price             MA20        MA50 Volatility Daily Return Actual Predicted  \
Ticker                                                                        
Date                                                                          
2024-12-17  240.166439  233.089609   0.005960     0.009720      0         1   
2024-12-18  241.153854  233.539677   0.008421    -0.021422      1         0   
2024-12-19  242.192212  233.949271   0.008440     0.007015      1         1   
2024-12-20  243.489286  234.462741   0.008878     0.018816      1         1   
2024-12-23  244.757892  235.021529   0.008893     0.003065      1         1   
2024-12-24  246.023002  235.564016   0.008828     0.011478      1         1   
2024-12-26  247.219685  236.071997   0.008782     0.003176      0         1   
2024-12-27  248.251551  236.552763   0.009615    -0.013242      0         0   
2024-12-30  248.994234  236.958420   0.010262    -0.013263      0         0   
2024-12-31  249.535140  237.271641   0.010385    -0.