In [2]:
!pip install xgboost
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import xgboost as xgb

# Step 1: Load Stock Data (example: Apple Inc.)
data = yf.download('AAPL', start='2010-01-01', end='2025-01-01')

# Step 2: Calculate Technical Indicators (SMA, RSI, MACD)
data['SMA'] = data['Close'].rolling(window=14).mean()

delta = data['Close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
RS = gain / loss
data['RSI'] = 100 - (100 / (1 + RS))

data['MACD'] = data['Close'].ewm(span=12, adjust=False).mean() - data['Close'].ewm(span=26, adjust=False).mean()

data = data.dropna()

# Step 3: Define Features and Target Variable
X = data[['SMA', 'RSI', 'MACD']].values
y = data['Close'].values

# Step 4: Scale Data
scaler_X = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler_X.fit_transform(X)

scaler_y = MinMaxScaler(feature_range=(0, 1))
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).flatten()

# Step 5: Split Data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, shuffle=False)

# Step 6: Train XGBoost Model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, learning_rate=0.1, max_depth=5)
xgb_model.fit(X_train, y_train)

# Step 7: Make Predictions
predictions = xgb_model.predict(X_test)

# Step 8: Inverse Scale Predictions
y_test_rescaled = scaler_y.inverse_transform(y_test.reshape(-1, 1))
predictions_rescaled = scaler_y.inverse_transform(predictions.reshape(-1, 1))

# Step 9: Calculate Performance Metrics
mse = mean_squared_error(y_test_rescaled, predictions_rescaled)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_rescaled, predictions_rescaled)

print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')

# Step 10: Plot Actual vs Predicted Prices
plt.figure(figsize=(10, 6))
plt.plot(y_test_rescaled, label='Actual', color='blue')
plt.plot(predictions_rescaled, label='Predicted', color='red', linestyle='--')
plt.title('Actual vs Predicted Stock Prices (XGBoost)')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()


Collecting xgboost
  Downloading xgboost-2.1.4-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-2.1.4-py3-none-win_amd64.whl (124.9 MB)
   ---------------------------------------- 0.0/124.9 MB ? eta -:--:--
    --------------------------------------- 2.4/124.9 MB 15.0 MB/s eta 0:00:09
   -- ------------------------------------- 7.3/124.9 MB 19.7 MB/s eta 0:00:06
   ---- ----------------------------------- 14.7/124.9 MB 24.9 MB/s eta 0:00:05
   ------ --------------------------------- 21.8/124.9 MB 27.0 MB/s eta 0:00:04
   -------- ------------------------------- 26.7/124.9 MB 26.1 MB/s eta 0:00:04
   ---------- ----------------------------- 33.8/124.9 MB 27.5 MB/s eta 0:00:04
   ------------- -------------------------- 40.9/124.9 MB 28.6 MB/s eta 0:00:03
   --------------- ------------------------ 47.2/124.9 MB 29.2 MB/s eta 0:00:03
   ---------------- ----------------------- 53.0/124.9 MB 28.6 MB/s eta 0:00:03
   ----------------- ---------------------- 55.8/124.9 MB 27.1 


[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


ValueError: Found array with 0 sample(s) (shape=(0, 3)) while a minimum of 1 is required by MinMaxScaler.

In [3]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import xgboost as xgb

# Step 1: Load Stock Data (example: Apple Inc.)
data = yf.download('AAPL', start='2010-01-01', end='2025-01-01')

# Step 2: Calculate Daily Price Change
data['Price Change'] = data['Close'].diff()

# Step 3: Separate Gains and Losses
data['Gain'] = data['Price Change'].where(data['Price Change'] > 0, 0)
data['Loss'] = -data['Price Change'].where(data['Price Change'] < 0, 0)

# Step 4: Calculate the Average Gain and Loss over the 14-day window
data['Avg Gain'] = data['Gain'].rolling(window=14).mean()
data['Avg Loss'] = data['Loss'].rolling(window=14).mean()

# Step 5: Calculate the Relative Strength (RS)
data['RS'] = data['Avg Gain'] / data['Avg Loss']

# Step 6: Calculate the RSI
data['RSI'] = 100 - (100 / (1 + data['RS']))

# Step 7: Calculate Technical Indicators
data['SMA'] = data['Close'].rolling(window=14).mean()  # Simple Moving Average
data['MACD'] = data['Close'].ewm(span=12, adjust=False).mean() - data['Close'].ewm(span=26, adjust=False).mean()

# Remove NaN values created by rolling or technical indicators
data = data.dropna()

# Step 8: Define Features and Target Variable
X = data[['SMA', 'RSI', 'MACD']]
y = data['Close']

# Step 9: Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Step 10: Train the Model
model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, objective='reg:squarederror')
model.fit(X_train, y_train)

# Step 11: Make Predictions
y_pred = model.predict(X_test)

# Step 12: Evaluate the Model
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

# Print the evaluation metrics
print(f'Mean Squared Error (MSE): {mse}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'Mean Absolute Error (MAE): {mae}')

# Step 13: Plot Actual vs Predicted Prices
plt.figure(figsize=(10, 6))
plt.plot(y_test.index, y_test, label='Actual', color='blue')
plt.plot(y_test.index, y_pred, label='Predicted', color='red', linestyle='--')
plt.title('Actual vs Predicted Stock Prices using XGBoost')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.show()


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.