In [2]:
# Import necessary libraries
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt

In [None]:
# Step 1: Download Stock Data (e.g., Apple Stock)
stock_symbol = 'AAPL'  # Example: Apple Stock
start_date = '2015-01-01'
end_date = '2023-01-01'

# Download daily price history using yfinance. The frame is bound to
# `stock_data` because every later cell reads that name; binding it only to
# `dataset` makes the notebook fail with NameError on a fresh kernel.
stock_data = yf.download(stock_symbol, start=start_date, end=end_date)

# Recent yfinance releases can return (field, ticker) MultiIndex columns even
# for a single symbol; flatten to the field names so that
# stock_data['Close'] is a plain Series in the cells below.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)

# Keep the earlier name as an alias for any code that still refers to it.
dataset = stock_data

# Display the first few rows of data
print(stock_data.head())

In [None]:
# Step 2: Feature Engineering
# Every indicator below is derived from the closing price.
close = stock_data['Close']

# Simple moving averages of the close over 5, 10 and 50 rows
for window in (5, 10, 50):
    stock_data[f'{window}_day_MA'] = close.rolling(window=window).mean()

# Relative Strength Index (RSI): split each row-to-row change into its gain
# and loss parts, average each part over 14 rows, then map the ratio of the
# two averages onto a 0-100 scale.
delta = close.diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain, avg_loss = (part.rolling(window=14).mean() for part in (gain, loss))
rs = avg_gain / avg_loss
stock_data['RSI'] = 100 - (100 / (1 + rs))

# MACD (Moving Average Convergence Divergence): the 12-span EMA minus the
# 26-span EMA, plus a 9-span EMA of that difference as the signal line.
for span in (26, 12):
    stock_data[f'{span}_day_EMA'] = close.ewm(span=span, adjust=False).mean()
stock_data['MACD'] = stock_data['12_day_EMA'] - stock_data['26_day_EMA']
stock_data['Signal_Line'] = stock_data['MACD'].ewm(span=9, adjust=False).mean()


In [None]:
# Remove, in place, every row that has a NaN in any column (for example the
# warm-up rows where a rolling window was not yet full)
stock_data.dropna(axis=0, how='any', inplace=True)

In [None]:
# Step 3: Define Features and Target Variable
# Use technical indicators as features
features = ['5_day_MA', '10_day_MA', '50_day_MA', 'RSI', 'MACD', 'Signal_Line', 'Volume']

# Target variable is the future closing price (next day's Close).
# shift(-1) moves every close up one row, so row t holds the close of row
# t+1; the final row has no following day and becomes NaN.
next_close = stock_data['Close'].shift(-1)

# Align the data (features and target): drop that final, unlabeled row from
# both sides exactly once. Trimming y twice (dropna() followed by iloc[:-1])
# leaves it one row shorter than X, and train_test_split then rejects the
# pair for having inconsistent numbers of samples.
X = stock_data[features].iloc[:-1]
y = next_close.iloc[:-1]

# Guard against any future misalignment between features and target.
assert X.index.equals(y.index), "features and target must cover the same rows"

In [None]:
# Step 4: Split Data into Training and Testing Sets
# shuffle=False keeps the row order, so the final 20% of rows form the test set
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

In [None]:
# Step 5: Train a Random Forest Model
# Build a 100-tree forest with a fixed seed and fit it on the training split
# (fit() returns the estimator itself, so the call can be chained)
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

In [None]:
# Step 6: Make Predictions
# Predict one value for every row of the held-out test features
predictions = model.predict(X_test)

In [None]:
# Step 7: Evaluate the Model
# Mean absolute difference between the actual and predicted test-set values
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
print(f'Mean Absolute Error: {mae}')



In [None]:
# Step 8: Visualize the Predictions vs Actual Prices
# Draw both series on one explicit Axes, sharing the test set's index on x
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test.index, y_test.values, label='Actual Prices', color='blue')
ax.plot(y_test.index, predictions, label='Predicted Prices', color='red', linestyle='--')
ax.set_title(f'{stock_symbol} Stock Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')
ax.legend()
plt.show()



In [None]:
# Step 9: Predict Future Price (for the next day)
# Select the most recent row as a one-row DataFrame (note the double
# brackets) so the model receives the same column names it was fitted with;
# passing a bare NumPy array makes scikit-learn warn about missing feature
# names.
latest_data = stock_data[features].iloc[[-1]]
next_day_prediction = model.predict(latest_data)
print(f'Predicted Next Day Closing Price: {next_day_prediction[0]}')