In [1]:
import os
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import yfinance as yf
from keras.layers import LSTM, Dense, Dropout
from keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
from textblob import TextBlob

ModuleNotFoundError: No module named 'textblob'

In [None]:


# -------------------------------------
# Step 1: Data Collection
# -------------------------------------

# Fetch stock price data
def get_stock_data(ticker, start_date, end_date):
    """Download price history for one ticker and return it with a ``Date`` column.

    Parameters
    ----------
    ticker : str
        Symbol forwarded to ``yf.download``.
    start_date, end_date : str
        Forwarded to ``yf.download`` as ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        The downloaded frame with flat column labels, the original index
        copied into a ``Date`` column, and a fresh 0..n-1 integer index.
    """
    # Work on a copy so the frame handed back by yfinance is never mutated.
    data = yf.download(ticker, start=start_date, end=end_date).copy()

    # Some yfinance releases return MultiIndex columns (field, ticker) even
    # for a single ticker; keep only the field level so that data['Close']
    # is a Series rather than a one-column DataFrame.
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)
        data.columns.name = None

    data['Date'] = data.index
    return data.reset_index(drop=True)

# Fetch news data for sentiment analysis
def get_news_sentiment(keyword, start_date, end_date):
    """Return the mean TextBlob polarity of news articles matching ``keyword``.

    Parameters
    ----------
    keyword : str
        Search term sent as the ``q`` query parameter.
    start_date, end_date : str
        Sent as the ``from`` and ``to`` query parameters.

    Returns
    -------
    float
        Mean polarity over all articles that have a title or description,
        or ``0.0`` when the response contains no such articles.
    """
    # Read the key from the environment instead of committing it to the
    # notebook; the placeholder is kept as a fallback for compatibility.
    api_key = os.environ.get('NEWSAPI_KEY', 'YOUR_NEWSAPI_KEY')

    # Passing `params` lets requests URL-encode the values, and the timeout
    # keeps a stalled connection from hanging the notebook indefinitely.
    response = requests.get(
        'https://newsapi.org/v2/everything',
        params={'q': keyword, 'from': start_date, 'to': end_date, 'apiKey': api_key},
        timeout=10,
    ).json()
    articles = response.get('articles') or []

    sentiments = []
    for article in articles:
        # A JSON null arrives as None; `or ''` stops the literal text
        # "None" from being scored.
        title = article.get('title') or ''
        description = article.get('description') or ''
        text = f"{title} {description}".strip()
        if not text:
            # Nothing to score; a neutral 0 here would only dilute the mean.
            continue
        sentiments.append(TextBlob(text).sentiment.polarity)
    return float(np.mean(sentiments)) if sentiments else 0.0

# Get macroeconomic data (example: simulated data for illustration)
def get_macro_factors():
    """Return placeholder macroeconomic indicators.

    The values are hard-coded constants, not fetched from any source; swap
    in a real macroeconomic data feed before relying on them.

    Returns
    -------
    dict
        Keys ``interest_rate``, ``inflation`` and ``gdp_growth`` mapped to
        floats (presumably percentages -- confirm when wiring in real data).
    """
    factors = dict(interest_rate=3.5, inflation=4.2, gdp_growth=2.7)
    return factors

# -------------------------------------
# Step 2: Data Preprocessing
# -------------------------------------

def compute_rsi(close, window=14):
    """Relative Strength Index from simple rolling average gains and losses.

    Returns a Series bounded to [0, 100]. The first ``window`` rows are NaN,
    and a window with no price movement at all also yields NaN (0 / 0).
    A window with gains but no losses yields 100.
    """
    delta = close.diff()
    avg_gain = delta.clip(lower=0).rolling(window=window).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window=window).mean()
    return 100 - 100 / (1 + avg_gain / avg_loss)


# Fetch stock data
ticker = "AAPL"
start_date = "2022-01-01"
end_date = "2024-12-01"
stock_data = get_stock_data(ticker, start_date, end_date)

# Add technical indicators
close = stock_data['Close']
stock_data['SMA_20'] = close.rolling(window=20).mean()
# The previous expression divided the rolling mean of returns by their
# rolling std, which is unbounded and blows up when that ratio is -1.
stock_data['RSI'] = compute_rsi(close, window=14)
stock_data['MACD'] = close.ewm(span=12).mean() - close.ewm(span=26).mean()

# Add sentiment analysis
# NOTE: this issues one get_news_sentiment call (one HTTP request) per row.
# That may exceed the news API's quota -- TODO confirm plan limits or cache.
stock_data['Sentiment'] = stock_data['Date'].apply(
    lambda day: get_news_sentiment(ticker, day.strftime('%Y-%m-%d'), day.strftime('%Y-%m-%d'))
)

# Add macroeconomic factors (constant columns broadcast to every row)
macro_factors = get_macro_factors()
stock_data['Interest_Rate'] = macro_factors['interest_rate']
stock_data['Inflation'] = macro_factors['inflation']
stock_data['GDP_Growth'] = macro_factors['gdp_growth']

# Drop rows with NaN values (the indicator warm-up period); assignment
# instead of inplace keeps the cell safe to re-run.
stock_data = stock_data.dropna()

# Scale the data
feature_columns = ['Close', 'SMA_20', 'RSI', 'MACD', 'Sentiment',
                   'Interest_Rate', 'Inflation', 'GDP_Growth']
features = stock_data[feature_columns]

# Fit the scaler on the leading 80% of rows only, so the min/max of the
# held-out period does not leak into training. Those rows all fall inside
# the training sequences produced by the later 80/20 sequence split.
n_fit_rows = int(len(features) * 0.8)
scaler = MinMaxScaler()
scaler.fit(features.iloc[:n_fit_rows])
scaled_data = scaler.transform(features)

# Create time-series data for LSTM
def create_sequences(data, seq_length=60):
    """Slice a 2-D feature matrix into overlapping windows and next-step targets.

    Parameters
    ----------
    data : array-like, shape (n_rows, n_features)
        Feature matrix ordered by time; column 0 is the prediction target.
    seq_length : int, default 60
        Number of consecutive rows in each input window.

    Returns
    -------
    x : numpy.ndarray, shape (n_rows - seq_length, seq_length, n_features)
        Window ``k`` holds rows ``k`` .. ``k + seq_length - 1``.
    y : numpy.ndarray, shape (n_rows - seq_length,)
        Column 0 of the row immediately following each window.

    Raises
    ------
    ValueError
        If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    data = np.asarray(data)
    n_samples = len(data) - seq_length
    if n_samples <= 0:
        # Keep the trailing dimensions so callers can still read
        # x.shape[1] and x.shape[2] when there is too little data.
        empty_x = np.empty((0, seq_length, data.shape[1]), dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_x, empty_y

    x = np.stack([data[start:start + seq_length] for start in range(n_samples)])
    y = data[seq_length:, 0].copy()  # Predict the 'Close' price
    return x, y

# Window the scaled feature matrix into (samples, timesteps, features) inputs
# and their next-step targets.
seq_length = 60
x, y = create_sequences(scaled_data, seq_length=seq_length)

# Chronological split with no shuffling: the first 80% of the sequences are
# used for training and the remaining 20% for testing.
train_size = int(0.8 * len(x))
x_train, y_train = x[:train_size], y[:train_size]
x_test, y_test = x[train_size:], y[train_size:]

# -------------------------------------
# Step 3: Build and Train LSTM Model
# -------------------------------------

# Each sample is (timesteps, features), read off the training tensor.
window_shape = (x_train.shape[1], x_train.shape[2])

# Two stacked LSTM layers, each followed by dropout, then a small dense head
# that emits one value per sample.
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape=window_shape))
model.add(Dropout(0.2))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(25, activation='relu'))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

# The test split is passed as validation data, so the per-epoch val_loss is
# measured on the same samples the evaluation step predicts on.
model.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=(x_test, y_test),
)

# -------------------------------------
# Step 4: Evaluate the Model
# -------------------------------------

def _to_price(scaled_close):
    """Map scaled 'Close' values (feature column 0) back to price units.

    MinMaxScaler's inverse is (X - min_) / scale_ per column, so applying it
    to column 0 directly avoids padding with a hard-coded number of dummy
    columns that would silently break if the feature list changed.
    """
    # Cast to float64 first: model.predict may return float32.
    flat = np.asarray(scaled_close, dtype=np.float64).reshape(-1)
    return (flat - scaler.min_[0]) / scaler.scale_[0]


# Predict on test data
predicted_prices = _to_price(model.predict(x_test))

# Actual prices
actual_prices = _to_price(y_test)

# Calculate RMSE
rmse = np.sqrt(np.mean((predicted_prices - actual_prices) ** 2))
print(f"RMSE: {rmse}")

# -------------------------------------
# Step 5: Visualization
# -------------------------------------

# Plot actual vs. predicted test-set prices on one axis. The x-axis is the
# sample position within the test split, not a calendar date.
# `plt` comes from the import cell at the top of the notebook.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(actual_prices, label="Actual Prices", color='blue')
ax.plot(predicted_prices, label="Predicted Prices", color='red')
ax.set_title(f"{ticker} Stock Price Prediction")
ax.set_xlabel("Time")
ax.set_ylabel("Stock Price")
ax.legend()
plt.show()
