In [5]:
import tweepy
import yfinance as yf
import pandas as pd
import numpy as np
from textblob import TextBlob
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from ta.trend import MACD
from ta.momentum import RSIIndicator
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta


In [10]:
client_id = 'd3piSzhjYWxhdHRKNFpnU1Azakc6MTpjaQ'
client_id_secret = 'iTyHTvVdyGmR-232UBk1RswW0s1SFPW8aQflF1-3PFptF-OJbh'
api_secret = 'OggcdFdwI3U8iag9IbEgqc9TPk3tIzD8dTgTJi7C2zfUmWOXFw'
api_key = 'BpcTsRKW0jnflmCvCrjk6EcNt'
bearer_token = 'AAAAAAAAAAAAAAAAAAAAABzx3gEAAAAAFal9LhOHENKlwO%2FqBZ0E1shIUtg%3DDdSjaV1zqYTFdC3DfBm8i1eIG8nDz5BL2B4WnjqO9cPI197pzQ'
access_token = '934895513090101250-wPIZaSff0C1ulOrPOIDSv7UHK0s73R5'
access_token_secret = 'AbVdhq1oo5ooKyPttSL5hI8VrSkHVU4P3aBy8uVBeNmL0'
# X API setup (replace with your credentials)
auth = tweepy.OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

# Stock list
stocks = ['AAPL', 'TSLA', 'MSFT', 'RIVN']

# Number of lags for features
LAGS = [1, 2, 3, 7]

# Prediction horizon (days)
HORIZON = 30

# Confidence thresholds for neural network
BUY_THRESHOLD = 0.7
SELL_THRESHOLD = 0.3

# Function to fetch sentiment from X
def get_sentiment(stock):
    query = f"${stock} -from:stockbot"
    tweets = api.search_tweets(q=query, count=100, lang="en", tweet_mode="extended")
    sentiments = []
    for tweet in tweets:
        try:
            text = tweet.retweeted_status.full_text
        except AttributeError:
            text = tweet.full_text
        analysis = TextBlob(text)
        sentiments.append(analysis.sentiment.polarity)
    return np.mean(sentiments) if sentiments else 0.0

# Function to fetch stock data, technical indicators, lagging features, and temporal features
def get_stock_data(stock, start_date, end_date):
    df = yf.download(stock, start=start_date, end=end_date)
    df['Returns'] = df['Close'].pct_change()
    # Long-term trend target: 1 if price increases over next 30 days, 0 otherwise
    df['Future_Close'] = df['Close'].shift(-HORIZON)
    df['Target'] = (df['Future_Close'] > df['Close']).astype(int)
    #df['Sentiment'] = get_sentiment(stock)
    # Technical indicators
    df['RSI'] = RSIIndicator(df['Close']).rsi()
    macd = MACD(df['Close'])
    df['MACD'] = macd.macd()
    df['MACD_Signal'] = macd.macd_signal()
    # Lagging features
    features_to_lag = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_Signal']#, 'Sentiment']
    for feature in features_to_lag:
        for lag in LAGS:
            df[f'{feature}_lag_{lag}'] = df[feature].shift(lag)
    # Day-of-week and month features
    df.index = pd.to_datetime(df.index)
    df['Day_of_Week'] = df.index.dayofweek + 1  # Monday=1, ..., Sunday=7
    df['Month'] = df.index.month  # January=1, ..., December=12
    return df.dropna()

# Function to prepare data for ML
def prepare_data(df):
    features = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_Signal', 'Day_of_Week', 'Month']#, 'Sentiment']
    for feature in ['Close', 'Volume', 'RSI', 'MACD', 'MACD_Signal']:#, 'Sentiment']:
        for lag in LAGS:
            features.append(f'{feature}_lag_{lag}')
    X = df[features]
    y = df['Target']
    return X, y

# Neural Network model
def build_neural_network(input_dim):
    model = Sequential([
        Dense(64, activation='relu', input_dim=input_dim),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Train and evaluate models
def train_models(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Neural Network
    nn_model = build_neural_network(X_train.shape[1])
    history = nn_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)
    nn_pred = (nn_model.predict(X_test) > 0.5).astype(int)
    nn_accuracy = accuracy_score(y_test, nn_pred)
    
    # Random Forest with GridSearchCV
    rf = RandomForestClassifier(random_state=42)
    rf_params = {'n_estimators': [50, 100], 'max_depth': [None, 10]}
    rf_grid = GridSearchCV(rf, rf_params, cv=5)
    rf_grid.fit(X_train, y_train)
    rf_pred = rf_grid.predict(X_test)
    rf_accuracy = accuracy_score(y_test, rf_pred)
    
    # SVM with GridSearchCV
    svm = SVC(random_state=42, probability=True)  # Enable probabilities for SVM
    svm_params = {'C': [0.1, 1], 'kernel': ['linear', 'rbf']}
    svm_grid = GridSearchCV(svm, svm_params, cv=5)
    svm_grid.fit(X_train, y_train)
    svm_pred = svm_grid.predict(X_test)
    svm_accuracy = accuracy_score(y_test, svm_pred)
    
    return {
        'nn': {'model': nn_model, 'accuracy': nn_accuracy, 'history': history},
        'rf': {'model': rf_grid, 'accuracy': rf_accuracy},
        'svm': {'model': svm_grid, 'accuracy': svm_accuracy}
    }

# Generate buy/sell signals with confidence threshold
def generate_signals(df, model, model_type):
    X, _ = prepare_data(df)
    if model_type == 'nn':
        probabilities = model.predict(X)
        signals = np.where(probabilities > BUY_THRESHOLD, 'Buy',
                          np.where(probabilities < SELL_THRESHOLD, 'Sell', 'Hold')).flatten()
    else:
        probabilities = model.predict_proba(X)[:, 1]  # Probability of class 1 (uptrend)
        signals = np.where(probabilities > BUY_THRESHOLD, 'Buy',
                          np.where(probabilities < SELL_THRESHOLD, 'Sell', 'Hold')).flatten()
    df['Signal'] = signals
    return df

# Calculate gains/losses over the prediction horizon
def calculate_gains(df):
    df['Position'] = df['Signal'].shift(1)  # Shift to avoid look-ahead bias
    # Calculate return over the horizon period after a Buy signal
    df['Horizon_Return'] = (df['Future_Close'] - df['Close']) / df['Close']
    df['Trade_Return'] = df['Horizon_Return'] * (df['Position'] == 'Buy').astype(int)
    df['Cumulative_Gain'] = (1 + df['Trade_Return']).cumprod() - 1
    return df


In [16]:
end_date = datetime.now()
start_date = end_date - timedelta(days=365 + HORIZON)
df = yf.download('AAPL', start=start_date, end=end_date)

Failed to get ticker 'AAPL' reason: Expecting value: line 1 column 1 (char 0)
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAPL']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


In [11]:
# Fetch data
end_date = datetime.now()
start_date = end_date - timedelta(days=365 + HORIZON)
results = {}
for stock in stocks:
    df = get_stock_data(stock, start_date, end_date)
    X, y = prepare_data(df)
    models = train_models(X, y)
    results[stock] = {'df': df, 'models': models}

df.head()

Failed to get ticker 'AAPL' reason: Expecting value: line 1 column 1 (char 0)
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAPL']: YFTzMissingError('$%ticker%: possibly delisted; No timezone found')


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [None]:
# Streamlit Dashboard
def main():
    st.title("Stock Trend Prediction Dashboard (30-Day Horizon)")
    
    st.write("Signals predict whether the stock will trend upward or downward over the next 30 days. "
             "'Buy' indicates an expected uptrend, 'Sell' a downtrend, and 'Hold' low confidence.")
    
    # Fetch data
    end_date = datetime.now()
    start_date = end_date - timedelta(days=365 + HORIZON)
    results = {}
    for stock in stocks:
        df = get_stock_data(stock, start_date, end_date)
        X, y = prepare_data(df)
        models = train_models(X, y)
        results[stock] = {'df': df, 'models': models}
    
    # Bar chart of accuracies
    accuracies = []
    for stock in stocks:
        for model_name, model_info in results[stock]['models'].items():
            accuracies.append({'Stock': stock, 'Model': model_name, 'Accuracy': model_info['accuracy']})
    acc_df = pd.DataFrame(accuracies)
    acc_df = acc_df.sort_values('Accuracy', ascending=False)
    
    st.subheader("Model Accuracy by Stock")
    fig = px.bar(acc_df, x='Stock', y='Accuracy', color='Model', barmode='group')
    st.plotly_chart(fig)
    
    # Stock and model selection
    selected_stock = st.selectbox("Select Stock", stocks)
    selected_model = st.selectbox("Select Model", ['nn', 'rf', 'svm'])
    
    df = results[selected_stock]['df']
    model_info = results[selected_stock]['models'][selected_model]
    model = model_info['model']
    
    # Generate signals and calculate gains
    df_signals = generate_signals(df, model, selected_model)
    df_signals = calculate_gains(df_signals)
    
    # Buy/Sell/Hold signals plot
    st.subheader(f"Buy/Sell/Hold Signals for {selected_stock} (30-Day Trend)")
    fig_signals = go.Figure()
    fig_signals.add_trace(go.Scatter(x=df_signals.index, y=df_signals['Close'], name='Close Price'))
    buy_signals = df_signals[df_signals['Signal'] == 'Buy']
    sell_signals = df_signals[df_signals['Signal'] == 'Sell']
    hold_signals = df_signals[df_signals['Signal'] == 'Hold']
    fig_signals.add_trace(go.Scatter(x=buy_signals.index, y=buy_signals['Close'], mode='markers', 
                                    name='Buy', marker=dict(symbol='triangle-up', size=10, color='green')))
    fig_signals.add_trace(go.Scatter(x=sell_signals.index, y=sell_signals['Close'], mode='markers', 
                                    name='Sell', marker=dict(symbol='triangle-down', size=10, color='red')))
    fig_signals.add_trace(go.Scatter(x=hold_signals.index, y=hold_signals['Close'], mode='markers', 
                                    name='Hold', marker=dict(symbol='circle', size=8, color='gray')))
    st.plotly_chart(fig_signals)
    
    # Cumulative gains
    st.subheader("Cumulative Gains (30-Day Holding Periods)")
    st.write(f"Total Gain/Loss: {df_signals['Cumulative_Gain'].iloc[-1]:.2%}")
    fig_gains = px.line(df_signals, x=df_signals.index, y='Cumulative_Gain', title='Cumulative Returns')
    st.plotly_chart(fig_gains)
    
    # Sentiment analysis chart
    st.subheader("Sentiment Analysis")
    sentiment = df['Sentiment'].iloc[-1]
    st.write(f"Latest Sentiment Score: {sentiment:.2f} (Positive > 0, Negative < 0)")
    fig_sentiment = px.line(df, x=df.index, y='Sentiment', title='Sentiment Trend')
    st.plotly_chart(fig_sentiment)
    
    # Backpropagation trend (for neural network)
    if selected_model == 'nn':
        st.subheader("Neural Network Backpropagation Trend")
        history = model_info['history']
        fig_loss = go.Figure()
        fig_loss.add_trace(go.Scatter(y=history.history['loss'], name='Training Loss'))
        fig_loss.add_trace(go.Scatter(y=history.history['val_loss'], name='Validation Loss'))
        fig_loss.update_layout(title='Neural Network Loss Over Epochs', xaxis_title='Epoch', yaxis_title='Loss')
        st.plotly_chart(fig_loss)
    
    # Technical indicators
    st.subheader("Technical Indicators")
    fig_tech = go.Figure()
    fig_tech.add_trace(go.Scatter(x=df.index, y=df['RSI'], name='RSI'))
    fig_tech.add_trace(go.Scatter(x=df.index, y=df['MACD'], name='MACD'))
    fig_tech.add_trace(go.Scatter(x=df.index, y=df['MACD_Signal'], name='MACD Signal'))
    st.plotly_chart(fig_tech)

if __name__ == "__main__":
    main()