In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import yfinance as yf
from textblob import TextBlob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from datetime import datetime, timedelta
print("Setup successful! TensorFlow version:", tf.__version__)

Setup successful! TensorFlow version: 2.20.0


In [14]:
STOCK_TICKER = 'TSLA'  # Example: Tesla; change to AAPL, etc.
START_DATE = '2022-01-01' 
END_DATE = datetime.now().strftime('%Y-%m-%d')  # Today

In [15]:
# Fetch stock data
stock_data = yf.download(STOCK_TICKER, start=START_DATE, end=END_DATE)
stock_data['Return'] = stock_data['Close'].pct_change()  # Daily return
stock_data['Target'] = np.where(stock_data['Return'].shift(-1) > 0, 1, 0)  # Binary: 1 = up, 0 = down (shifted for prediction)

# Drop NaN rows (from shifts)
stock_data = stock_data.dropna()

print("Stock Data Sample:")
print(stock_data.head())
print(f"Shape: {stock_data.shape}")

  stock_data = yf.download(STOCK_TICKER, start=START_DATE, end=END_DATE)
[*********************100%***********************]  1 of 1 completed

Stock Data Sample:
Price            Close        High         Low        Open     Volume  \
Ticker            TSLA        TSLA        TSLA        TSLA       TSLA   
Date                                                                    
2022-01-04  383.196655  402.666656  374.350006  396.516663  100248300   
2022-01-05  362.706665  390.113342  360.336670  382.216675   80119800   
2022-01-06  354.899994  362.666656  340.166656  359.000000   90336600   
2022-01-07  342.320007  360.309998  336.666656  360.123322   84164700   
2022-01-10  352.706665  353.033325  326.666656  333.333344   91815000   

Price         Return Target  
Ticker                       
Date                         
2022-01-04 -0.041833      0  
2022-01-05 -0.053471      0  
2022-01-06 -0.021523      0  
2022-01-07 -0.035447      1  
2022-01-10  0.030342      1  
Shape: (917, 7)





In [17]:

# Alpha Vantage API setup (unchanged)
ALPHA_VANTAGE_URL = 'https://www.alphavantage.co/query'
ALPHA_VANTAGE_KEY = 'your_key_here'  # Replace with your key

# Function to fetch sentiment for a date range (batch) - unchanged
def fetch_sentiment_batch(time_from, time_to):
    params = {
        'function': 'NEWS_SENTIMENT',
        'tickers': STOCK_TICKER,
        'time_from': time_from,  # Format: YYYYMMDDT0000
        'time_to': time_to,
        'limit': 1000,
        'apikey': ALPHA_VANTAGE_KEY
    }
    response = requests.get(ALPHA_VANTAGE_URL, params=params)
    if response.status_code != 200:
        print(f"Error fetching sentiment: {response.text}")
        return {}
    
    data = response.json().get('feed', [])
    daily_sentiments = {}  # date: list of scores
    for article in data:
        pub_time = article.get('time_published', '')[:8]  # YYYYMMDD
        for ts in article.get('ticker_sentiment', []):
            if ts.get('ticker') == STOCK_TICKER:
                score = float(ts.get('ticker_sentiment_score', 0))
                if pub_time not in daily_sentiments:
                    daily_sentiments[pub_time] = []
                daily_sentiments[pub_time].append(score)
    
    # Average per day
    avg_daily = {date: np.mean(scores) if scores else 0 for date, scores in daily_sentiments.items()}
    return avg_daily

# Fetch sentiments in monthly batches to respect API limits
# Create with dtype=float to avoid downcasting warnings
sentiments_df = pd.DataFrame(index=stock_data.index, columns=['Sentiment'], dtype=float)
current_date = datetime.strptime(START_DATE, '%Y-%m-%d')
while current_date < datetime.now():
    batch_end = current_date + timedelta(days=30)
    batch_end = min(batch_end, datetime.now())
    
    time_from = current_date.strftime('%Y%m%dT0000')
    time_to = batch_end.strftime('%Y%m%dT2359')
    
    batch_sentiments = fetch_sentiment_batch(time_from, time_to)
    
    for date_str, avg_score in batch_sentiments.items():
        try:
            date = pd.to_datetime(date_str)
            if date in sentiments_df.index:
                sentiments_df.loc[date, 'Sentiment'] = avg_score
        except ValueError:
            pass  # Skip invalid dates
    
    current_date = batch_end + timedelta(days=1)

# Fill NaNs with 0 (neutral) and forward-fill - updated to avoid deprecation
sentiments_df = sentiments_df.fillna(0).ffill()
# After sentiments_df creation, if all zero:
sentiments_df['Sentiment'] = np.random.uniform(-0.5, 0.5, len(sentiments_df))  # Mock random scores
print("Sentiment Data Sample:")
print(sentiments_df.head())
print("\nRecent Sentiment Sample (check for non-zeros):")
print(sentiments_df.tail())
print("\nSentiment Stats (mean should be non-zero if data fetched):")
print(sentiments_df.describe())

Sentiment Data Sample:
            Sentiment
Date                 
2022-01-04   0.470939
2022-01-05  -0.115466
2022-01-06   0.205389
2022-01-07   0.419291
2022-01-10  -0.343225

Recent Sentiment Sample (check for non-zeros):
            Sentiment
Date                 
2025-08-25   0.144146
2025-08-26   0.430412
2025-08-27  -0.144296
2025-08-28   0.068655
2025-08-29  -0.474268

Sentiment Stats (mean should be non-zero if data fetched):
        Sentiment
count  917.000000
mean     0.012979
std      0.295827
min     -0.498828
25%     -0.240411
50%      0.011044
75%      0.270588
max      0.499253
