In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import yfinance as yf
from textblob import TextBlob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from datetime import datetime, timedelta
print("Setup successful! TensorFlow version:", tf.__version__)

Setup successful! TensorFlow version: 2.20.0


In [7]:
STOCK_TICKER = 'TSLA'  # Example: Tesla; change to AAPL, etc.
START_DATE = '2022-01-01' 
END_DATE = datetime.now().strftime('%Y-%m-%d')  # Today

In [8]:
# Fetch stock data
stock_data = yf.download(STOCK_TICKER, start=START_DATE, end=END_DATE)
stock_data['Return'] = stock_data['Close'].pct_change()  # Daily return
stock_data['Target'] = np.where(stock_data['Return'].shift(-1) > 0, 1, 0)  # Binary: 1 = up, 0 = down (shifted for prediction)

# Drop NaN rows (from shifts)
stock_data = stock_data.dropna()

print("Stock Data Sample:")
print(stock_data.head())
print(f"Shape: {stock_data.shape}")

  stock_data = yf.download(STOCK_TICKER, start=START_DATE, end=END_DATE)
[*********************100%***********************]  1 of 1 completed

Stock Data Sample:
Price           Close       High        Low       Open     Volume    Return  \
Ticker           TSLA       TSLA       TSLA       TSLA       TSLA             
Date                                                                          
2020-01-03  29.534000  30.266666  29.128000  29.366667  266677500  0.029633   
2020-01-06  30.102667  30.104000  29.333332  29.364668  151995000  0.019255   
2020-01-07  31.270666  31.441999  30.224001  30.760000  268231500  0.038801   
2020-01-08  32.809334  33.232666  31.215334  31.580000  467164500  0.049205   
2020-01-09  32.089333  33.253334  31.524668  33.139999  426606000 -0.021945   

Price      Target  
Ticker             
Date               
2020-01-03      1  
2020-01-06      1  
2020-01-07      1  
2020-01-08      0  
2020-01-09      0  
Shape: (1422, 7)





In [13]:

# Alpha Vantage API setup (unchanged)
ALPHA_VANTAGE_URL = 'https://www.alphavantage.co/query'
ALPHA_VANTAGE_KEY = 'your_key_here'  # Replace with your key

# Function to fetch sentiment for a date range (batch) - unchanged
def fetch_sentiment_batch(time_from, time_to):
    params = {
        'function': 'NEWS_SENTIMENT',
        'tickers': STOCK_TICKER,
        'time_from': time_from,  # Format: YYYYMMDDT0000
        'time_to': time_to,
        'limit': 1000,
        'apikey': ALPHA_VANTAGE_KEY
    }
    response = requests.get(ALPHA_VANTAGE_URL, params=params)
    if response.status_code != 200:
        print(f"Error fetching sentiment: {response.text}")
        return {}
    
    data = response.json().get('feed', [])
    daily_sentiments = {}  # date: list of scores
    for article in data:
        pub_time = article.get('time_published', '')[:8]  # YYYYMMDD
        for ts in article.get('ticker_sentiment', []):
            if ts.get('ticker') == STOCK_TICKER:
                score = float(ts.get('ticker_sentiment_score', 0))
                if pub_time not in daily_sentiments:
                    daily_sentiments[pub_time] = []
                daily_sentiments[pub_time].append(score)
    
    # Average per day
    avg_daily = {date: np.mean(scores) if scores else 0 for date, scores in daily_sentiments.items()}
    return avg_daily

# Fetch sentiments in monthly batches to respect API limits
# Create with dtype=float to avoid downcasting warnings
sentiments_df = pd.DataFrame(index=stock_data.index, columns=['Sentiment'], dtype=float)
current_date = datetime.strptime(START_DATE, '%Y-%m-%d')
while current_date < datetime.now():
    batch_end = current_date + timedelta(days=30)
    batch_end = min(batch_end, datetime.now())
    
    time_from = current_date.strftime('%Y%m%dT0000')
    time_to = batch_end.strftime('%Y%m%dT2359')
    
    batch_sentiments = fetch_sentiment_batch(time_from, time_to)
    
    for date_str, avg_score in batch_sentiments.items():
        try:
            date = pd.to_datetime(date_str)
            if date in sentiments_df.index:
                sentiments_df.loc[date, 'Sentiment'] = avg_score
        except ValueError:
            pass  # Skip invalid dates
    
    current_date = batch_end + timedelta(days=1)

# Fill NaNs with 0 (neutral) and forward-fill - updated to avoid deprecation
sentiments_df = sentiments_df.fillna(0).ffill()

print("Sentiment Data Sample:")
print(sentiments_df.head())
print("\nRecent Sentiment Sample (check for non-zeros):")
print(sentiments_df.tail())
print("\nSentiment Stats (mean should be non-zero if data fetched):")
print(sentiments_df.describe())

Sentiment Data Sample:
            Sentiment
Date                 
2020-01-03          0
2020-01-06          0
2020-01-07          0
2020-01-08          0
2020-01-09          0


  sentiments_df.fillna(0, inplace=True)
  sentiments_df = sentiments_df.fillna(method='ffill')  # Optional: Carry forward last known sentiment
