## Stock Market Data - Intraday Analysis
This notebook filters stock market data based on earnings day, focusing on the stock price movements around that date.
Data is fetched using the `yfinance` library, and the analysis includes plotting stock prices using `plotly`.

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import plotly.graph_objects as go


## User Date Input for Filtering
Prompt the user to input a **start** and **end** date for analysis.  
- These dates will be used to filter both the earnings release events and the historical stock data.  
- Validation ensures that the dates are in the correct format and that the end date is not earlier than the start date.


In [2]:
# Collect the analysis window from the user.
# Keep prompting until both dates parse as YYYY-MM-DD and the range is not reversed.
while True:
    try:
        date_input_1 = input("Enter start date (YYYY-MM-DD): ")
        date_input_2 = input("Enter end date (YYYY-MM-DD): ")

        start_filter = datetime.strptime(date_input_1, "%Y-%m-%d").date()
        end_filter = datetime.strptime(date_input_2, "%Y-%m-%d").date()
    except ValueError:
        # strptime rejects anything that is not a real calendar date in this format
        print("Invalid date format. Please use YYYY-MM-DD.")
        continue

    # A single-day range (start == end) is accepted
    if start_filter <= end_filter:
        break
    print("End date must be later than or equal to start date. Try again.")

### Select Stock Ticker
Manually define the stock symbol (`"AAPL"` for Apple, `"NVDA"` for NVIDIA, `"AMZN"` for Amazon).  
This ticker will be used for both historical stock data and earnings calendar retrieval.

In [3]:

# PARAMETER: choose your stock symbol (e.g. "AAPL", "NVDA", "AMZN").
# The cells below read TICKER for the earnings calendar, the daily price
# history, the intraday download and the chart titles.
TICKER = "AMZN"  

### Retrieve and Clean Earnings Dates
- Convert the index to a timezone-naive datetime and store the dates in a clean list.
- Remove any future earnings dates so only past events are considered.
- Create a custom function that classifies the market reaction to earnings into **five categories**: Strong Positive, Positive, Neutral, Negative, Strong Negative.
- Classification is based on the percentage change in stock price between **T-2** (two trading days before earnings) and **T+2** (two trading days after earnings).

In [4]:

# Fetch the most recent earnings calendar entries for the chosen ticker
stock = yf.Ticker(TICKER)
earnings = stock.get_earnings_dates(limit=20)

# Strip the timezone from the index and expose the calendar day as its own column
naive_index = pd.to_datetime(earnings.index).tz_localize(None)
earnings.index = naive_index
earnings["Earnings Date"] = naive_index.date

# Keep only events dated today or earlier; future announcements are discarded
today = datetime.today().date()
earnings_dates = [
    event_day
    for event_day in earnings["Earnings Date"].tolist()
    if event_day <= today
]

# Define sentiment classifier
def categorize_sentiment(change):
    """Map a percentage price change to one of five sentiment labels.

    Bands (checked from most to least bullish, first match wins):
      change > 5         -> "Strong Positive"
      1 < change <= 5    -> "Positive"
      -1 <= change <= 1  -> "Neutral"
      -5 <= change < -1  -> "Negative"
      otherwise          -> "Strong Negative"

    A NaN input fails every comparison and therefore also lands in
    "Strong Negative".
    """
    rules = (
        (lambda pct: pct > 5, "Strong Positive"),
        (lambda pct: pct > 1, "Positive"),
        (lambda pct: pct >= -1, "Neutral"),
        (lambda pct: pct >= -5, "Negative"),
    )
    for matches, label in rules:
        if matches(change):
            return label
    return "Strong Negative"

# Loop through earnings dates and calculate t-2 to t+2 price change.
#
# For each past earnings date:
#   1. download daily closes in a calendar window around the event,
#   2. pick the 2nd trading day before (T-2) and the 2nd trading day after (T+2),
#   3. compute the % change between those closes and label it with categorize_sentiment,
#   4. attach the EPS estimate / reported EPS / surprise from the earnings calendar.
# The collected rows end up in df_results.

# Calendar-day padding on each side of the event. A 5-day pad is too short when a
# weekend plus a market holiday sit next to the event (e.g. a Friday release before
# a Monday holiday leaves only one session inside the window), which made the T+2
# lookup fail and the event disappear from the results. The T-2/T+2 picks are made
# relative to the event, so a wider pad does not change them for other dates.
WINDOW_DAYS = 10

results = []

for ed in earnings_dates:
    ed = pd.to_datetime(ed)
    start_date = ed - pd.Timedelta(days=WINDOW_DAYS)
    end_date = ed + pd.Timedelta(days=WINDOW_DAYS)

    # Reuse the Ticker object created at the top of this cell instead of building a new one per event
    data = stock.history(start=start_date, end=end_date)[['Close']].copy()
    if data.empty:
        print(f"Skipping {ed.date()}: no daily price data returned.")
        continue

    # Drop the timezone so the index can be compared with the naive earnings timestamp
    data.index = data.index.tz_localize(None)
    trading_days = data.index

    sessions_before = trading_days[trading_days < ed]
    sessions_after = trading_days[trading_days > ed]
    if len(sessions_before) < 2 or len(sessions_after) < 2:
        # Typical for a very recent event whose T+2 session has not happened yet
        print(f"Skipping {ed.date()}: fewer than two trading days on one side of the event.")
        continue

    t_minus_2 = sessions_before[-2]
    t_plus_2 = sessions_after[1]

    price_t_minus_2 = data.loc[t_minus_2, 'Close']
    price_t_plus_2 = data.loc[t_plus_2, 'Close']
    pct_change = ((price_t_plus_2 - price_t_minus_2) / price_t_minus_2) * 100
    sentiment = categorize_sentiment(pct_change)

    # First calendar row for this day (iloc[0] also covers repeated dates in the calendar)
    eps_row = earnings.loc[earnings["Earnings Date"] == ed.date()].iloc[0]

    results.append({
        "Earnings Date": ed.date(),
        "T-2 Date": t_minus_2.date(),
        "T+2 Date": t_plus_2.date(),
        "T-2 Price": round(price_t_minus_2, 2),
        "T+2 Price": round(price_t_plus_2, 2),
        "% Change": round(pct_change, 2),
        "Sentiment": sentiment,
        "EPS Estimate": eps_row["EPS Estimate"],
        "Reported EPS": eps_row["Reported EPS"],
        "Surprise(%)": eps_row["Surprise(%)"]
    })

df_results = pd.DataFrame(results)

In [5]:

# Filter the earnings results to the user-specified date range and chart them:
# a black close-price line with one coloured marker per earnings date.

# Marker colour per sentiment label. Defined before the branch so it always exists.
sentiment_colors = {
    "Strong Positive": "green",
    "Positive": "lime",
    "Neutral": "gray",
    "Negative": "orange",
    "Strong Negative": "red"
}

# Filter by user-specified date range.
# An empty frame built from zero result rows has no "Earnings Date" column, so only filter when there are rows.
if not df_results.empty:
    df_results = df_results[(df_results["Earnings Date"] >= start_filter) & (df_results["Earnings Date"] <= end_filter)]

if df_results.empty:
    # Nothing to chart. All plotting lives in the else-branch because it needs
    # df_full, which is only built when there is at least one earnings date.
    print("No earnings dates found within this range.")
else:
    print(df_results)

    # Daily closes starting 30 days before the first earnings date in range
    earliest_date = pd.to_datetime(min(df_results["Earnings Date"])) - pd.Timedelta(days=30)

    df_full = yf.Ticker(TICKER).history(start=earliest_date)[['Close']].copy()
    df_full.index = df_full.index.tz_localize(None)
    df_full['Date'] = df_full.index

    # Filter full stock price data to match chart window
    df_full = df_full[(df_full['Date'].dt.date >= start_filter) & (df_full['Date'].dt.date <= end_filter)]

    # Plotly figure
    fig = go.Figure()

    # Add stock price line
    fig.add_trace(go.Scatter(
        x=df_full['Date'], y=df_full['Close'],
        mode='lines', name=f'{TICKER} Price',
        line=dict(color='black')
    ))

    # Add sentiment scatter markers
    for _, row in df_results.iterrows():
        if df_full.empty:
            # No price rows to anchor a marker to; the nearest-date lookup below would raise
            break

        earnings_date = pd.to_datetime(row["Earnings Date"])
        price = df_full.loc[df_full['Date'] == earnings_date, 'Close']
        if price.empty:
            # No close on the earnings date itself: snap the marker to the nearest available date
            closest_idx = (df_full['Date'] - earnings_date).abs().idxmin()
            price = df_full.loc[closest_idx, 'Close']
            earnings_date = df_full.loc[closest_idx, 'Date']
        else:
            price = price.values[0]

        fig.add_trace(go.Scatter(
            x=[earnings_date], y=[price],
            mode='markers+text',
            marker=dict(
                color=sentiment_colors[row["Sentiment"]],
                size=12,
                line=dict(color='black', width=1)
            ),
            name=row["Sentiment"],
            hovertemplate=(
                f"<b>{earnings_date.date()}</b><br>"
                f"Surprise: {row['Surprise(%)']}%<br>"
                f"Change: {row['% Change']}%<br>"
                f"Reported EPS: {row['Reported EPS']}<br>"
                f"Estimate EPS: {row['EPS Estimate']}"
            ),
            showlegend=False
        ))

    # Customize layout
    fig.update_layout(
        title=f"{TICKER} Earnings Sentiment — EPS Surprise & Stock Reaction",
        xaxis_title="Date",
        yaxis_title="Close Price (USD)",
        template="plotly_white",
        legend_title="Sentiment",
        hovermode="closest"
    )

    # Manual sentiment legend: the per-event markers hide their legend entries,
    # so add one invisible point per category to list every colour exactly once
    for sentiment, color in sentiment_colors.items():
        fig.add_trace(go.Scatter(
            x=[None], y=[None],
            mode='markers',
            marker=dict(size=10, color=color),
            legendgroup=sentiment,
            showlegend=True,
            name=sentiment
        ))

    fig.show()

  Earnings Date    T-2 Date    T+2 Date  T-2 Price  T+2 Price  % Change  \
2    2024-10-31  2024-10-29  2024-11-04     190.83     195.78      2.59   
3    2024-08-01  2024-07-30  2024-08-05     181.71     161.02    -11.39   
4    2024-04-30  2024-04-26  2024-05-02     179.62     184.72      2.84   
5    2024-02-01  2024-01-30  2024-02-05     159.00     170.31      7.11   

         Sentiment  EPS Estimate  Reported EPS  Surprise(%)  
2         Positive          1.14          1.43        25.17  
3  Strong Negative          1.03          1.26        22.58  
4         Positive          0.83          0.98        17.91  
5  Strong Positive          0.80          1.00        24.55  


**EPS Surprise (%)** measures how much the company’s reported earnings per share (EPS) differ from analyst expectations.

## Intraday Earnings Release Stock Price Data

In [6]:
# Print a 1-based menu of the earnings dates that survived the range filter
print("Available Earnings Dates:")
for position, earnings_day in enumerate(df_results["Earnings Date"], start=1):
    print(f"{position}: {earnings_day}")


Available Earnings Dates:
1: 2024-10-31
2: 2024-08-01
3: 2024-04-30
4: 2024-02-01


### Analyze Stock Price Movements
- Focus on the stock price movements around the earnings release dates (`yfinance` only provides intraday data for the last 730 days).
- Use the filtered historical stock data to analyze price changes.
- Visualize the stock price movements using `plotly` for better insights.


In [7]:
# Let the user pick one earnings date, download hourly prices around it,
# and plot the intraday close for the earnings date itself.

# With no earnings dates there is no valid choice, so the prompt below could never be satisfied.
if df_results.empty:
    raise RuntimeError("No earnings dates available to choose from; adjust the date range and re-run the cells above.")

# Prompt user to choose a date by index (1-based position in df_results)
while True:
    try:
        choice = int(input("Enter the number of the earnings date you'd like to view intraday (e.g., 1): "))
    except ValueError:
        print("Please enter a valid integer.")
        continue

    if 1 <= choice <= len(df_results):
        selected_row = df_results.iloc[choice - 1]
        break
    print("Invalid choice. Try again.")

# Extract selected date and use the same ticker from earlier analysis
selected_date = selected_row["Earnings Date"]
print(f"Fetching intraday data for {TICKER} on {selected_date}")

intraday_data = None

# Intervals to try in order; the first one that yields rows for the earnings date wins
for interval in ["60m"]:
    try:
        # Download from the day before through the day after the earnings date
        # (the end bound is given as +2 days; temp_data is reused by the next cell)
        start_date = pd.to_datetime(selected_date) - timedelta(days=1)
        end_date = pd.to_datetime(selected_date) + timedelta(days=2)

        temp_data = yf.download(
            TICKER,
            start=start_date.strftime('%Y-%m-%d'),
            end=end_date.strftime('%Y-%m-%d'),
            interval=interval
        )

        if not temp_data.empty:
            # Drop the timezone, then filter to just the earnings date.
            # NOTE(review): the recorded output spans 13:30-19:30, which suggests these
            # timestamps are UTC rather than exchange-local time; confirm before
            # reading the x-axis as market time.
            temp_data.index = temp_data.index.tz_localize(None)
            earnings_date_data = temp_data[temp_data.index.date == selected_date]

            if not earnings_date_data.empty:
                intraday_data = earnings_date_data
                print(f"Found {len(intraday_data)} data points")
                break
            else:
                print(f"No data for earnings date with {interval} interval")
        else:
            print(f"No data available with {interval} interval")
    except Exception as e:
        # Report the failure and fall through to the next interval, if any
        print(f"Error with {interval} interval: {e}")

# Handle empty data
if intraday_data is None or intraday_data.empty:
    print(f"No intraday data found for {TICKER} on {selected_date}.")
    print("This could be because:")
    print("- The date falls on a weekend or holiday")
    print("- Limited trading data available for this date")
    print("- The stock was not actively traded on this date")
    print("- The date is too far in the past (yfinance only provides intraday data for the last 730 days)")
else:
    # Move the timestamp index into a column. Rebinding (instead of inplace=True)
    # avoids mutating a slice of temp_data.
    # Assumes the intraday index is named 'Datetime', as the original code did.
    intraday_data = intraday_data.reset_index()

    # yfinance may return (field, ticker) MultiIndex columns or flat field names
    # depending on its version; keep only the field level so 'Close' resolves either way.
    flat_columns = intraday_data.copy()
    if isinstance(flat_columns.columns, pd.MultiIndex):
        flat_columns.columns = [col[0] for col in flat_columns.columns]

    # Prepare data for plotting
    price_col = f'Price_{TICKER.upper()}'
    data = flat_columns[['Datetime', 'Close']].rename(columns={'Datetime': 'Time', 'Close': price_col})

    # Create the plot
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=data['Time'],
        y=data[price_col],
        mode='lines+markers',
        name=TICKER,
        line=dict(color='grey', width=2),
        marker=dict(color='grey', size=8, line=dict(color='grey', width=1)),
        hovertemplate="<b>%{x|%H:%M}</b><br>Price: $%{y:.2f}<extra></extra>"
    ))

    # Layout and axis formatting
    fig.update_layout(
        title=f"{TICKER} Intraday Price on {selected_date.strftime('%Y-%m-%d')}",
        xaxis_title="Time (Hour:Minute)",
        yaxis_title="Price (USD)",
        template="plotly_white",
        hovermode="closest",
        width=1000,
        height=600
    )

    fig.update_xaxes(tickformat="%H:%M", tickangle=45)
    fig.update_yaxes(tickformat=".2f", dtick=1)

    fig.show()

    print("\n=== Data Summary ===")
    print(f"Date: {selected_date}")
    print(f"Ticker: {TICKER}")
    print(f"Data points: {len(data)}")
    print(f"Price range: ${data[price_col].min():.2f} - ${data[price_col].max():.2f}")
    print(f"Time span: {data['Time'].dt.time.min()} to {data['Time'].dt.time.max()}")

Fetching intraday data for AMZN on 2024-10-31



YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed

Found 7 data points






=== Data Summary ===
Date: 2024-10-31
Ticker: AMZN
Data points: 7
Price range: $185.63 - $187.30
Time span: 13:30:00 to 19:30:00


## Looking at the Intraday Stock Price Movements 
- Pull intraday stock price data for the **day before** and **day after** the selected earnings date.
- Compare each intraday datapoint's price to the corresponding time the previous day.  
- Calculate and plot **hourly percentage changes** to visualize market reaction in finer detail.

In [8]:
# Compare the selected earnings day with the calendar day before and after it:
# build an hourly price table from temp_data (downloaded in the previous cell),
# attach the same-hour price of the previous day, and plot the three days.

# Work on a copy so temp_data is only replaced once the table is fully built
hourly = temp_data.copy()

# If the DataFrame has multi-index columns, flatten them to "<field>_<ticker>"
if isinstance(hourly.columns, pd.MultiIndex):
    hourly.columns = ['_'.join(col).strip() if col[1] else col[0] for col in hourly.columns]

# Timestamps live in the index on the first run and in a 'Datetime' column on a re-run
if 'Datetime' not in hourly.columns:
    hourly = hourly.reset_index()

# Locate the close price: flattened MultiIndex name, flat yfinance name, or the
# 'Price' column left behind by a previous run of this cell (keeps the cell re-runnable)
close_candidates = (f'Close_{TICKER.upper()}', 'Close', 'Price')
price_source = next((name for name in close_candidates if name in hourly.columns), None)
if price_source is None:
    raise KeyError(f"No close-price column found in intraday data; columns: {list(hourly.columns)}")

# Keep only Datetime and Close price, renamed to 'Price'
hourly = hourly[['Datetime', price_source]].rename(columns={price_source: 'Price'})

hourly['Time'] = pd.to_datetime(hourly['Datetime'])
hourly['Price'] = hourly['Price'].round(2)
hourly['Date'] = hourly['Time'].dt.date
hourly['HourKey'] = hourly['Time'].dt.strftime('%H:%M')   # hour-of-day key

# Build previous-day reference aligned by hour-of-day
prev = hourly[['Date', 'HourKey', 'Price']].rename(columns={'Price': 'PrevDayPrice'})
prev['Date'] = prev['Date'] + timedelta(days=1)  # shift forward so it lines up on merge

# Merge to attach previous day's same-hour price (NaN where the previous day has no such bar)
hourly = hourly.merge(prev, on=['Date', 'HourKey'], how='left')

# % change vs previous day same hour
hourly['PctChange_vsPrevDay'] = 100.0 * (hourly['Price'] - hourly['PrevDayPrice']) / hourly['PrevDayPrice']
hourly['PctLabel'] = hourly['PctChange_vsPrevDay'].map(lambda v: f"{v:.2f}%" if pd.notna(v) else "N/A")

# Publish the processed table under the name the original cell used
temp_data = hourly

# Split three days.
# NOTE: these are calendar days, not trading days. When the neighbouring calendar
# day has no session (e.g. a weekend), its trace is empty, and a missing previous
# day makes the % change show "N/A".
day_before = selected_date - timedelta(days=1)
day_after  = selected_date + timedelta(days=1)

df_before   = temp_data[temp_data['Date'] == day_before]
df_selected = temp_data[temp_data['Date'] == selected_date]
df_after    = temp_data[temp_data['Date'] == day_after]

# Plot
fig = go.Figure()

# Day -1
fig.add_trace(go.Scatter(
    x=df_before['Time'], y=df_before['Price'],
    mode='lines+markers', name=f"{day_before} (prev day)",
    line=dict(color='grey', width=2),
    marker=dict(color='grey', size=6, line=dict(color='grey', width=1)),
    hovertemplate="<b>%{x|%H:%M}</b><br>Price: $%{y:.2f}<extra></extra>"
))

# Day of (the hover also shows the % change vs the same hour on the previous day)
fig.add_trace(go.Scatter(
    x=df_selected['Time'], y=df_selected['Price'],
    mode='lines+markers', name=f"{selected_date} (selected day)",
    line=dict(color='black', width=3),
    marker=dict(color='black', size=8, line=dict(color='black', width=1)),
    customdata=df_selected[['PctLabel']].values,
    hovertemplate="<b>%{x|%H:%M}</b><br>"
                  "Price: $%{y:.2f}<br>"
                  "%Δ vs prev day: %{customdata[0]}<extra></extra>"
))

# Day +1
fig.add_trace(go.Scatter(
    x=df_after['Time'], y=df_after['Price'],
    mode='lines+markers', name=f"{day_after} (next day)",
    line=dict(color='grey', width=2),
    marker=dict(color='grey', size=6, line=dict(color='grey', width=1)),
    hovertemplate="<b>%{x|%H:%M}</b><br>Price: $%{y:.2f}<extra></extra>"
))

fig.update_layout(
    # Use the configured ticker; the title previously hard-coded "AAPL"
    title=f"{TICKER} Stock Price Intraday on {selected_date} ±1 Day",
    xaxis_title="Time (Hour:Minute)",
    yaxis_title="Price (USD)",
    template="plotly_white",
    hovermode="closest",
    width=1000, height=600
)
fig.update_xaxes(tickformat="%H:%M", tickangle=45)
fig.update_yaxes(tickformat=".2f", dtick=1)

fig.show()
