# Notebook 03: Robustness & Heterogeneity

**Goal:** Test if the +0.45% result holds under different specifications.

---

## What I'm Testing

Main finding: Lockup expirations are associated with a +0.45% price increase (p=0.0004).

**Questions:**
- Does it hold for different lockup windows?
- Does it hold without outliers?
- Does effect differ by company size?
- Does effect differ by sector?
- Are there significant effects at fake lockup dates? (placebo test)

## Setup

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from linearmodels.panel import PanelOLS
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Output locations for figures and tabular results. Both are created up
# front (including missing parents) so later cells can write to them.
fig_output_dir = Path("../outputs/figures")
results_output_dir = Path("../outputs/results")

for _out_dir in (fig_output_dir, results_output_dir):
    _out_dir.mkdir(parents=True, exist_ok=True)


In [2]:
# Load the processed price panel, parsing the two date columns
df = pd.read_csv(
    '../data/processed/stock_prices_ipo_adjusted.csv',
    parse_dates=['Date', 'IPO_Date'],
)

# Post_Lockup: 1 once more than 180 days have passed since the IPO, else 0.
# Days_To_Lockup: days relative to day 180 (negative before, positive after).
df = df.assign(
    Post_Lockup=(df['Days_Since_IPO'] > 180).astype(int),
    Days_To_Lockup=df['Days_Since_IPO'] - 180,
)

# Analysis sample: only rows that have an abnormal return
df_clean = df.dropna(subset=['Abnormal_Return']).copy()

print(f"\nData: {len(df_clean):,} observations, {df_clean['Ticker'].nunique()} companies")


Data: 17,802 observations, 71 companies


## Helper Function

In [3]:
def run_twfe_did(data, label="", min_obs=100):
    """
    Run TWFE DiD, return results or None if it cannot be estimated.

    Regresses Abnormal_Return on Post_Lockup with entity (Ticker) and
    time (Date) fixed effects using PanelOLS, with standard errors
    clustered by entity.

    Parameters:
    -----------
    data : DataFrame with Ticker, Date, Abnormal_Return, Post_Lockup
    label : str
        Name stored in the result and shown in the failure message.
    min_obs : int
        Minimum number of rows required before estimation is attempted
        (default 100).

    Returns:
    --------
    dict or None
        On success a dict with keys label, coef, se, pval, ci_lower,
        ci_upper (coef -/+ 1.96*se), n_obs, n_companies and significant
        (pval < 0.05). None when Post_Lockup has fewer than two distinct
        values, when there are fewer than min_obs rows, or when the
        estimation raises an exception (a one-line reason is printed).
    """
    # Check basics: the treatment dummy must vary, and we need enough rows
    if data['Post_Lockup'].nunique() < 2:
        return None

    if len(data) < min_obs:
        return None

    try:
        df_panel = data.set_index(['Ticker', 'Date'])

        model = PanelOLS(
            dependent=df_panel['Abnormal_Return'],
            exog=df_panel[['Post_Lockup']],
            entity_effects=True,
            time_effects=True,
            check_rank=False
        ).fit(cov_type='clustered', cluster_entity=True)

        coef = model.params['Post_Lockup']
        se = model.std_errors['Post_Lockup']
        pval = model.pvalues['Post_Lockup']

        return {
            'label': label,
            'coef': coef,
            'se': se,
            'pval': pval,
            'ci_lower': coef - 1.96*se,
            'ci_upper': coef + 1.96*se,
            'n_obs': int(model.nobs),
            'n_companies': data['Ticker'].nunique(),
            'significant': bool(pval < 0.05)
        }
    except Exception as exc:
        # Best-effort: callers treat None as "skip this specification".
        # Catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit still propagate, and say why the fit was skipped.
        print(f"✗ TWFE estimation failed for '{label}': "
              f"{type(exc).__name__}: {exc}")
        return None

print("Helper function loaded")

Helper function loaded


## Section 1: Alternative Specifications

### 1a. Baseline

In [4]:
print("\n" + "="*80)
print("BASELINE")

# Headline specification: treatment switches on after day 180
baseline = run_twfe_did(df_clean, "Baseline (Day 180)")

# run_twfe_did returns None when the estimation is skipped or fails.
# Stop here with a clear message instead of a TypeError on the
# subscripts below.
if baseline is None:
    raise RuntimeError(
        "Baseline TWFE estimation returned no result; "
        "check df_clean before running the robustness checks"
    )

print(f"\nCoefficient: {baseline['coef']:.4f}%")
print(f"P-value: {baseline['pval']:.4f}")
print(f"95% CI: [{baseline['ci_lower']:.4f}%, {baseline['ci_upper']:.4f}%]")


BASELINE

Coefficient: 0.4545%
P-value: 0.0004
95% CI: [0.2019%, 0.7072%]


### 1b. Different Lockup Windows

In [5]:
print("\n" + "="*80)
print("DIFFERENT LOCKUP WINDOWS")

# Candidate cut-off days; 180 reproduces the baseline specification
windows = [90, 150, 180, 210, 270]
window_results = []

for window in windows:
    # Fresh copy of the sample with the treatment dummy redefined as
    # "more than `window` days since the IPO"
    df_temp = df_clean.assign(
        Post_Lockup=(df_clean['Days_Since_IPO'] > window).astype(int)
    )

    result = run_twfe_did(df_temp, f"Day {window}")

    if not result:
        # Estimation was skipped or failed for this window
        continue

    window_results.append(result)
    sig = "✓" if result['significant'] else "✗"
    print(f"Day {window}: {result['coef']:+.4f}% (p={result['pval']:.4f}) {sig}")

# Visualize: one bar per window with 95% error bars; green = significant
if window_results:
    x = [r['label'] for r in window_results]
    y = [r['coef'] for r in window_results]
    errors = [r['se']*1.96 for r in window_results]
    colors = ['green' if r['significant'] else 'gray' for r in window_results]

    fig = go.Figure(
        data=[
            go.Bar(
                x=x,
                y=y,
                error_y={'type': 'data', 'array': errors},
                marker_color=colors,
            )
        ]
    )

    # Zero reference line
    fig.add_hline(y=0, line_dash="dash", line_color="black")

    fig.update_layout(
        title='Treatment Effects: Different Windows',
        xaxis_title='Lockup Day',
        yaxis_title='Treatment Effect (%)',
        height=500,
        template='plotly_white',
    )

    fig.show()
    fig.write_image(f"{fig_output_dir}/03_treatment_effects_time_windows.png", scale=2)



DIFFERENT LOCKUP WINDOWS
Day 90: -0.2253% (p=0.0295) ✓
Day 150: +0.3312% (p=0.0093) ✓
Day 180: +0.4545% (p=0.0004) ✓
Day 210: +0.3784% (p=0.0015) ✓
Day 270: +0.0975% (p=0.4446) ✗


### 1c. Exclude Outliers

In [6]:
print("\n" + "="*80)
print("EXCLUDE OUTLIERS")

# Trim on the 5th/95th percentiles of the pooled abnormal returns
p5 = df_clean['Abnormal_Return'].quantile(0.05)
p95 = df_clean['Abnormal_Return'].quantile(0.95)

# Keep observations inside [p5, p95], bounds included
df_no_outliers = df_clean[
    df_clean['Abnormal_Return'].between(p5, p95)
].copy()

print(f"\nDropped: {len(df_clean) - len(df_no_outliers):,} observations")

no_outliers = run_twfe_did(df_no_outliers, "No outliers")

# Both estimates are needed for the comparison; run_twfe_did returns
# None when an estimation is skipped or fails.
if no_outliers and baseline:
    print(f"\nBaseline: {baseline['coef']:+.4f}%")
    print(f"No outliers: {no_outliers['coef']:+.4f}%")

    # Called robust when the coefficient moves by less than 0.1
    if abs(baseline['coef'] - no_outliers['coef']) < 0.1:
        print("✓ Robust to outliers")
    else:
        print("✗ Sensitive to outliers")
else:
    # Report the failure instead of silently printing nothing
    print("\n✗ Estimation failed")


EXCLUDE OUTLIERS

Dropped: 1,782 observations

Baseline: +0.4545%
No outliers: +0.2456%
✗ Sensitive to outliers


## Section 2: Company Size

In [7]:
print("\n" + "="*80)
print("HETEROGENEITY: COMPANY SIZE")

# Size proxy per ticker: mean Close times mean Volume over days 1-7
first_week = df_clean.loc[df_clean['Days_Since_IPO'].between(1, 7)].copy()
avg_price = first_week.groupby('Ticker')['Close'].mean()
avg_volume = first_week.groupby('Ticker')['Volume'].mean()
market_cap_proxy = avg_price * avg_volume

# Split at the median; a ticker exactly at the median counts as small
median_cap = market_cap_proxy.median()

large_tickers = market_cap_proxy.loc[market_cap_proxy > median_cap].index.tolist()
small_tickers = market_cap_proxy.loc[market_cap_proxy <= median_cap].index.tolist()

print(f"\nLarge IPOs: {len(large_tickers)}")
print(f"Small IPOs: {len(small_tickers)}")

# Estimate the same specification separately on each half
df_large = df_clean.loc[df_clean['Ticker'].isin(large_tickers)].copy()
df_small = df_clean.loc[df_clean['Ticker'].isin(small_tickers)].copy()

result_large = run_twfe_did(df_large, "Large")
result_small = run_twfe_did(df_small, "Small")

if not (result_large and result_small):
    print("✗ Estimation failed")
else:
    print(f"\nLarge: {result_large['coef']:+.4f}% (p={result_large['pval']:.4f})")
    print(f"Small: {result_small['coef']:+.4f}% (p={result_small['pval']:.4f})")

    # Visualize: two bars with 95% error bars; green = significant
    x = ['Large IPOs', 'Small IPOs']
    y = [result_large['coef'], result_small['coef']]
    errors = [result_large['se']*1.96, result_small['se']*1.96]
    colors = ['green' if r['significant'] else 'gray'
              for r in (result_large, result_small)]

    fig = go.Figure(
        data=[
            go.Bar(
                x=x,
                y=y,
                error_y={'type': 'data', 'array': errors},
                marker_color=colors,
            )
        ]
    )

    # Zero reference line
    fig.add_hline(y=0, line_dash="dash", line_color="black")

    fig.update_layout(
        title='Treatment Effects: Large vs Small IPOs',
        yaxis_title='Treatment Effect (%)',
        height=500,
        template='plotly_white',
    )

    fig.show()
    fig.write_image(f"{fig_output_dir}/04_treatment_effects_company_size.png", scale=2)


HETEROGENEITY: COMPANY SIZE

Large IPOs: 35
Small IPOs: 36

Large: +0.6312% (p=0.0000)
Small: +0.1185% (p=0.4574)


## Section 3: Placebo Tests

Test for effects at fake lockup dates.

In [8]:
print("\n" + "="*80)
print("PLACEBO TESTS")

# Day treated as the real lockup date; every other day is a placebo
REAL_LOCKUP_DAY = 180
real_label = f"Day {REAL_LOCKUP_DAY}"

placebo_days = [60, 90, 120, 180, 240, 270, 300]
placebo_results = []

for day in placebo_days:
    # Redefine the treatment dummy at the candidate day on a fresh copy
    df_temp = df_clean.copy()
    df_temp['Post_Lockup'] = (df_temp['Days_Since_IPO'] > day).astype(int)

    result = run_twfe_did(df_temp, f"Day {day}")

    if result:
        placebo_results.append(result)
        marker = "← REAL" if day == REAL_LOCKUP_DAY else ""
        sig = "✓" if result['significant'] else "✗"
        print(f"Day {day}: {result['coef']:+.4f}% (p={result['pval']:.4f}) {sig} {marker}")

# Visualize
if len(placebo_results) > 0:
    fig = go.Figure()

    x = [r['label'] for r in placebo_results]
    y = [r['coef'] for r in placebo_results]
    errors = [r['se']*1.96 for r in placebo_results]
    # Exact label match, so a label such as "Day 1800" is never mistaken
    # for the real date
    colors = ['red' if r['label'] == real_label else 'lightgray' for r in placebo_results]

    fig.add_trace(go.Bar(
        x=x, y=y,
        error_y=dict(type='data', array=errors),
        marker_color=colors
    ))

    fig.add_hline(y=0, line_dash="dash", line_color="black")

    fig.update_layout(
        title='Placebo Tests: Real vs Fake Lockup Dates',
        xaxis_title='Day',
        yaxis_title='Treatment Effect (%)',
        height=500,
        template='plotly_white'
    )

    fig.show()
    fig.write_image(f"{fig_output_dir}/05_placebo_tests.png", scale=2)

    # Count over the fake dates that were actually estimated, so the
    # denominator stays right even if the real-date estimate is missing
    fake_results = [r for r in placebo_results if r['label'] != real_label]
    sig_placebos = sum(r['significant'] for r in fake_results)
    print(f"\nSignificant placebo tests: {sig_placebos} out of {len(fake_results)}")


PLACEBO TESTS
Day 60: -0.4078% (p=0.0007) ✓ 
Day 90: -0.2253% (p=0.0295) ✓ 
Day 120: +0.0111% (p=0.9299) ✗ 
Day 180: +0.4545% (p=0.0004) ✓ ← REAL
Day 240: +0.2440% (p=0.0292) ✓ 
Day 270: +0.0975% (p=0.4446) ✗ 
Day 300: -0.1045% (p=0.3449) ✗ 



Significant placebo tests: 3 out of 6


## Summary

In [9]:
print("\n" + "="*80)
print("SUMMARY")

# Report the baseline from the estimate itself rather than a typed-in
# number, so the summary cannot drift from the results above.
if baseline:
    print(f"\nBaseline: {baseline['coef']:+.2f}% (p={baseline['pval']:.4f})")
else:
    print("\nBaseline: estimation failed")

# NOTE(review): the two window lines and the "Overall" bullets below are
# fixed interpretive text, not derived from window_results; re-check
# them whenever the data or specification changes.
print("\n1. Different windows:")
print("   Effect strongest around Day 180")
print("   Weakens at Day 270 (too late)")

print("\n2. Company size:")
if result_large and result_small:
    # Significance labels come from each result's own flag
    for size_name, size_result in (("Large", result_large), ("Small", result_small)):
        tag = "significant" if size_result['significant'] else "not significant"
        print(f"   {size_name} IPOs: {size_result['coef']:+.4f}% ({tag})")

    # Only attribute the effect to one group when exactly that group is
    # significant
    if result_large['significant'] and not result_small['significant']:
        print("   → Effect driven by large IPOs")
    elif result_small['significant'] and not result_large['significant']:
        print("   → Effect driven by small IPOs")

print("\n3. Placebo tests:")
if len(placebo_results) > 0:
    # Fake dates only: the Day 180 entry is the real lockup date
    sig_count = sum([r['significant'] for r in placebo_results if 'Day 180' not in r['label']])
    if sig_count > 2:
        print(f"   ✗ Multiple fake dates significant ({sig_count})")
        print("   → Suggests spurious effects (general drift)")
    else:
        print(f"   ✓ Few fake dates significant ({sig_count})")
        print("   → Effect appears lockup-specific")

print("\n4. Overall:")
print("   - Effect concentrated around Day 180")
print("   - Driven by large IPOs (more liquid, efficient)")
print("   - Placebo tests raise questions about causality")

# Save results: one row per estimate, tagged with the check it came from.
# NOTE(review): the outlier and company-size estimates are not included
# in this export.
all_results = []

if baseline:
    baseline['category'] = 'baseline'
    all_results.append(baseline)

for r in window_results:
    r['category'] = 'windows'
    all_results.append(r)

for r in placebo_results:
    r['category'] = 'placebos'
    all_results.append(r)

results_df = pd.DataFrame(all_results)
results_df.to_csv(f'{results_output_dir}/robustness_results.csv', index=False)

print(f"\nSaved: {results_output_dir}/robustness_results.csv")
print("✓ Complete")


SUMMARY

Baseline: +0.45% (p=0.0004)

1. Different windows:
   Effect strongest around Day 180
   Weakens at Day 270 (too late)

2. Company size:
   Large IPOs: +0.6312% (significant)
   Small IPOs: +0.1185% (not significant)
   → Effect driven by large IPOs

3. Placebo tests:
   ✗ Multiple fake dates significant (3)
   → Suggests spurious effects (general drift)

4. Overall:
   - Effect concentrated around Day 180
   - Driven by large IPOs (more liquid, efficient)
   - Placebo tests raise questions about causality

Saved: ../outputs/results/robustness_results.csv
✓ Complete
