In [1]:
pip install pyarrow

[0mNote: you may need to restart the kernel to use updated packages.


In [1]:
import os
import pandas as pd

# Load the feature table from selected_features_83.parquet.
merged = pd.read_parquet("selected_features_83.parquet")

# The tagging cell filters rows on `date`, so fail early with a clear message if it is absent.
assert "date" in merged.columns, "expected a 'date' column in selected_features_83.parquet"

# Bare last expression -> rich (HTML) preview instead of a wrapped plain-text dump.
merged.head()

        date  Volume_AAPL  Volume_AMZN  Volume_AMD  Volume_CSCO  Volume_ERIC  \
0 1997-01-01  535796800.0  322352000.0   7843200.0   53076000.0    5240800.0   
1 1997-02-01  535796800.0  322352000.0   7843200.0   53076000.0    5240800.0   
2 1997-03-01  535796800.0  322352000.0   7843200.0   53076000.0    5240800.0   
3 1997-04-01  535796800.0  322352000.0   7843200.0   53076000.0    5240800.0   
4 1997-05-01  535796800.0  322352000.0   7843200.0   53076000.0    5240800.0   

   Volume_ADBE  Volume_FCEL  Volume_FORD  Rolling_Std_30d  ...  \
0    7384400.0         22.0       4260.0        20.100536  ...   
1    7384400.0         22.0       4260.0        20.100536  ...   
2    7384400.0         22.0       4260.0        20.100536  ...   
3    7384400.0         22.0       4260.0        20.100536  ...   
4    7384400.0         22.0       4260.0        20.100536  ...   

   Interest Rate_lag30_lag90  Unemployment_lag90_rolling90_rolling180  \
0                       1.01                     

In [3]:
# --- Tag market stress, shock, and recession ---
# Adds three 0/1 indicator columns to `merged`:
#   market_stress_flag  - row date falls inside one of the windows listed below
#   nber_recession_flag - set on exactly the same windows as market_stress_flag
#   market_shock_flag   - VIX reading strictly above VIX_SHOCK_THRESHOLD
# All three are reset to 0 first, so re-running this cell yields the same result.
merged["market_stress_flag"] = 0
merged["market_shock_flag"] = 0
merged["nber_recession_flag"] = 0

# VIX level above which a row is tagged as a market shock.
VIX_SHOCK_THRESHOLD = 40
# Candidate VIX columns, in order of preference.
PREFERRED_VIX_COLUMNS = ["vix_index", "linchpin__vix_index", "adj close_^vix"]

# Define recession (NBER-style macro periods); both endpoints are inclusive.
# NOTE(review): every window sets BOTH market_stress_flag and nber_recession_flag,
# so the two columns are identical. Several windows are described elsewhere in this
# notebook as market events rather than recessions - confirm whether
# nber_recession_flag should instead be driven by official NBER dates.
recession_periods = [
    ("2000-03-01", "2002-10-31"),
    ("2007-12-01", "2009-06-30"),
    ("2011-08-01", "2011-08-31"),
    ("2020-03-01", "2020-03-31"),
    ("2022-01-01", "2022-06-30"),
    ("2023-03-01", "2023-03-31"),
]

for start, end in recession_periods:
    mask = (merged['date'] >= start) & (merged['date'] <= end)
    merged.loc[mask, "market_stress_flag"] = 1
    merged.loc[mask, "nber_recession_flag"] = 1

# Define market shock by VIX > threshold from the best available column.
# `vix_columns` holds every column whose name contains "vix" (case-insensitive);
# it backs the case-insensitive fallback and the diagnostic message below.
vix_columns = [col for col in merged.columns if "vix" in col.lower()]
vix_by_lower = {}
for col in vix_columns:
    # Keep the first column seen for each lowercase spelling.
    vix_by_lower.setdefault(col.lower(), col)

# Pass 1: exact-name match in preference order (the original behaviour).
vix_column = next(
    (name for name in PREFERRED_VIX_COLUMNS if name in merged.columns),
    None,
)
# Pass 2: only if nothing matched exactly, accept a differently-cased spelling
# (e.g. "VIX_Index") so shock tagging is not silently skipped.
if vix_column is None:
    vix_column = next(
        (vix_by_lower[name] for name in PREFERRED_VIX_COLUMNS if name in vix_by_lower),
        None,
    )

if vix_column is not None:
    merged.loc[merged[vix_column] > VIX_SHOCK_THRESHOLD, "market_shock_flag"] = 1
    print(f"Shock tag applied using {vix_column}")
else:
    print("No VIX column found for shock tagging.")
    if vix_columns:
        # Point at near-misses (e.g. lagged/rolled VIX features) to ease debugging.
        print(f"VIX-like columns present but not in the preferred list: {vix_columns}")

# Save to disk (Parquet for typed reloads, CSV for inspection)
merged.to_parquet("selected_features_83_w_rare_events.parquet", index=False)
merged.to_csv("selected_features_83_w_rare_events.csv", index=False)
print("Final dataset saved to selected_features_83_w_rare_events")

Shock tag applied using vix_index
Final dataset saved to selected_features_83_w_rare_events


#### **🗓️ Historic Economic Stress Periods (used in tagging logic)**
| Period                  | Labelled As              | Description                                  |
|------------------------|--------------------------|----------------------------------------------|
| 2000-03 to 2002-10     | market_stress_flag       | Dot-com bubble burst                        |
| 2007-12 to 2009-06     | market_stress_flag       | Global Financial Crisis                     |
| 2011-08                | market_stress_flag       | U.S. debt downgrade                         |
| 2020-03                | market_stress_flag       | COVID-19 crash                              |
| 2022-01 to 2022-06     | market_stress_flag       | Inflation shock, supply chain crisis        |
| 2023-03                | market_stress_flag       | SVB & regional banking panic                |

Each period above is also tagged with `nber_recession_flag = 1` by the tagging code.

#### **Shock Logic:**
If VIX > 40 in the first available VIX column (checked in order: `vix_index`, `linchpin__vix_index`, `adj close_^vix`), we label that row with:
- `market_shock_flag = 1`