In [3]:
import os
import warnings
from datetime import datetime
from time import sleep

import numpy as np
import pandas as pd
import requests
from sklearn.preprocessing import StandardScaler

### API Key and Info

In [4]:
# The FRED API key is a credential and must not live in the notebook.
# Export it before starting Jupyter, e.g. `export FRED_API_KEY=...`.
# NOTE(review): the key that was previously committed here should be revoked and reissued.
FRED_API_KEY = os.environ.get("FRED_API_KEY", "")
if not FRED_API_KEY:
    warnings.warn(
        "FRED_API_KEY environment variable is not set; requests to the FRED API will fail.",
        stacklevel=2,
    )

# Maps each FRED series id to the human-readable column name used after merging.
FRED_SERIES = {
    "UNRATE": "Unemployment Rate",
    "CIVPART": "Labor Force Participation Rate",
    "LNS12300060": "Employment-Population (Men)",
    "LNS12300002": "Employment-Population (Women)",
    "U6RATE": "U-6 Unemployment Rate (Underemployment)",
}

In [5]:
def fetch_fred_series(series_id, start_date="2000-01-01", end_date="2024-12-01", timeout=30):
    """Download one FRED series and return it as a date-indexed DataFrame.

    Parameters
    ----------
    series_id : str
        FRED series identifier; also used as the name of the value column.
    start_date, end_date : str
        Observation window, passed through as ``observation_start`` and
        ``observation_end``.
    timeout : float
        Seconds to wait for the HTTP request before giving up, so a stalled
        connection cannot hang the kernel indefinitely.

    Returns
    -------
    pandas.DataFrame
        A single column named ``series_id``, indexed by ``date``. Values that
        cannot be parsed as numbers become NaN.

    Raises
    ------
    requests.HTTPError
        If the API responds with an error status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    # Passing `params` lets requests URL-encode every value instead of
    # interpolating raw strings into the query.
    response = requests.get(
        "https://api.stlouisfed.org/fred/series/observations",
        params={
            "series_id": series_id,
            "api_key": FRED_API_KEY,
            "file_type": "json",
            "observation_start": start_date,
            "observation_end": end_date,
        },
        timeout=timeout,
    )
    response.raise_for_status()
    observations = response.json()["observations"]

    # `columns=` keeps the frame well-formed when the window has no
    # observations; column selection on an empty frame would raise KeyError.
    df = pd.DataFrame(observations, columns=["date", "value"])
    df["date"] = pd.to_datetime(df["date"])
    # Non-numeric placeholders are coerced to NaN rather than raising.
    df["value"] = pd.to_numeric(df["value"], errors="coerce")
    return df.set_index("date").rename(columns={"value": series_id})


### Fetch Data

In [6]:
# Download every configured series, one request at a time.
dfs = []
for series_id, label in FRED_SERIES.items():
    print(f"Fetching {label}...")
    df = fetch_fred_series(series_id)
    dfs.append(df)
    sleep(1)  # pause between calls to stay under the API rate limit

# Align the series on their date index and keep only dates where every series has a value.
merged = pd.concat(dfs, axis=1).dropna()

# Replace the series ids with their readable names.
merged = merged.rename(columns=FRED_SERIES)

Fetching Unemployment Rate...
Fetching Labor Force Participation Rate...
Fetching Employment-Population (Men)...
Fetching Employment-Population (Women)...
Fetching U-6 Unemployment Rate (Underemployment)...


### Store as CSV

In [7]:
# Create the output folder first so the write does not fail on a fresh checkout.
os.makedirs("data", exist_ok=True)
merged.to_csv("data/multivariate_unemployment_LSTNet.csv")


In [8]:
# Reload the saved dataset from disk and preview the first rows.
df = pd.read_csv(
    "data/multivariate_unemployment_LSTNet.csv",
)
df.head(n=5)

Unnamed: 0,date,Unemployment Rate,Labor Force Participation Rate,Employment-Population (Men),Employment-Population (Women),U-6 Unemployment Rate (Underemployment)
0,2000-01-01,4.0,67.3,81.8,57.6,7.0
1,2000-02-01,4.1,67.3,81.8,57.5,7.1
2,2000-03-01,4.0,67.3,81.7,57.5,7.1
3,2000-04-01,3.8,67.3,81.9,58.0,6.9
4,2000-05-01,4.0,67.1,81.5,57.5,7.1


### Date and data size

In [9]:
# Report the covered date range and the overall table dimensions.
date_column = df["date"]
print("first date: ", date_column.min())
print("last date: ", date_column.max())
print("data shape: ", df.shape)

first date:  2000-01-01
last date:  2024-12-01
data shape:  (300, 6)


### Scale Data

In [15]:
# Keep only the numeric feature columns; non-numeric columns such as the date string are excluded.
X_only = df.select_dtypes(include="number")
print("X-only columns", X_only.columns)

# Standardize each feature column using statistics fitted on the same data.
scaler = StandardScaler()
scaler.fit(X_only)
X_scaled = scaler.transform(X_only)
print("X_scaled shape: ", X_scaled.shape)
X_scaled

X-only columns Index(['Unemployment Rate', 'Labor Force Participation Rate',
       'Employment-Population (Men)', 'Employment-Population (Women)',
       'U-6 Unemployment Rate (Underemployment)'],
      dtype='object')
X_scaled shape:  (300, 5)


array([[-0.86622494,  1.72280372,  1.67486276,  1.6713414 , -1.0186159 ],
       [-0.81514032,  1.72280372,  1.67486276,  1.60973648, -0.9878482 ],
       [-0.86622494,  1.72280372,  1.62606614,  1.60973648, -0.9878482 ],
       ...,
       [-0.81514032, -0.98612547,  1.08930328,  0.06961356, -0.80324202],
       [-0.7640557 , -0.98612547,  0.99171004,  0.00800864, -0.80324202],
       [-0.81514032, -0.98612547,  1.04050666,  0.06961356, -0.86477741]])

### Data is ready to be sent to LSTNet