# Join and preprocess multiple time series data

## Load data

In [1]:
import pandas as pd

# Read each FRED series from CSV; the first column becomes the parsed date index.
loaded = []
for path in (
    '../../data/FRED/CORESTICKM159SFRBATL.csv',
    '../../data/FRED/UNRATE.csv',
    '../../data/FRED/FEDFUNDS.csv',
    '../../data/FRED/T10YIE.csv',
):
    loaded.append(pd.read_csv(path, index_col=0, parse_dates=True))

# Unpack in file order so df1..df4 refer to the same series as before.
df1, df2, df3, df4 = loaded

## Join data

### Inner join

In [2]:
# Inner join: keep only the dates found in the index of all four frames.
series_frames = [df1, df2, df3, df4]
df = pd.concat(series_frames, axis='columns', join='inner')
df

Unnamed: 0_level_0,CORESTICKM159SFRBATL,UNRATE,FEDFUNDS,T10YIE
observation_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-05-01,2.000025,13.2,0.05,1.06
2020-06-01,1.998261,11.0,0.08,1.18
...,...,...,...,...
2024-11-01,3.876177,4.2,4.64,2.33
2025-01-01,3.625276,4.0,4.33,


In [3]:
import plotly.io as pio
# Use Plotly's dark template for every figure created from here on.
pio.templates.default = "plotly_dark"

# Make DataFrame.plot() produce Plotly figures.
pd.set_option("plotting.backend", "plotly")

In [4]:
# Plot every column of the joined frame; a unified hover label lists all
# series for the x position under the cursor.
fig = df.plot(title="FRED Data", width=1000, height=500)
fig.layout.hovermode = "x unified"
fig

In [5]:
# Partial-string indexing on the date index: every df1 row dated in 2025.
df1_2025 = df1.loc['2025']
df1_2025

Unnamed: 0_level_0,CORESTICKM159SFRBATL
observation_date,Unnamed: 1_level_1
2025-01-01,3.625276
2025-02-01,3.516515
2025-03-01,3.257815


In [6]:
# Partial-string indexing on the date index: every df2 row dated in 2025.
df2_2025 = df2.loc['2025']
df2_2025

Unnamed: 0_level_0,UNRATE
observation_date,Unnamed: 1_level_1
2025-01-01,4.0
2025-02-01,4.1
2025-03-01,4.2


In [7]:
# Partial-string indexing on the date index: every df3 row dated in 2025.
df3_2025 = df3.loc['2025']
df3_2025

Unnamed: 0_level_0,FEDFUNDS
observation_date,Unnamed: 1_level_1
2025-01-01,4.33
2025-02-01,4.33
2025-03-01,4.33


In [8]:
# Select the 2025 rows of df4 and render them through a Styler with values
# formatted to two decimal places.
df4_2025 = df4.loc['2025']
df4_2025.style.format(precision=2)

Unnamed: 0_level_0,T10YIE
observation_date,Unnamed: 1_level_1
2025-01-01 00:00:00,
2025-01-02 00:00:00,2.34
2025-01-03 00:00:00,2.34
2025-01-06 00:00:00,2.34
2025-01-07 00:00:00,2.38
2025-01-08 00:00:00,2.4
2025-01-09 00:00:00,2.39
2025-01-10 00:00:00,2.43
2025-01-13 00:00:00,2.45
2025-01-14 00:00:00,2.44


### Outer join

In [9]:
# Outer join: keep the union of all dates, sorted chronologically; a series
# with no observation on a given date gets NaN there.
series_frames = [df1, df2, df3, df4]
df = pd.concat(series_frames, axis='columns', join='outer', sort=True)
df

Unnamed: 0_level_0,CORESTICKM159SFRBATL,UNRATE,FEDFUNDS,T10YIE
observation_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1948-01-01,,3.4,,
1948-02-01,,3.8,,
...,...,...,...,...
2025-04-10,,,,2.19
2025-04-11,,,,2.20


In [10]:
# Select the 2025 rows of the joined frame and render them through a Styler
# with values formatted to two decimal places.
joined_2025 = df.loc['2025']
joined_2025.style.format(precision=2)

Unnamed: 0_level_0,CORESTICKM159SFRBATL,UNRATE,FEDFUNDS,T10YIE
observation_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-01-01 00:00:00,3.63,4.0,4.33,
2025-01-02 00:00:00,,,,2.34
2025-01-03 00:00:00,,,,2.34
2025-01-06 00:00:00,,,,2.34
2025-01-07 00:00:00,,,,2.38
2025-01-08 00:00:00,,,,2.4
2025-01-09 00:00:00,,,,2.39
2025-01-10 00:00:00,,,,2.43
2025-01-13 00:00:00,,,,2.45
2025-01-14 00:00:00,,,,2.44


In [11]:
# Plot every column of the joined frame; a unified hover label lists all
# series for the x position under the cursor.
fig = df.plot(title="FRED Data", width=1000, height=500)
fig.layout.hovermode = "x unified"
fig

## Interpolate missing values

In [12]:
# The outer-joined index mixes month-start dates with the much denser T10YIE
# dates, so consecutive rows are not evenly spaced in time. method='linear'
# ignores the index and treats rows as equally spaced; method='time' weights
# each gap by the actual time elapsed, which is what an irregular
# DatetimeIndex needs.
# With the default fill direction, NaNs before a series' first observation
# stay NaN, and gaps after its last observation are filled with that last value.
df = df.interpolate(method='time')
df

Unnamed: 0_level_0,CORESTICKM159SFRBATL,UNRATE,FEDFUNDS,T10YIE
observation_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1948-01-01,,3.4,,
1948-02-01,,3.8,,
...,...,...,...,...
2025-04-10,3.257815,4.2,4.33,2.19
2025-04-11,3.257815,4.2,4.33,2.20


In [13]:
# Plot every column of the joined frame; a unified hover label lists all
# series for the x position under the cursor.
fig = df.plot(title="FRED Data", width=1000, height=500)
fig.layout.hovermode = "x unified"
fig

## Export data

In [14]:
# Write the joined frame to Parquet in the same directory as the source CSVs.
joined_path = '../../data/FRED/FRED_joined.parquet'
df.to_parquet(joined_path)