# Load and preprocess time series data files

In [1]:
# Relative path to the CSV export of the FRED series CORESTICKM159SFRBATL.
path = '../../data/FRED/CORESTICKM159SFRBATL.csv'

## Load data from CSV

### With a datetime column

In [2]:
import pandas as pd
# Read the CSV with default settings; no date parsing is requested here,
# so the date column arrives as plain strings (dtype object).
df = pd.read_csv(path)

# Display the frame to inspect its columns.
df

Unnamed: 0,observation_date,CORESTICKM159SFRBATL
0,1968-01-01,3.651861
1,1968-02-01,3.673819
...,...,...
685,2025-02-01,3.516515
686,2025-03-01,3.257815


In [3]:
# Inspect the date column; its dtype shows whether pandas parsed it as dates.
df['observation_date']

0      1968-01-01
1      1968-02-01
          ...    
685    2025-02-01
686    2025-03-01
Name: observation_date, Length: 687, dtype: object

In [4]:
# Demonstrate why a conversion is needed: the column was read as strings
# (dtype object), so the .dt accessor is not available on it.
# The error is caught and printed so the demonstration stays visible while the
# notebook can still complete under "Restart Kernel -> Run All".
try:
    df.observation_date.dt.to_period('Q')
except AttributeError as err:
    print(f'{type(err).__name__}: {err}')

AttributeError: Can only use .dt accessor with datetimelike values

In [5]:
# Replace the string dates with real datetime64 values so that the .dt
# accessor becomes usable on this column.
df['observation_date'] = pd.to_datetime(df['observation_date'])

# Show the column again to confirm the new dtype.
df['observation_date']

0     1968-01-01
1     1968-02-01
         ...    
685   2025-02-01
686   2025-03-01
Name: observation_date, Length: 687, dtype: datetime64[ns]

In [6]:
# Map every timestamp to the calendar quarter it falls in.
df['observation_date'].dt.to_period('Q')

0      1968Q1
1      1968Q1
        ...  
685    2025Q1
686    2025Q1
Name: observation_date, Length: 687, dtype: period[Q-DEC]

### Without a datetime column

In [9]:
# Relative path to the "_raw" CSV variant of the CORESTICKM159SFRBATL series;
# per the output below it stores the date as separate year/month/day columns.
path = '../../data/FRED/CORESTICKM159SFRBATL_raw.csv'

In [13]:
# Load the raw CSV. Note that `df` is rebound here and replaces whatever
# frame was loaded earlier in the notebook.
df = pd.read_csv(path)

df

Unnamed: 0,year,month,day,CORESTICKM159SFRBATL
0,1968,1,1,3.651861
1,1968,2,1,3.673819
...,...,...,...,...
685,2025,2,1,3.516515
686,2025,3,1,3.257815


In [14]:
# pd.to_datetime can assemble timestamps from a frame whose columns are named
# year / month / day; the result is stored as a new 'datetime' column.
df['datetime'] = pd.to_datetime(df[['year', 'month', 'day']])

df

Unnamed: 0,year,month,day,CORESTICKM159SFRBATL,datetime
0,1968,1,1,3.651861,1968-01-01
1,1968,2,1,3.673819,1968-02-01
...,...,...,...,...,...
685,2025,2,1,3.516515,2025-02-01
686,2025,3,1,3.257815,2025-03-01


## Set the datetime column as the index

In [16]:
# While 'datetime' is still a regular column, quarterly periods are reached
# through the .dt accessor.
df['datetime'].dt.to_period('Q')

0      1968Q1
1      1968Q1
        ...  
685    2025Q1
686    2025Q1
Name: datetime, Length: 687, dtype: period[Q-DEC]

In [17]:
# Promote the 'datetime' column to the index.
# NOTE: not re-runnable as is — after this cell 'datetime' is the index and no
# longer a column (see the output), so running it a second time without
# reloading `df` is expected to fail.
df = df.set_index('datetime')

df

Unnamed: 0_level_0,year,month,day,CORESTICKM159SFRBATL
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1968-01-01,1968,1,1,3.651861
1968-02-01,1968,2,1,3.673819
...,...,...,...,...
2025-02-01,2025,2,1,3.516515
2025-03-01,2025,3,1,3.257815


In [18]:
# On a datetime index, to_period is called directly (no .dt accessor).
# The result is a new PeriodIndex; it is only displayed, `df` is not modified.
df.index.to_period('Q')

PeriodIndex(['1968Q1', '1968Q1', '1968Q1', '1968Q2', '1968Q2', '1968Q2',
             '1968Q3', '1968Q3', '1968Q3', '1968Q4',
             ...
             '2024Q2', '2024Q3', '2024Q3', '2024Q3', '2024Q4', '2024Q4',
             '2024Q4', '2025Q1', '2025Q1', '2025Q1'],
            dtype='period[Q-DEC]', name='datetime', length=687)

## Load data from Excel

In [20]:
# Relative path to the Excel export of the CORESTICKM159SFRBATL series.
path = '../../data/FRED/CORESTICKM159SFRBATL.xlsx'

In [21]:
# With no sheet_name given, the first sheet is read; in this workbook that
# sheet holds FRED's descriptive header rather than the observations.
pd.read_excel(path)

Unnamed: 0,FRED Graph Observations,Unnamed: 1,Unnamed: 2
0,"Federal Reserve Economic Data, Federal Reserve...",,
1,Link: https://fred.stlouisfed.org,,
...,...,...,...
5,,,
6,CORESTICKM159SFRBATL,Sticky Price Consumer Price Index less Food an...,Data Updated: 2025-04-10


In [22]:
# sheet_name=1 selects the second sheet (positions are zero-based), which
# contains the observation table.
pd.read_excel(path, sheet_name=1)

Unnamed: 0,observation_date,CORESTICKM159SFRBATL
0,1968-01-01,3.651861
1,1968-02-01,3.673819
...,...,...
685,2025-02-01,3.516515
686,2025-03-01,3.257815


In [23]:
# Keep the observations sheet in `df` for the cells that follow.
df = pd.read_excel(path, sheet_name=1)

df

Unnamed: 0,observation_date,CORESTICKM159SFRBATL
0,1968-01-01,3.651861
1,1968-02-01,3.673819
...,...,...
685,2025-02-01,3.516515
686,2025-03-01,3.257815


In [27]:
# Check the dtype of the date column as loaded from Excel: here it is already
# datetime64, with no explicit conversion step.
df['observation_date']

0     1968-01-01
1     1968-02-01
         ...    
685   2025-02-01
686   2025-03-01
Name: observation_date, Length: 687, dtype: datetime64[ns]

In [28]:
# Load the observations sheet with the date column as the index in one step:
# index_col=0 makes the first column (observation_date) the index, and
# parse_dates=True asks pandas to parse that index as dates.
df = pd.read_excel(path, sheet_name=1, parse_dates=True, index_col=0)

df

Unnamed: 0_level_0,CORESTICKM159SFRBATL
observation_date,Unnamed: 1_level_1
1968-01-01,3.651861
1968-02-01,3.673819
...,...
2025-02-01,3.516515
2025-03-01,3.257815
