# Load and preprocess time series data files

In [1]:
# Relative location of the CSV file named after the FRED series
# CORESTICKM159SFRBATL; resolved against the notebook's working directory.
path = '../../data/FRED/CORESTICKM159SFRBATL.csv'

## Load data from CSV

### Having datetime column

In [2]:
import pandas as pd
# Plain read with no date parsing requested: the observation_date column
# comes back as generic `object` values rather than timestamps.
df = pd.read_csv(path)

df

Unnamed: 0,observation_date,CORESTICKM159SFRBATL
0,1968-01-01,3.651861
1,1968-02-01,3.673819
...,...,...
685,2025-02-01,3.516515
686,2025-03-01,3.257815


In [3]:
# Inspect the date column; note the dtype reported at the bottom of the output.
df['observation_date']

0      1968-01-01
1      1968-02-01
          ...    
685    2025-02-01
686    2025-03-01
Name: observation_date, Length: 687, dtype: object

In [4]:
# Demonstration: the column was read as plain `object` values, so the `.dt`
# accessor is unavailable. The expected error is caught and its message is
# printed so that "Restart & Run All" does not stop at this cell.
try:
    df.observation_date.dt.to_period('Q')
except AttributeError as err:
    print(f'AttributeError: {err}')

AttributeError: Can only use .dt accessor with datetimelike values

In [5]:
# Replace the text dates with real timestamps so that the `.dt` accessor
# becomes available on this column.
df['observation_date'] = pd.to_datetime(df['observation_date'])

df['observation_date']

0     1968-01-01
1     1968-02-01
         ...    
685   2025-02-01
686   2025-03-01
Name: observation_date, Length: 687, dtype: datetime64[ns]

In [6]:
# With a datetime64 column the `.dt` accessor works: map each date to its quarter.
df['observation_date'].dt.to_period('Q')

0      1968Q1
1      1968Q1
        ...  
685    2025Q1
686    2025Q1
Name: observation_date, Length: 687, dtype: period[Q-DEC]

In [11]:
# Shortcut for the manual conversion: ask read_csv to parse the named column
# as dates while loading, so no separate pd.to_datetime step is needed.
df = pd.read_csv(path, parse_dates=['observation_date'])

df

Unnamed: 0,observation_date,CORESTICKM159SFRBATL
0,1968-01-01,3.651861
1,1968-02-01,3.673819
...,...,...
685,2025-02-01,3.516515
686,2025-03-01,3.257815


In [12]:
# Confirm the column was parsed on load (dtype should be datetime64[ns]).
df['observation_date']

0     1968-01-01
1     1968-02-01
         ...    
685   2025-02-01
686   2025-03-01
Name: observation_date, Length: 687, dtype: datetime64[ns]

### Having no datetime column

In [12]:
# Second dataset: a CSV whose timestamp is split across separate
# year / month / day / hour columns instead of a single datetime column.
path = '../../data/EIA/demand_cal_historical_datetime-split.csv'

In [13]:
# Load the file as-is; the date parts arrive as ordinary integer columns.
df = pd.read_csv(path)

df

Unnamed: 0,year,month,day,hour,value
0,2019,1,1,0,28487
1,2019,1,1,1,30507
...,...,...,...,...,...
55090,2025,4,14,10,24600
55091,2025,4,14,11,22598


In [14]:
# pd.to_datetime can assemble timestamps from a frame whose columns are named
# after the date components, so pass just those columns.
date_parts = df[['year', 'month', 'day', 'hour']]
df['datetime'] = pd.to_datetime(date_parts)

df

Unnamed: 0,year,month,day,hour,value,datetime
0,2019,1,1,0,28487,2019-01-01 00:00:00
1,2019,1,1,1,30507,2019-01-01 01:00:00
...,...,...,...,...,...,...
55090,2025,4,14,10,24600,2025-04-14 10:00:00
55091,2025,4,14,11,22598,2025-04-14 11:00:00


## Set the datetime column as the index

In [15]:
# While the timestamps live in a regular column, period conversion goes
# through the `.dt` accessor.
df['datetime'].dt.to_period('Q')

0        2019Q1
1        2019Q1
          ...  
55090    2025Q2
55091    2025Q2
Name: datetime, Length: 55092, dtype: period[Q-DEC]

In [None]:
# Move the assembled timestamps into the index. set_index returns a new frame
# here, so `df` itself keeps its original integer index.
df_idx = df.set_index('datetime')

df_idx

Unnamed: 0_level_0,year,month,day,hour,value
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-01-01 00:00:00,2019,1,1,0,28487
2019-01-01 01:00:00,2019,1,1,1,30507
...,...,...,...,...,...
2025-04-14 10:00:00,2025,4,14,10,24600
2025-04-14 11:00:00,2025,4,14,11,22598


In [None]:
# Once the timestamps are the index, to_period is called on the index
# directly (no `.dt` accessor) and yields a PeriodIndex.
df_idx.index.to_period('Q')

PeriodIndex(['2019Q1', '2019Q1', '2019Q1', '2019Q1', '2019Q1', '2019Q1',
             '2019Q1', '2019Q1', '2019Q1', '2019Q1',
             ...
             '2025Q2', '2025Q2', '2025Q2', '2025Q2', '2025Q2', '2025Q2',
             '2025Q2', '2025Q2', '2025Q2', '2025Q2'],
            dtype='period[Q-DEC]', name='datetime', length=55092)

In [1]:
# Quarterly total of the `value` series only. Resampling the whole frame would
# also sum the year / month / day / hour helper columns, which is meaningless
# and adds spurious lines to the chart.
# NOTE(review): pandas >= 2.2 deprecates the 'Q' alias in favour of 'QE';
# switch if the installed version emits a FutureWarning here.
# The trailing semicolon hides the Axes repr so only the figure is shown.
df_idx['value'].resample('Q').sum().plot.line(title='Quarterly sum of value');

NameError: name 'df_idx' is not defined

In [None]:
# Quarterly total of the `value` series, restricted to rows up to the end of
# 2024 so that the partially observed 2025 quarter does not appear as an
# artificial drop at the right edge of the chart.
# Only `value` is selected: summing the year / month / day / hour helper
# columns would be meaningless and would add spurious lines.
# NOTE(review): pandas >= 2.2 deprecates the 'Q' alias in favour of 'QE';
# switch if the installed version emits a FutureWarning here.
# The trailing semicolon hides the Axes repr so only the figure is shown.
df_idx.loc[:'2024', 'value'].resample('Q').sum().plot.line(
    title='Quarterly sum of value (through 2024)'
);

NameError: name 'df_idx' is not defined

## Load data from Excel

In [13]:
# Excel counterpart of the FRED CSV used at the top of the notebook.
path = '../../data/FRED/CORESTICKM159SFRBATL.xlsx'

In [20]:
# With no sheet_name given, read_excel loads the first sheet, which in this
# workbook holds only the FRED notice text rather than the observations.
pd.read_excel(path)

Unnamed: 0,FRED Graph Observations,Unnamed: 1,Unnamed: 2
0,"Federal Reserve Economic Data, Federal Reserve...",,
1,Link: https://fred.stlouisfed.org,,
2,Help: https://fredhelp.stlouisfed.org,,
3,This data may be copyrighted. Please refer to ...,,
4,File Created: 2025-04-14 6:07 am CDT,,
5,,,
6,CORESTICKM159SFRBATL,Sticky Price Consumer Price Index less Food an...,Data Updated: 2025-04-10


In [16]:
# sheet_name is a zero-based position, so 1 selects the second sheet,
# where the actual observations are stored.
pd.read_excel(path, sheet_name=1)

Unnamed: 0,observation_date,CORESTICKM159SFRBATL
0,1968-01-01,3.651861
1,1968-02-01,3.673819
...,...,...
685,2025-02-01,3.516515
686,2025-03-01,3.257815


In [17]:
# Keep the observations sheet (second sheet, zero-based position 1) as `df`.
df = pd.read_excel(path, sheet_name=1)

df

Unnamed: 0,observation_date,CORESTICKM159SFRBATL
0,1968-01-01,3.651861
1,1968-02-01,3.673819
...,...,...
685,2025-02-01,3.516515
686,2025-03-01,3.257815


In [18]:
# The Excel reader already returns this column as datetime64[ns]; no extra
# parsing step was requested when loading.
df['observation_date']

0     1968-01-01
1     1968-02-01
         ...    
685   2025-02-01
686   2025-03-01
Name: observation_date, Length: 687, dtype: datetime64[ns]

In [19]:
# Load the observations sheet with its first column (observation_date) as the
# index; parse_dates=True asks pandas to parse that index as dates.
df = pd.read_excel(path, sheet_name=1, parse_dates=True, index_col=0)

df

Unnamed: 0_level_0,CORESTICKM159SFRBATL
observation_date,Unnamed: 1_level_1
1968-01-01,3.651861
1968-02-01,3.673819
...,...
2025-02-01,3.516515
2025-03-01,3.257815
