# Introduction to Time Series Data

In [None]:
#@title imports
import pandas as pd
import numpy as np

In [None]:
#@title 1. Introduction to Time Series
# Build a synthetic daily series: 100 consecutive dates starting 2024-01-01,
# each paired with one standard-normal draw.
date_rng = pd.date_range(start='2024-01-01', periods=100, freq='D')
# Seeded generator so Restart & Run All reproduces the same values every time.
rng = np.random.default_rng(42)
# Size the sample from the date range so the two columns can never disagree in length.
df = pd.DataFrame({'date': date_rng, 'value': rng.standard_normal(len(date_rng))})
display(df.head(20))
print("Number of data points/rows: ", len(df))

Unnamed: 0,date,value
0,2024-01-01,-0.999547
1,2024-01-02,-0.898925
2,2024-01-03,-0.221074
3,2024-01-04,0.28396
4,2024-01-05,-1.258068
5,2024-01-06,0.374246
6,2024-01-07,-1.547433
7,2024-01-08,-0.127621
8,2024-01-09,1.617508
9,2024-01-10,-1.671548


Number of data points/rows:  100


In [None]:
#@title 2. Working with Dates
# Make sure the column is datetime-typed so the .dt accessor is available.
df['date'] = pd.to_datetime(df['date'])

# Derive one calendar column per datetime attribute, in display order.
# weekday is numbered from Monday=0 (2024-01-01, a Monday, shows 0 in the output).
date_parts = df['date'].dt
for part_name in ('year', 'month', 'day', 'weekday'):
    df[part_name] = getattr(date_parts, part_name)

# Preview both ends of the frame to confirm the new columns.
display(df.head(10))
display(df.tail())

Unnamed: 0,date,value,year,month,day,weekday
0,2024-01-01,-0.999547,2024,1,1,0
1,2024-01-02,-0.898925,2024,1,2,1
2,2024-01-03,-0.221074,2024,1,3,2
3,2024-01-04,0.28396,2024,1,4,3
4,2024-01-05,-1.258068,2024,1,5,4
5,2024-01-06,0.374246,2024,1,6,5
6,2024-01-07,-1.547433,2024,1,7,6
7,2024-01-08,-0.127621,2024,1,8,0
8,2024-01-09,1.617508,2024,1,9,1
9,2024-01-10,-1.671548,2024,1,10,2


Unnamed: 0,date,value,year,month,day,weekday
95,2024-04-05,1.251672,2024,4,5,4
96,2024-04-06,0.883013,2024,4,6,5
97,2024-04-07,-0.449769,2024,4,7,6
98,2024-04-08,-0.119976,2024,4,8,0
99,2024-04-09,-0.087728,2024,4,9,1


In [None]:
#@title 3. Setting Datetime Index
# Promote 'date' to the index so rows can be selected by date label.
# Guarded and non-inplace: re-running this cell after 'date' has already
# become the index is a no-op instead of raising KeyError.
if 'date' in df.columns:
    df = df.set_index('date')
display(df.head())
display(df.tail())

Unnamed: 0_level_0,value,year,month,day,weekday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01,-0.999547,2024,1,1,0
2024-01-02,-0.898925,2024,1,2,1
2024-01-03,-0.221074,2024,1,3,2
2024-01-04,0.28396,2024,1,4,3
2024-01-05,-1.258068,2024,1,5,4


Unnamed: 0_level_0,value,year,month,day,weekday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-04-05,1.251672,2024,4,5,4
2024-04-06,0.883013,2024,4,6,5
2024-04-07,-0.449769,2024,4,7,6
2024-04-08,-0.119976,2024,4,8,0
2024-04-09,-0.087728,2024,4,9,1


In [None]:
#@title 4. Slicing and Filtering Time Series Data

print("Displaying the data for a specific month")
# A 'YYYY-MM' label on the date index selects every row in that month.
january_data = df.loc['2024-01']  # Filter all data from January 2024
# Show the full month directly rather than hard-coding its length in head(31).
display(january_data)

print("\n\nDisplaying the data for a specific day")
# A full 'YYYY-MM-DD' label returns that single row as a Series (the columns
# become the Series index), which is why integer fields are shown as floats.
specific_day_data = df.loc['2024-01-15']  # Get data for a specific day
# Show the whole row: .head() on a Series would silently stop after five fields.
display(specific_day_data)


Displaying the data for a specific month


Unnamed: 0_level_0,value,year,month,day,weekday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01,-0.999547,2024,1,1,0
2024-01-02,-0.898925,2024,1,2,1
2024-01-03,-0.221074,2024,1,3,2
2024-01-04,0.28396,2024,1,4,3
2024-01-05,-1.258068,2024,1,5,4
2024-01-06,0.374246,2024,1,6,5
2024-01-07,-1.547433,2024,1,7,6
2024-01-08,-0.127621,2024,1,8,0
2024-01-09,1.617508,2024,1,9,1
2024-01-10,-1.671548,2024,1,10,2




Displaying the data for a specific day


Unnamed: 0,2024-01-15
value,-0.362886
year,2024.0
month,1.0
day,15.0
weekday,0.0


In [None]:
#@title 5. Resampling and Aggregation
print("Number of rows (days): ", len(df))
# Average only the measurement column: taking the mean of the calendar columns
# (year/month/day/weekday) yields meaningless numbers such as month 1.57.
# 'W' buckets are labelled by the Sunday that ends each week.
df_resampled = df[['value']].resample('W').mean()  # Resample weekly and take mean
print("Number of rows (weeks): ", len(df_resampled))
display(df_resampled.head())


Number of rows (days):  100
Number of rows (weeks):  15


Unnamed: 0_level_0,value,year,month,day,weekday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-07,-0.609549,2024.0,1.0,4.0,3.0
2024-01-14,0.378157,2024.0,1.0,11.0,3.0
2024-01-21,-0.489427,2024.0,1.0,18.0,3.0
2024-01-28,-0.499948,2024.0,1.0,25.0,3.0
2024-02-04,0.000679,2024.0,1.571429,14.285714,3.0


In [None]:
#@title 6. Rolling Window Operations
ROLLING_WINDOW_DAYS = 7  # one week of daily observations
# Trailing mean over the current row and the six before it; the first
# ROLLING_WINDOW_DAYS - 1 rows are NaN because the window is not yet full.
df['rolling_mean'] = df['value'].rolling(window=ROLLING_WINDOW_DAYS).mean()
# One preview is enough: 20 rows show both the NaN warm-up and the first real values.
display(df.head(20))


Unnamed: 0_level_0,value,year,month,day,weekday,rolling_mean
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-01,-0.999547,2024,1,1,0,
2024-01-02,-0.898925,2024,1,2,1,
2024-01-03,-0.221074,2024,1,3,2,
2024-01-04,0.28396,2024,1,4,3,
2024-01-05,-1.258068,2024,1,5,4,


Unnamed: 0_level_0,value,year,month,day,weekday,rolling_mean
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-01-01,-0.999547,2024,1,1,0,
2024-01-02,-0.898925,2024,1,2,1,
2024-01-03,-0.221074,2024,1,3,2,
2024-01-04,0.28396,2024,1,4,3,
2024-01-05,-1.258068,2024,1,5,4,
2024-01-06,0.374246,2024,1,6,5,
2024-01-07,-1.547433,2024,1,7,6,-0.609549
2024-01-08,-0.127621,2024,1,8,0,-0.484988
2024-01-09,1.617508,2024,1,9,1,-0.125498
2024-01-10,-1.671548,2024,1,10,2,-0.332708


In [None]:
#@title 7. Shifting and Lagging
value_series = df['value']
# Previous row's value aligned to the current row (the first row has no predecessor, so NaN).
df['lagged_value'] = value_series.shift(periods=1)
# Change from the previous row, i.e. value minus lagged_value.
df['diff_value'] = value_series.diff(periods=1)
display(df.head(10))


Unnamed: 0_level_0,value,year,month,day,weekday,rolling_mean,lagged_value,diff_value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-01-01,-0.995902,2023,1,1,6,,,
2023-01-02,0.303281,2023,1,2,0,,-0.995902,1.299184
2023-01-03,0.359075,2023,1,3,1,,0.303281,0.055794
2023-01-04,0.844931,2023,1,4,2,,0.359075,0.485856
2023-01-05,-0.078782,2023,1,5,3,,0.844931,-0.923713
2023-01-06,0.785549,2023,1,6,4,,-0.078782,0.864331
2023-01-07,-0.712449,2023,1,7,5,0.072243,0.785549,-1.497999
2023-01-08,-0.73043,2023,1,8,6,0.110168,-0.712449,-0.01798
2023-01-09,-1.503267,2023,1,9,0,-0.14791,-0.73043,-0.772837
2023-01-10,2.391826,2023,1,10,1,0.142483,-1.503267,3.895092
