# Time Series methods with Pandas in Python

In [1]:
import pandas as pd
import numpy as np

# Create a simple time series
date_range = pd.date_range(start="2023-01-01", periods=10, freq="D")
data = {"value": np.arange(10)}
df = pd.DataFrame(data, index=date_range)

# Shift data by one day to create a lagged feature
df["lagged_value"] = df["value"].shift(1)

df.head()

Unnamed: 0,value,lagged_value
2023-01-01,0,
2023-01-02,1,0.0
2023-01-03,2,1.0
2023-01-04,3,2.0
2023-01-05,4,3.0


In [2]:
# Compute a 3-day rolling mean
df["rolling_mean"] = df["value"].rolling(window=3).mean()

df.head()

Unnamed: 0,value,lagged_value,rolling_mean
2023-01-01,0,,
2023-01-02,1,0.0,
2023-01-03,2,1.0,1.0
2023-01-04,3,2.0,2.0
2023-01-05,4,3.0,3.0


In [3]:
# Compute a 3-day rolling standard deviation
df["rolling_std"] = df["value"].rolling(window=3).std()

df.head()

Unnamed: 0,value,lagged_value,rolling_mean,rolling_std
2023-01-01,0,,,
2023-01-02,1,0.0,,
2023-01-03,2,1.0,1.0,1.0
2023-01-04,3,2.0,2.0,1.0
2023-01-05,4,3.0,3.0,1.0


In [4]:
# Resample daily data to weekly sums
weekly = df.resample("W").sum()
weekly.head()

Unnamed: 0,value,lagged_value,rolling_mean,rolling_std
2023-01-01,0,0.0,0.0,0.0
2023-01-08,28,21.0,21.0,6.0
2023-01-15,17,15.0,15.0,2.0


In [5]:
# Resample weekly data to daily, filling missing values with interpolation
upsampled = weekly.resample("D").interpolate("linear")
upsampled.head()

Unnamed: 0,value,lagged_value,rolling_mean,rolling_std
2023-01-01,0.0,0.0,0.0,0.0
2023-01-02,4.0,3.0,3.0,0.857143
2023-01-03,8.0,6.0,6.0,1.714286
2023-01-04,12.0,9.0,9.0,2.571429
2023-01-05,16.0,12.0,12.0,3.428571


In [6]:
# Simulate missing data
df.loc["2023-01-05", "value"] = np.nan

# Fill missing values with the last available value
df["forward_fill"] = df["value"].fillna(method="ffill")

df.head()

Unnamed: 0,value,lagged_value,rolling_mean,rolling_std,forward_fill
2023-01-01,0.0,,,,0.0
2023-01-02,1.0,0.0,,,1.0
2023-01-03,2.0,1.0,1.0,1.0,2.0
2023-01-04,3.0,2.0,2.0,1.0,3.0
2023-01-05,,3.0,3.0,1.0,3.0


In [7]:
# Fill missing values with the mean of the column
df["mean_fill"] = df["value"].fillna(df["value"].mean())

df.head()

Unnamed: 0,value,lagged_value,rolling_mean,rolling_std,forward_fill,mean_fill
2023-01-01,0.0,,,,0.0,0.0
2023-01-02,1.0,0.0,,,1.0,1.0
2023-01-03,2.0,1.0,1.0,1.0,2.0,2.0
2023-01-04,3.0,2.0,2.0,1.0,3.0,3.0
2023-01-05,,3.0,3.0,1.0,3.0,4.555556


In [9]:
# Combine techniques: Shift, resample, and fill missing values
df["weekly_sum"] = df["value"].resample("W").sum()
df["weekly_sum_shifted"] = df["weekly_sum"].shift(1).fillna(0)
df["weekly_avg"] = df["weekly_sum"].rolling(window=2).mean()

df.tail()

Unnamed: 0,value,lagged_value,rolling_mean,rolling_std,forward_fill,mean_fill,weekly_sum,weekly_sum_shifted,weekly_avg
2023-01-06,5.0,4.0,4.0,1.0,5.0,5.0,,0.0,
2023-01-07,6.0,5.0,5.0,1.0,6.0,6.0,,0.0,
2023-01-08,7.0,6.0,6.0,1.0,7.0,7.0,24.0,0.0,
2023-01-09,8.0,7.0,7.0,1.0,8.0,8.0,,24.0,
2023-01-10,9.0,8.0,8.0,1.0,9.0,9.0,,0.0,
