In [1]:
# Extract features like year, month, day, and weekday from a datetime column to add temporal information.

import pandas as pd

# Example dataset with a date column
data = {'Date': pd.to_datetime(['2022-01-01', '2022-02-15', '2022-03-10', '2022-04-05', '2022-05-20'])}
df = pd.DataFrame(data)

# Extracting year, month, day, and weekday features
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
df['Weekday'] = df['Date'].dt.weekday  # Monday=0, Sunday=6

print("Data with Extracted Date Features:\n", df)

Data with Extracted Date Features:
         Date  Year  Month  Day  Weekday
0 2022-01-01  2022      1    1        5
1 2022-02-15  2022      2   15        1
2 2022-03-10  2022      3   10        3
3 2022-04-05  2022      4    5        1
4 2022-05-20  2022      5   20        4


In [2]:
# Create lag features by shifting data values to reflect past observations, which is useful for modeling time series dependencies.

# Example time series data
data = {'Date': pd.date_range(start='2022-01-01', periods=5, freq='D'), 'Value': [100, 120, 130, 125, 140]}
df = pd.DataFrame(data)

# Set Date as the index (common in time series data)
df.set_index('Date', inplace=True)

# Create lag features
df['Value_Lag1'] = df['Value'].shift(1)  # 1-day lag
df['Value_Lag2'] = df['Value'].shift(2)  # 2-day lag

print("Data with Lag Features:\n", df)


Data with Lag Features:
             Value  Value_Lag1  Value_Lag2
Date                                     
2022-01-01    100         NaN         NaN
2022-01-02    120       100.0         NaN
2022-01-03    130       120.0       100.0
2022-01-04    125       130.0       120.0
2022-01-05    140       125.0       130.0


In [3]:
# Create rolling window features like moving averages or rolling sums to capture trends over time.

# Example dataset with time series values
data = {'Date': pd.date_range(start='2022-01-01', periods=10, freq='D'), 'Sales': [100, 110, 105, 120, 130, 125, 135, 140, 145, 150]}
df = pd.DataFrame(data)

# Set Date as the index
df.set_index('Date', inplace=True)

# Calculate rolling average (window of 3 days)
df['Sales_MA3'] = df['Sales'].rolling(window=3).mean()

print("Data with Rolling Average Feature:\n", df)


Data with Rolling Average Feature:
             Sales   Sales_MA3
Date                         
2022-01-01    100         NaN
2022-01-02    110         NaN
2022-01-03    105  105.000000
2022-01-04    120  111.666667
2022-01-05    130  118.333333
2022-01-06    125  125.000000
2022-01-07    135  130.000000
2022-01-08    140  133.333333
2022-01-09    145  140.000000
2022-01-10    150  145.000000


In [4]:
# Expanding window features calculate cumulative statistics over time, which can highlight longer-term trends.

# Example data with cumulative sales values
data = {'Date': pd.date_range(start='2022-01-01', periods=10, freq='D'), 'Sales': [100, 110, 105, 120, 130, 125, 135, 140, 145, 150]}
df = pd.DataFrame(data)

# Set Date as the index
df.set_index('Date', inplace=True)

# Calculate expanding cumulative sum and mean
df['Sales_CumSum'] = df['Sales'].expanding().sum()
df['Sales_CumMean'] = df['Sales'].expanding().mean()

print("Data with Expanding Window Features:\n", df)


Data with Expanding Window Features:
             Sales  Sales_CumSum  Sales_CumMean
Date                                          
2022-01-01    100         100.0     100.000000
2022-01-02    110         210.0     105.000000
2022-01-03    105         315.0     105.000000
2022-01-04    120         435.0     108.750000
2022-01-05    130         565.0     113.000000
2022-01-06    125         690.0     115.000000
2022-01-07    135         825.0     117.857143
2022-01-08    140         965.0     120.625000
2022-01-09    145        1110.0     123.333333
2022-01-10    150        1260.0     126.000000


In [5]:
# Encode cyclical features (e.g., month, day of week) using sine and cosine transformations to preserve periodicity.

# Example with months represented as cyclical data
df['Month_Sin'] = np.sin(2 * np.pi * df['Month'] / 12)
df['Month_Cos'] = np.cos(2 * np.pi * df['Month'] / 12)

print("Data with Cyclical Month Features:\n", df[['Month', 'Month_Sin', 'Month_Cos']])


NameError: name 'np' is not defined