In [1]:
import pandas as pd
import numpy as np

In [2]:
# Fix NumPy's legacy global RNG so every np.random.* draw below is reproducible
# on a fresh "Restart & Run All".
np.random.seed(seed=42)

In [3]:
# Build a synthetic order log with one row per order record.
n_orders = 20000

# The draws are made in the same sequence as the columns are declared
# (date offsets -> product ids -> city ids -> order counts) so that the seeded
# global random stream is consumed in a fixed order.
# NOTE(review): day offsets span 0..364, so 2024-12-31 is never generated
# (2024 is a leap year) -- confirm this is intended.
order_start = pd.to_datetime("2024-01-01")
order_day_offsets = np.random.randint(0, 365, n_orders)
order_product_ids = np.random.randint(100, 120, n_orders)    # 20 products (100..119)
order_city_ids = np.random.randint(1, 15, n_orders)          # 14 cities (1..14)
order_counts = np.random.poisson(lam=20, size=n_orders) + 1  # +1 keeps every count >= 1

date = pd.DataFrame({
    "date": order_start + pd.to_timedelta(order_day_offsets, unit="D"),
    "product_id": order_product_ids,
    "city_id": order_city_ids,
    "orders": order_counts,
})

# Preview the first rows.
date.head()


Unnamed: 0,date,product_id,city_id,orders
0,2024-04-12,102,14,24
1,2024-12-14,117,14,24
2,2024-09-27,107,2,27
3,2024-04-16,100,9,28
4,2024-03-12,113,14,26


In [4]:
# Build a synthetic message log: an hourly-resolution timestamp plus a message
# picked at random from a fixed set of templates.
n_messages = 10000

message_templates = [
    "Price drop announced",
    "Promotion launched",
    "Delivery delay reported",
    "Inventory restocked",
    "High demand alert",
    "Service issue resolved",
    "Flash sale started",
    "Regional outage notice",
]

# Draw the hour offsets first and the message texts second, so the global
# random stream is consumed in a fixed order.
# NOTE(review): 365*24 hours stops at 2024-12-30 23:00, so no message falls on
# 2024-12-31 (2024 is a leap year) -- confirm this is intended.
message_start = pd.to_datetime("2024-01-01")
message_hour_offsets = np.random.randint(0, 365*24, n_messages)
message_texts = np.random.choice(message_templates, n_messages)

time = pd.DataFrame({
    "dtime": message_start + pd.to_timedelta(message_hour_offsets, unit="h"),
    "msg": message_texts,
})

# Preview the first rows.
time.head()


Unnamed: 0,dtime,msg
0,2024-05-17 07:00:00,Price drop announced
1,2024-09-21 22:00:00,Price drop announced
2,2024-02-20 14:00:00,Flash sale started
3,2024-05-07 23:00:00,Delivery delay reported
4,2024-02-24 06:00:00,Regional outage notice


In [5]:
# Print a schema summary of the orders frame: index range, column names,
# non-null counts, dtypes and memory usage.
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        20000 non-null  datetime64[ns]
 1   product_id  20000 non-null  int64         
 2   city_id     20000 non-null  int64         
 3   orders      20000 non-null  int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 625.1 KB


In [6]:
# Print a schema summary of the messages frame: index range, column names,
# non-null counts, dtypes and memory usage.
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   dtime   10000 non-null  datetime64[ns]
 1   msg     10000 non-null  object        
dtypes: datetime64[ns](1), object(1)
memory usage: 156.4+ KB


# **Extract year**

In [7]:
# Add the calendar year of each order as an integer column, via the .dt accessor.
order_dates = date["date"]
date["year"] = order_dates.dt.year

# Preview the frame with the new column.
date.head()

Unnamed: 0,date,product_id,city_id,orders,year
0,2024-04-12,102,14,24,2024
1,2024-12-14,117,14,24,2024
2,2024-09-27,107,2,27,2024
3,2024-04-16,100,9,28,2024
4,2024-03-12,113,14,26,2024


# **Extract month name**

In [9]:
# Add the month name of each order date (e.g. "April") as a text column.
order_dates = date["date"]
date["month_name"] = order_dates.dt.month_name()

# Preview the frame with the new column.
date.head()

Unnamed: 0,date,product_id,city_id,orders,year,month_name
0,2024-04-12,102,14,24,2024,April
1,2024-12-14,117,14,24,2024,December
2,2024-09-27,107,2,27,2024,September
3,2024-04-16,100,9,28,2024,April
4,2024-03-12,113,14,26,2024,March


# **Day of week**

In [11]:
# Add the numeric day of the week for each order date (Monday=0 ... Sunday=6).
order_dates = date["date"]
date["weekday"] = order_dates.dt.dayofweek

# Preview the frame with the new column.
date.head()

Unnamed: 0,date,product_id,city_id,orders,year,month_name,weekday
0,2024-04-12,102,14,24,2024,April,4
1,2024-12-14,117,14,24,2024,December,5
2,2024-09-27,107,2,27,2024,September,4
3,2024-04-16,100,9,28,2024,April,1
4,2024-03-12,113,14,26,2024,March,1


In [12]:
# Add the day name of each order date (e.g. "Friday") as a text column.
order_dates = date["date"]
date["weekday_name"] = order_dates.dt.day_name()

# Preview the frame with the new column.
date.head()

Unnamed: 0,date,product_id,city_id,orders,year,month_name,weekday,weekday_name
0,2024-04-12,102,14,24,2024,April,4,Friday
1,2024-12-14,117,14,24,2024,December,5,Saturday
2,2024-09-27,107,2,27,2024,September,4,Friday
3,2024-04-16,100,9,28,2024,April,1,Tuesday
4,2024-03-12,113,14,26,2024,March,1,Tuesday
