In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
time = pd.read_csv("./DATA/messages.csv")
date = pd.read_csv("./DATA/orders.csv")

In [3]:
date.head(10)

Unnamed: 0,date,product_id,city_id,orders
0,2019-12-10,5628,25,3
1,2018-08-15,3646,14,157
2,2018-10-23,1859,25,1
3,2019-08-17,7292,25,1
4,2019-01-06,4344,25,3
5,2018-08-23,1811,25,4
6,2018-11-21,1282,26,1
7,2019-03-27,5022,2,41
8,2019-06-29,3699,3,15
9,2018-08-30,4373,11,3


In [4]:
time.head(10)

Unnamed: 0,date,msg
0,2013-12-15 00:50:00,ищу на сегодня мужика 37
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше
5,2016-03-08 22:52:00,пар.32прок.дев.на авто по ночн.городу.Днепр.06...
6,2014-02-18 00:23:00,Семейная пара познакомится для отношений
7,2012-11-23 01:10:00,ДЛЯ С.П СНАЧАЛО СМС 066.679.400.8
8,2014-12-23 01:20:00,Дедушки ау вы где? СНЕГУРОЧКИ СКУЧАЮТ. Здесь е...
9,2012-11-03 23:46:00,ищу парня с авто.063 528 90 72


In [5]:
time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    1000 non-null   object
 1   msg     1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


In [6]:
date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   date        1000 non-null   object
 1   product_id  1000 non-null   int64 
 2   city_id     1000 non-null   int64 
 3   orders      1000 non-null   int64 
dtypes: int64(3), object(1)
memory usage: 31.4+ KB


## Working with Date Data

In [7]:
date["date"] = pd.to_datetime(date["date"])

date.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        1000 non-null   datetime64[ns]
 1   product_id  1000 non-null   int64         
 2   city_id     1000 non-null   int64         
 3   orders      1000 non-null   int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 31.4 KB


In [8]:
## Extracting year
date["date_year"] = date["date"].dt.year
date.head(10)

Unnamed: 0,date,product_id,city_id,orders,date_year
0,2019-12-10,5628,25,3,2019
1,2018-08-15,3646,14,157,2018
2,2018-10-23,1859,25,1,2018
3,2019-08-17,7292,25,1,2019
4,2019-01-06,4344,25,3,2019
5,2018-08-23,1811,25,4,2018
6,2018-11-21,1282,26,1,2018
7,2019-03-27,5022,2,41,2019
8,2019-06-29,3699,3,15,2019
9,2018-08-30,4373,11,3,2018


In [9]:
## Extractind date month in data
date["date_month"] = date["date"].dt.month
date["data_month_name"] = date["date"].dt.month_name()
date.head(10)

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,data_month_name
0,2019-12-10,5628,25,3,2019,12,December
1,2018-08-15,3646,14,157,2018,8,August
2,2018-10-23,1859,25,1,2018,10,October
3,2019-08-17,7292,25,1,2019,8,August
4,2019-01-06,4344,25,3,2019,1,January
5,2018-08-23,1811,25,4,2018,8,August
6,2018-11-21,1282,26,1,2018,11,November
7,2019-03-27,5022,2,41,2019,3,March
8,2019-06-29,3699,3,15,2019,6,June
9,2018-08-30,4373,11,3,2018,8,August


In [10]:
## Extracting days
date["date_days"] = date["date"].dt.day
date["date_days_name"] = date["date"].dt.day_name()

date.head(5)

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,data_month_name,date_days,date_days_name
0,2019-12-10,5628,25,3,2019,12,December,10,Tuesday
1,2018-08-15,3646,14,157,2018,8,August,15,Wednesday
2,2018-10-23,1859,25,1,2018,10,October,23,Tuesday
3,2019-08-17,7292,25,1,2019,8,August,17,Saturday
4,2019-01-06,4344,25,3,2019,1,January,6,Sunday


In [11]:
## Is weekend

date["date_is_weekend"] = np.where(date["date_days_name"].isin(["Sunday", "Saturday"]), 1, 0)

date.sample(10)

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,data_month_name,date_days,date_days_name,date_is_weekend
948,2018-08-04,5389,24,2,2018,8,August,4,Saturday,1
615,2018-10-19,2846,22,3,2018,10,October,19,Friday,0
669,2018-10-31,1647,4,1,2018,10,October,31,Wednesday,0
208,2019-06-24,3919,2,22,2019,6,June,24,Monday,0
242,2018-11-20,5450,13,1,2018,11,November,20,Tuesday,0
119,2019-08-15,650,25,2,2019,8,August,15,Thursday,0
375,2019-07-14,2002,1,3,2019,7,July,14,Sunday,1
465,2018-09-13,6566,18,8,2018,9,September,13,Thursday,0
383,2019-11-28,820,30,6,2019,11,November,28,Thursday,0
488,2019-10-30,6663,4,1,2019,10,October,30,Wednesday,0


In [13]:
## Extracting week of the year
date["date_week"] = date["date"].dt.weekday

date.head(5)

Unnamed: 0,date,product_id,city_id,orders,date_year,date_month,data_month_name,date_days,date_days_name,date_is_weekend,date_week
0,2019-12-10,5628,25,3,2019,12,December,10,Tuesday,0,1
1,2018-08-15,3646,14,157,2018,8,August,15,Wednesday,0,2
2,2018-10-23,1859,25,1,2018,10,October,23,Tuesday,0,1
3,2019-08-17,7292,25,1,2019,8,August,17,Saturday,1,5
4,2019-01-06,4344,25,3,2019,1,January,6,Sunday,1,6


## Time Data

In [14]:
import datetime

today = datetime.datetime.today()

today

datetime.datetime(2025, 5, 12, 12, 25, 11, 453538)

In [17]:
(today - date["date"]).dt.days[:10]

0    1980
1    2462
2    2393
3    2095
4    2318
5    2454
6    2364
7    2238
8    2144
9    2447
Name: date, dtype: int64

In [19]:
time[:10]

Unnamed: 0,date,msg
0,2013-12-15 00:50:00,ищу на сегодня мужика 37
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше
5,2016-03-08 22:52:00,пар.32прок.дев.на авто по ночн.городу.Днепр.06...
6,2014-02-18 00:23:00,Семейная пара познакомится для отношений
7,2012-11-23 01:10:00,ДЛЯ С.П СНАЧАЛО СМС 066.679.400.8
8,2014-12-23 01:20:00,Дедушки ау вы где? СНЕГУРОЧКИ СКУЧАЮТ. Здесь е...
9,2012-11-03 23:46:00,ищу парня с авто.063 528 90 72


In [20]:
time["date"] = pd.to_datetime(time["date"])

time.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1000 non-null   datetime64[ns]
 1   msg     1000 non-null   object        
dtypes: datetime64[ns](1), object(1)
memory usage: 15.8+ KB


In [21]:
time["hour"] = time["date"].dt.hour
time["minute"] = time["date"].dt.minute
time["second"] = time["date"].dt.second

time.head(10)

Unnamed: 0,date,msg,hour,minute,second
0,2013-12-15 00:50:00,ищу на сегодня мужика 37,0,50,0
1,2014-04-29 23:40:00,ПАРЕНЬ БИ ИЩЕТ ДРУГА СЕЙЧАС!! СМС ММС 0955532826,23,40,0
2,2012-12-30 00:21:00,Днепр.м 43 позн.с д/ж *.о 067.16.34.576,0,21,0
3,2014-11-28 00:31:00,КИЕВ ИЩУ Д/Ж ДО 45 МНЕ СЕЙЧАС СКУЧНО 093 629 9...,0,31,0
4,2013-10-26 23:11:00,Зая я тебя никогда не обижу люблю тебя!) Даше,23,11,0
5,2016-03-08 22:52:00,пар.32прок.дев.на авто по ночн.городу.Днепр.06...,22,52,0
6,2014-02-18 00:23:00,Семейная пара познакомится для отношений,0,23,0
7,2012-11-23 01:10:00,ДЛЯ С.П СНАЧАЛО СМС 066.679.400.8,1,10,0
8,2014-12-23 01:20:00,Дедушки ау вы где? СНЕГУРОЧКИ СКУЧАЮТ. Здесь е...,1,20,0
9,2012-11-03 23:46:00,ищу парня с авто.063 528 90 72,23,46,0
