# DateDifferenceTransformer
This notebook demonstrates the `DateDifferenceTransformer` class. The transformer calculates the difference between two datetime columns, expressed in a specified unit of time, and stores the result in a new column.

In [1]:
import datetime
import pandas as pd
import numpy as np

In [2]:
import tubular
from tubular.dates import DateDifferenceTransformer

In [3]:
tubular.__version__

'1.0.0'

## Create and Load datetime data

In [4]:
def create_datetime_data(n_rows=10, seed=None):
    """Build a small demo frame of random birth and sale timestamps.

    Parameters
    ----------
    n_rows : int, default 10
        Number of rows to generate.
    seed : int or None, default None
        When given, a private ``np.random.RandomState`` seeded with this value
        is used, so repeated calls with the same seed return identical frames.
        When ``None``, the global NumPy random state is used.

    Returns
    -------
    pd.DataFrame
        Two columns of timestamps: ``date_of_birth`` (years 1970-1999) and
        ``sale_date`` (years 2010-2019), so every sale falls after the birth.
    """
    # Fall back to the module-level generator so an earlier np.random.seed()
    # call by the user is still honoured when no seed is passed.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def random_datetimes(year_low, year_high):
        """Draw ``n_rows`` datetimes with years in ``[year_low, year_high)``."""
        # randint excludes its upper bound, so 60 / 24 are needed to make
        # second 59, minute 59 and hour 23 reachable.
        seconds = rng.randint(0, 60, n_rows)
        minutes = rng.randint(0, 60, n_rows)
        hours = rng.randint(0, 24, n_rows)
        # Days are capped at 28 so every (month, day) pair is a valid date.
        days = rng.randint(1, 29, n_rows)
        months = rng.randint(1, 13, n_rows)
        years = rng.randint(year_low, year_high, n_rows)

        return [
            # Convert NumPy integers to plain ints before building the datetime.
            datetime.datetime(*(int(part) for part in parts))
            for parts in zip(years, months, days, hours, minutes, seconds)
        ]

    date_of_birth = random_datetimes(1970, 2000)
    sale_date = random_datetimes(2010, 2020)

    return pd.DataFrame({"date_of_birth": date_of_birth, "sale_date": sale_date})

In [5]:
datetime_data = create_datetime_data()

In [6]:
datetime_data

Unnamed: 0,date_of_birth,sale_date
0,1989-08-23 04:22:45,2014-10-21 12:07:34
1,1984-07-07 14:23:31,2010-01-08 00:39:31
2,1982-08-08 20:19:41,2012-10-19 02:10:58
3,1990-12-05 22:14:44,2010-01-19 16:16:44
4,1982-07-02 20:35:35,2012-03-06 02:00:43
5,1983-12-11 17:03:47,2019-10-05 06:49:36
6,1994-12-22 08:22:01,2010-01-12 04:53:51
7,1998-03-16 20:16:38,2014-08-28 07:57:05
8,1997-02-03 07:33:51,2010-06-05 02:38:45
9,1992-03-23 00:23:57,2018-10-07 20:19:37


In [7]:
datetime_data.dtypes

date_of_birth    datetime64[ns]
sale_date        datetime64[ns]
dtype: object

## Usage
The transformer is configured here with four arguments:
- `column_lower`: the datetime column that is subtracted (the earlier date).
- `column_upper`: the datetime column that is subtracted from (the later date).
- `new_column_name`: the name of the new column that will hold the difference.
- `units`: the time unit of the result: `'D'` (days), `'h'` (hours), `'m'` (minutes) or `'s'` (seconds).

### Days

In [8]:
# Configure the difference `sale_date` - `date_of_birth`, expressed in days
# ("D") and written to a new column named `days`.
date_difference_transformer = DateDifferenceTransformer(
    column_lower="date_of_birth",
    column_upper="sale_date",
    new_column_name="days",
    units="D",
)

In [9]:
# transform() is called directly; this notebook has no fit() step beforehand.
# The result is bound to its own name so later sections can build on it.
transformed_data_days = date_difference_transformer.transform(datetime_data)

In [10]:
transformed_data_days

Unnamed: 0,date_of_birth,sale_date,days
0,1989-08-23 04:22:45,2014-10-21 12:07:34,9190.322789
1,1984-07-07 14:23:31,2010-01-08 00:39:31,9315.427778
2,1982-08-08 20:19:41,2012-10-19 02:10:58,11029.243947
3,1990-12-05 22:14:44,2010-01-19 16:16:44,6984.751389
4,1982-07-02 20:35:35,2012-03-06 02:00:43,10839.225787
5,1983-12-11 17:03:47,2019-10-05 06:49:36,13081.573484
6,1994-12-22 08:22:01,2010-01-12 04:53:51,5499.85544
7,1998-03-16 20:16:38,2014-08-28 07:57:05,6008.486424
8,1997-02-03 07:33:51,2010-06-05 02:38:45,4869.795069
9,1992-03-23 00:23:57,2018-10-07 20:19:37,9694.830324


### Hours

In [11]:
# Same pair of columns, now measured in hours ("h") and written to `hours`.
# NOTE: this rebinds `date_difference_transformer`; the instance configured
# earlier in the notebook is no longer reachable under that name.
date_difference_transformer = DateDifferenceTransformer(
    column_lower="date_of_birth",
    column_upper="sale_date",
    new_column_name="hours",
    units="h",
)

In [12]:
# The input is the frame that already holds `days`, so the displayed result
# carries both the `days` and `hours` columns side by side.
transformed_data_hours = date_difference_transformer.transform(transformed_data_days)

In [13]:
transformed_data_hours

Unnamed: 0,date_of_birth,sale_date,days,hours
0,1989-08-23 04:22:45,2014-10-21 12:07:34,9190.322789,220567.746944
1,1984-07-07 14:23:31,2010-01-08 00:39:31,9315.427778,223570.266667
2,1982-08-08 20:19:41,2012-10-19 02:10:58,11029.243947,264701.854722
3,1990-12-05 22:14:44,2010-01-19 16:16:44,6984.751389,167634.033333
4,1982-07-02 20:35:35,2012-03-06 02:00:43,10839.225787,260141.418889
5,1983-12-11 17:03:47,2019-10-05 06:49:36,13081.573484,313957.763611
6,1994-12-22 08:22:01,2010-01-12 04:53:51,5499.85544,131996.530556
7,1998-03-16 20:16:38,2014-08-28 07:57:05,6008.486424,144203.674167
8,1997-02-03 07:33:51,2010-06-05 02:38:45,4869.795069,116875.081667
9,1992-03-23 00:23:57,2018-10-07 20:19:37,9694.830324,232675.927778


### Minutes

In [14]:
# Same pair of columns, now measured in minutes ("m") and written to `minutes`.
# NOTE: this rebinds `date_difference_transformer`; the instance configured
# earlier in the notebook is no longer reachable under that name.
date_difference_transformer = DateDifferenceTransformer(
    column_lower="date_of_birth",
    column_upper="sale_date",
    new_column_name="minutes",
    units="m",
)

In [15]:
# The input is the frame that already holds `days` and `hours`, so the
# displayed result adds `minutes` alongside them.
transformed_data_minutes = date_difference_transformer.transform(transformed_data_hours)

In [16]:
transformed_data_minutes

Unnamed: 0,date_of_birth,sale_date,days,hours,minutes
0,1989-08-23 04:22:45,2014-10-21 12:07:34,9190.322789,220567.746944,13234060.0
1,1984-07-07 14:23:31,2010-01-08 00:39:31,9315.427778,223570.266667,13414220.0
2,1982-08-08 20:19:41,2012-10-19 02:10:58,11029.243947,264701.854722,15882110.0
3,1990-12-05 22:14:44,2010-01-19 16:16:44,6984.751389,167634.033333,10058040.0
4,1982-07-02 20:35:35,2012-03-06 02:00:43,10839.225787,260141.418889,15608490.0
5,1983-12-11 17:03:47,2019-10-05 06:49:36,13081.573484,313957.763611,18837470.0
6,1994-12-22 08:22:01,2010-01-12 04:53:51,5499.85544,131996.530556,7919792.0
7,1998-03-16 20:16:38,2014-08-28 07:57:05,6008.486424,144203.674167,8652220.0
8,1997-02-03 07:33:51,2010-06-05 02:38:45,4869.795069,116875.081667,7012505.0
9,1992-03-23 00:23:57,2018-10-07 20:19:37,9694.830324,232675.927778,13960560.0


### Seconds

In [17]:
# Same pair of columns, now measured in seconds ("s") and written to `seconds`.
# NOTE: this rebinds `date_difference_transformer`; the instance configured
# earlier in the notebook is no longer reachable under that name.
date_difference_transformer = DateDifferenceTransformer(
    column_lower="date_of_birth",
    column_upper="sale_date",
    new_column_name="seconds",
    units="s",
)

In [18]:
# The input is the frame that already holds `days`, `hours` and `minutes`, so
# the displayed result shows the same difference in all four units.
transformed_data_seconds = date_difference_transformer.transform(
    transformed_data_minutes
)

In [19]:
transformed_data_seconds

Unnamed: 0,date_of_birth,sale_date,days,hours,minutes,seconds
0,1989-08-23 04:22:45,2014-10-21 12:07:34,9190.322789,220567.746944,13234060.0,794043900.0
1,1984-07-07 14:23:31,2010-01-08 00:39:31,9315.427778,223570.266667,13414220.0,804853000.0
2,1982-08-08 20:19:41,2012-10-19 02:10:58,11029.243947,264701.854722,15882110.0,952926700.0
3,1990-12-05 22:14:44,2010-01-19 16:16:44,6984.751389,167634.033333,10058040.0,603482500.0
4,1982-07-02 20:35:35,2012-03-06 02:00:43,10839.225787,260141.418889,15608490.0,936509100.0
5,1983-12-11 17:03:47,2019-10-05 06:49:36,13081.573484,313957.763611,18837470.0,1130248000.0
6,1994-12-22 08:22:01,2010-01-12 04:53:51,5499.85544,131996.530556,7919792.0,475187500.0
7,1998-03-16 20:16:38,2014-08-28 07:57:05,6008.486424,144203.674167,8652220.0,519133200.0
8,1997-02-03 07:33:51,2010-06-05 02:38:45,4869.795069,116875.081667,7012505.0,420750300.0
9,1992-03-23 00:23:57,2018-10-07 20:19:37,9694.830324,232675.927778,13960560.0,837633300.0
