In [11]:
from datetime import date, time, datetime, timedelta

import pandas as pd

In [23]:
# Sample data: five timestamps, written one row per timestamp and split
# into one integer column per component
my_dataframe = pd.DataFrame(
    [
        (2010, 1, 5, 1, 8, 6),
        (2012, 2, 10, 11, 16, 2),
        (1990, 3, 15, 13, 24, 10),
        (1978, 4, 10, 19, 32, 54),
        (2001, 5, 25, 23, 40, 24),
    ],
    columns=['year', 'month', 'day', 'hour', 'minute', 'second'],
)

In [24]:
def calc_time(series):
    """Build a ``datetime.time`` from the row's ``hour``, ``minute`` and ``second`` fields."""
    hh, mm, ss = series.hour, series.minute, series.second
    return time(hh, mm, ss)


# row-wise apply over just the three time columns: each row is handed to
# calc_time and the results come back as a single Series
my_dataframe.loc[:, ['hour', 'minute', 'second']].apply(calc_time, axis='columns')

0    01:08:06
1    11:16:02
2    13:24:10
3    19:32:54
4    23:40:24
dtype: object

In [25]:
def calc_date(series):
    """Build a ``datetime.date`` from the row's ``year``, ``month`` and ``day`` fields."""
    yyyy, mm, dd = series.year, series.month, series.day
    return date(yyyy, mm, dd)


# same pattern as the previous example, this time over the three date columns
my_dataframe.loc[:, ['year', 'month', 'day']].apply(calc_date, axis='columns')

0    2010-01-05
1    2012-02-10
2    1990-03-15
3    1978-04-10
4    2001-05-25
dtype: object

In [26]:
def calc_date_and_time(series):
    """Return a ``(date, time)`` pair for one row by delegating to calc_date and calc_time."""
    date_part = calc_date(series)
    time_part = calc_time(series)
    return date_part, time_part


# the function returns a (date, time) pair, so every element of the
# resulting Series is a 2-tuple
my_dataframe.apply(calc_date_and_time, axis='columns')

0    (2010-01-05, 01:08:06)
1    (2012-02-10, 11:16:02)
2    (1990-03-15, 13:24:10)
3    (1978-04-10, 19:32:54)
4    (2001-05-25, 23:40:24)
dtype: object

In [27]:
# result_type='expand' spreads each (date, time) tuple returned by
# calc_date_and_time across two columns, which are stored on the
# original dataframe as 'date' and 'time'
my_dataframe[['date', 'time']] = my_dataframe.apply(
    calc_date_and_time,
    axis='columns',
    result_type='expand',
)
my_dataframe

Unnamed: 0,year,month,day,hour,minute,second,date,time
0,2010,1,5,1,8,6,2010-01-05,01:08:06
1,2012,2,10,11,16,2,2012-02-10,11:16:02
2,1990,3,15,13,24,10,1990-03-15,13:24:10
3,1978,4,10,19,32,54,1978-04-10,19:32:54
4,2001,5,25,23,40,24,2001-05-25,23:40:24


In [28]:
# Summarize columns and dtypes: the six component columns are int64, while
# the new 'date' and 'time' columns are reported as object
my_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 8 columns):
year      5 non-null int64
month     5 non-null int64
day       5 non-null int64
hour      5 non-null int64
minute    5 non-null int64
second    5 non-null int64
date      5 non-null object
time      5 non-null object
dtypes: int64(6), object(2)
memory usage: 448.0+ bytes


In [32]:
def calc_datetime(series):
    """Combine a row's ``date`` and ``time`` fields into a single ``datetime``.

    Args:
        series: A row (e.g. as passed by ``DataFrame.apply(..., axis=1)``)
            exposing ``date`` and ``time`` attributes.

    Returns:
        datetime.datetime: ``datetime.combine(series.date, series.time)``.
    """
    # ``pd.datetime`` was only an alias of the stdlib class; it was deprecated
    # in pandas 1.0 and removed in pandas 2.0, so use ``datetime`` directly.
    return datetime.combine(series.date, series.time)


# builds the 'datetime' column by combining each row's date and time values
my_dataframe['datetime'] = my_dataframe.apply(
    calc_datetime,
    axis='columns',
)

In [33]:
# creates a timedelta difference between each row and the one before it,
# reusing the existing 'datetime' column instead of re-running the
# row-wise apply a second time
my_dataframe['time_diff'] = my_dataframe['datetime'].diff()

In [34]:
# Display the frame with the new 'datetime' and 'time_diff' columns; the
# first 'time_diff' is NaT because row 0 has no previous row to subtract
my_dataframe

Unnamed: 0,year,month,day,hour,minute,second,date,time,datetime,time_diff
0,2010,1,5,1,8,6,2010-01-05,01:08:06,2010-01-05 01:08:06,NaT
1,2012,2,10,11,16,2,2012-02-10,11:16:02,2012-02-10 11:16:02,766 days 10:07:56
2,1990,3,15,13,24,10,1990-03-15,13:24:10,1990-03-15 13:24:10,-8002 days +02:08:08
3,1978,4,10,19,32,54,1978-04-10,19:32:54,1978-04-10 19:32:54,-4357 days +06:08:44
4,2001,5,25,23,40,24,2001-05-25,23:40:24,2001-05-25 23:40:24,8446 days 04:07:30


In [35]:
# Confirm dtypes: 'datetime' is datetime64[ns] and 'time_diff' is
# timedelta64[ns], while 'date' and 'time' remain object
my_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 10 columns):
year         5 non-null int64
month        5 non-null int64
day          5 non-null int64
hour         5 non-null int64
minute       5 non-null int64
second       5 non-null int64
date         5 non-null object
time         5 non-null object
datetime     5 non-null datetime64[ns]
time_diff    4 non-null timedelta64[ns]
dtypes: datetime64[ns](1), int64(6), object(2), timedelta64[ns](1)
memory usage: 528.0+ bytes
