# Series.astype()

In [1]:
import pandas as pd

In [3]:
# Column name -> values; the dtypes shown below are what pandas inferred.
d = dict(col1=[1, 2], col2=[3, 4])
df = pd.DataFrame(d)
df.dtypes

col1    int64
col2    int64
dtype: object

In [4]:
# A single dtype string is applied to every column of the frame.
df.astype(dtype='int32').dtypes

col1    int32
col2    int32
dtype: object

In [5]:
# A {column: dtype} mapping casts only the listed column; col2 keeps its dtype.
df.astype(dtype={'col1': 'int32'}).dtypes

col1    int32
col2    int64
dtype: object

In [6]:
# Request int32 storage explicitly when building the Series.
ser = pd.Series(data=[1, 2], dtype='int32')
ser

0    1
1    2
dtype: int32

In [7]:
# Cast the int32 Series to int64; the values are unchanged.
ser.astype(dtype='int64')

0    1
1    2
dtype: int64

In [8]:
# Cast to a categorical; the categories are taken from the values present.
ser.astype(dtype='category')

0    1
1    2
dtype: category
Categories (2, int64): [1, 2]

In [11]:
from pandas.api.types import CategoricalDtype

# Ordered categorical whose declared order is 2 < 1, as the output shows.
cat_dtype = CategoricalDtype([2, 1], ordered=True)
ser.astype(dtype=cat_dtype)

0    1
1    2
dtype: category
Categories (2, int64): [2 < 1]

In [12]:
# Cast with copy=False, then write through the result. The recorded output
# shows s1[0] changed to 10 as well, i.e. s2 shared its data with s1 in this run.
# NOTE(review): with pandas Copy-on-Write enabled this write would presumably
# not propagate to s1 — confirm against the installed pandas version.
s1 = pd.Series([1,2])
s2 = s1.astype('int64', copy=False)
s2[0] = 10
s1

0    10
1     2
dtype: int64

In [13]:
# Three consecutive timestamps starting at 2020-01-01, wrapped in a Series.
ser_date = pd.Series(pd.date_range(start='20200101', periods=3))
ser_date

0   2020-01-01
1   2020-01-02
2   2020-01-03
dtype: datetime64[ns]

# DataFrame.astype()

In [15]:
# Two integer columns to cast in the cells that follow.
d = dict(col1=[1, 2], col2=[3, 4])
df = pd.DataFrame(d)
df.dtypes

col1    int64
col2    int64
dtype: object

In [18]:
# Cast the whole frame: every column becomes int32.
df.astype(dtype="int32").dtypes

col1    int32
col2    int32
dtype: object

In [19]:
# Cast selectively: only col1 is named in the mapping, so only col1 changes.
df.astype(dtype={"col1": "int32"}).dtypes

col1    int32
col2    int64
dtype: object

# pandas.to_datetime()

### Handling Various Input Formats

In [54]:
# One integer column per date component.
df = pd.DataFrame(dict(year=[2015, 2016], month=[2, 3], day=[4, 5]))
# Show the dtypes, a blank line, then the frame itself.
print(df.dtypes, df, sep="\n\n")

year     int64
month    int64
day      int64
dtype: object

   year  month  day
0  2015      2    4
1  2016      3    5


In [55]:
# The year/month/day columns are assembled into one datetime per row.
pd.to_datetime(arg=df)

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]

In [56]:
# Repeat three date strings 1000 times to get a 3000-row Series for timing.
date_strings = ['3/11/2000', '3/13/2000', '3/13/2000']
s = pd.Series(date_strings * 1000)
s.head()

0    3/11/2000
1    3/13/2000
2    3/13/2000
3    3/11/2000
4    3/13/2000
dtype: object

In [57]:
# Time the parse of `s` with infer_datetime_format=True (about 705 µs per loop in this run).
# NOTE(review): `infer_datetime_format` appears to be deprecated in pandas >= 2.0 — confirm before re-running.
%timeit pd.to_datetime(s, infer_datetime_format=True)

705 µs ± 4.54 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [58]:
# Time the same parse with infer_datetime_format=False (about 546 µs per loop in this run).
# NOTE(review): `infer_datetime_format` appears to be deprecated in pandas >= 2.0 — confirm before re-running.
%timeit pd.to_datetime(s, infer_datetime_format=False)

546 µs ± 4.24 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


### Unix Epoch Time

In [59]:
# Interpret the integer as a count of seconds (unit='s').
pd.to_datetime(1_490_195_805, unit='s')

Timestamp('2017-03-22 15:16:45')

In [60]:
# Same instant expressed in nanoseconds (unit='ns'); sub-second digits are kept.
pd.to_datetime(1_490_195_805_433_502_912, unit='ns')

Timestamp('2017-03-22 15:16:45.433502912')

### Non-Unix Epoch Time

In [61]:
# Day counts measured from a custom origin (1960-01-01).
pd.to_datetime(
    [1, 2, 3],
    unit='D',
    origin=pd.Timestamp('1960-01-01'),
)

DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)

### Non-Convertible Date/Times

In [62]:
# errors='ignore': the recorded output is a plain datetime.datetime rather than
# a pandas Timestamp, i.e. the value was not converted to a pandas datetime.
# NOTE(review): errors='ignore' looks deprecated in recent pandas — confirm before re-running.
pd.to_datetime('13000101', format='%Y%m%d', errors='ignore')

datetime.datetime(1300, 1, 1, 0, 0)

In [63]:
# errors='coerce': the same input is returned as NaT in the recorded output.
pd.to_datetime(arg='13000101', format='%Y%m%d', errors='coerce')

NaT

### Timezones and Time offsets

In [64]:
# Timezone-naive strings produce a timezone-naive DatetimeIndex.
# Both strings are written as HH:MM:SS on purpose: pandas >= 2.0 infers one
# format from the first element and rejects later elements that do not match,
# so mixing '12:00' with '13:00:15' only parsed on older releases. The parsed
# values are the same as before.
naive_index = pd.to_datetime(['2018-10-26 12:00:00', '2018-10-26 13:00:15'])
naive_index

DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], dtype='datetime64[ns]', freq=None)

In [65]:
# Strings that all carry the same UTC offset produce a timezone-aware
# DatetimeIndex with that fixed offset.
# Both strings are written as HH:MM:SS on purpose: pandas >= 2.0 infers one
# format from the first element and rejects later elements that do not match,
# so mixing '12:00' with '13:00:15' only parsed on older releases. The parsed
# values are the same as before.
offset_index = pd.to_datetime(['2018-10-26 12:00:00 -0500', '2018-10-26 13:00:15 -0500'])
offset_index

DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:15-05:00'], dtype='datetime64[ns, pytz.FixedOffset(-300)]', freq=None)

In [67]:
# The two strings carry different UTC offsets (+0200 and +0100); the recorded
# output is an object-dtype Index rather than a DatetimeIndex.
# NOTE(review): newer pandas may warn or raise on mixed offsets unless utc=True is passed — confirm.
pd.to_datetime(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100'])

Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], dtype='object')

In [68]:
from datetime import datetime

# Mixes an offset-aware string with a timezone-naive datetime object; in the
# recorded output both entries carry the -01:00 offset.
# NOTE(review): newer pandas may reject mixing aware and naive inputs — confirm.
pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])

DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'], dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)

In [69]:
# With utc=True the timezone-naive strings come back labelled as UTC,
# with their clock times unchanged.
pd.to_datetime(
    ['2018-10-26 12:00', '2018-10-26 13:00'],
    utc=True,
)

DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None)

In [71]:
# With utc=True the offset-aware strings are converted to UTC
# (12:00 -0530 -> 17:30, 12:00 -0500 -> 17:00).
pd.to_datetime(
    ['2018-10-26 12:00 -0530', '2018-10-26 12:00 -0500'],
    utc=True,
)

DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None)

In [73]:
from datetime import datetime, timezone, timedelta

# With utc=True, naive and offset-aware strings and datetime objects are all
# returned as UTC in the recorded output: the naive entries keep their clock
# time, the aware ones are shifted by their offset
# (12:00 -0530 -> 17:30, 18:00 -01:00 -> 19:00).
# NOTE(review): newer pandas may reject strings of differing formats in one call — confirm.
pd.to_datetime(['2018-10-26 12:00', '2018-10-26 12:00 -0530',
                datetime(2020, 1, 1, 18),
                datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1))),], 
               utc=True)

DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 17:30:00+00:00',
               '2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)