# How to Remove Timezone from a DateTime Column in Pandas

In [45]:
import pandas as pd
import datetime
import pytz

dates = ['2021-08-01', '2021-08-02', '2021-08-03']

timestamps_tz = [
    datetime.datetime(2021, 8, 1, 12, 30, 41, 775854, tzinfo=pytz.timezone('US/Pacific')),
    datetime.datetime(2021, 8, 2, 12, 31, 12, 432523, tzinfo=pytz.timezone('US/Pacific')),
    datetime.datetime(2021, 8, 3, 12, 29, 59, 123512, tzinfo=pytz.timezone('US/Pacific')),    
]

timestamps = [
    datetime.datetime(2021, 8, 1, 12, 30, 41, 775854),
    datetime.datetime(2021, 8, 2, 12, 31, 12, 432523),
    datetime.datetime(2021, 8, 3, 12, 29, 59, 123512),    
]

df = pd.DataFrame({'start_date': dates, 'time': timestamps, 'time_tz': timestamps_tz})
df


Unnamed: 0,start_date,time,time_tz
0,2021-08-01,2021-08-01 12:30:41.775854,2021-08-01 13:23:41.775854-07:00
1,2021-08-02,2021-08-02 12:31:12.432523,2021-08-02 13:24:12.432523-07:00
2,2021-08-03,2021-08-03 12:29:59.123512,2021-08-03 13:22:59.123512-07:00


In [46]:
import pyperclip

pyperclip.copy((df.head().to_html(classes='table table-striped text-center', justify='center', index=False)))


In [26]:
df['time']

0   2021-08-01 12:30:41.775854
1   2021-08-02 12:31:12.432523
2   2021-08-03 12:29:59.123512
Name: time, dtype: datetime64[ns]

In [33]:
import pandas as pd
import traceback

try:
    
    df['time'] > df['time_tz']

except Exception as e:
    traceback.print_exc(limit=1)
    exit(1)

Traceback (most recent call last):
  File "/home/vanx/PycharmProjects/datascientyst/venv/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py", line 554, in _validate_comparison_value
    self._check_compatible_with(other)
TypeError: Cannot compare tz-naive and tz-aware datetime-like objects.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/vanx/PycharmProjects/datascientyst/venv/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py", line 1008, in _cmp_method
    other = self._validate_comparison_value(other)
pandas.core.arrays.datetimelike.InvalidComparison: <DatetimeArray>
['2021-08-01 13:23:41.775854-07:00', '2021-08-02 13:24:12.432523-07:00',
 '2021-08-03 13:22:59.123512-07:00']
Length: 3, dtype: datetime64[ns, US/Pacific]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/tmp/ipykernel_5846/1818200402.py", line 6, in <module>
   

In [32]:
import pandas as pd
import traceback

try:
    
    df['time'] - df['time_tz']

except Exception as e:
    traceback.print_exc(limit=1)
    exit(1)

Traceback (most recent call last):
  File "/tmp/ipykernel_5846/394564695.py", line 6, in <module>
    df['time'] - df['time_tz']
TypeError: DatetimeArray subtraction must have the same timezones or no timezones


In [36]:
import pandas as pd
import traceback

try:
    
    df['time_tz'] > pd.to_datetime(df['start_date'])

except Exception as e:
    traceback.print_exc(limit=1)
    exit(1)

Traceback (most recent call last):
  File "/home/vanx/PycharmProjects/datascientyst/venv/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py", line 554, in _validate_comparison_value
    self._check_compatible_with(other)
TypeError: Cannot compare tz-naive and tz-aware datetime-like objects

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/vanx/PycharmProjects/datascientyst/venv/lib/python3.8/site-packages/pandas/core/arrays/datetimelike.py", line 1008, in _cmp_method
    other = self._validate_comparison_value(other)
pandas.core.arrays.datetimelike.InvalidComparison: <DatetimeArray>
['2021-08-01 00:00:00', '2021-08-02 00:00:00', '2021-08-03 00:00:00']
Length: 3, dtype: datetime64[ns]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/tmp/ipykernel_5846/929996482.py", line 6, in <module>
    df['time_tz'] > pd.to_datetime(df['start_date'])
Type

In [37]:
df['time'] > pd.to_datetime(df['start_date'])

0    True
1    True
2    True
dtype: bool

In [41]:
df['time_tz'].dt.tz_localize(None)

0   2021-08-01 13:23:41.775854
1   2021-08-02 13:24:12.432523
2   2021-08-03 13:22:59.123512
Name: time_tz, dtype: datetime64[ns]

In [44]:
df['time_tz'].dt.tz_convert('UTC')

0   2021-08-01 20:23:41.775854+00:00
1   2021-08-02 20:24:12.432523+00:00
2   2021-08-03 20:22:59.123512+00:00
Name: time_tz, dtype: datetime64[ns, UTC]