In [1]:
import pandas as pd

In [2]:
# Generate sample datetime data
date_rng = pd.date_range(start='2022-01-01', end='2022-12-31', freq='H')
data = {'datetime': date_rng}
df = pd.DataFrame(data)
df['value'] = range(len(date_rng))

In [3]:
# Example 1: Convert datetime to a different time zone
df['datetime_utc'] = df['datetime'].dt.tz_localize('UTC')
df['datetime_new_york'] = df['datetime_utc'].dt.tz_convert('America/New_York')


In [4]:
# Example 2: Handling missing time zone information
df_missing_tz = df[df.index % 3 == 0]
df_missing_tz['datetime_missing_tz'] = df_missing_tz['datetime'].dt.tz_localize(None)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_missing_tz['datetime_missing_tz'] = df_missing_tz['datetime'].dt.tz_localize(None)


In [5]:
# Example 3: Handling daylight saving time transitions
spring_dst = pd.Timestamp('2022-03-12 01:59:59', tz='US/Eastern')
fall_dst = pd.Timestamp('2022-11-05 01:59:59', tz='US/Eastern')
df['is_dst_spring'] = df['datetime_new_york'] == spring_dst
df['is_dst_fall'] = df['datetime_new_york'] == fall_dst


In [6]:
# Example 4: Convert time zones for entire DataFrame
df['datetime_london'] = df['datetime_utc'].dt.tz_convert('Europe/London')
df['datetime_tokyo'] = df['datetime_utc'].dt.tz_convert('Asia/Tokyo')


In [7]:
# Example 5: Handling daylight saving time transitions using DST rules
eastern = df['datetime_new_york'].dt.tz_localize(None)
spring_dst_rules = eastern.dt.tz_localize('US/Eastern', ambiguous='NaT', nonexistent='NaT')
fall_dst_rules = eastern.dt.tz_localize('US/Eastern', ambiguous='infer', nonexistent='shift_forward')
df['spring_dst_rules'] = spring_dst_rules
df['fall_dst_rules'] = fall_dst_rules


In [8]:
# Example 6: Handling time zones when reading from CSV
df.to_csv('output-contends/timezone_example.csv', index=False)
df_from_csv = pd.read_csv('output-contends/timezone_example.csv', parse_dates=['datetime', 'datetime_utc', 'datetime_new_york'])


In [9]:
# Example 7: Time zone conversion with resampling
df_resampled = df.set_index('datetime_new_york').resample('D')['value'].sum()


In [10]:
# Example 8: Time zone conversion with groupby
df_grouped = df.groupby(df['datetime_new_york'].dt.date)['value'].sum()


In [11]:
# Example 9: Working with time zones in multi-level index
df_multiindex = df.set_index(['datetime_new_york', 'is_dst_spring'])


In [12]:
# Example 10: Time zone conversion with apply
df['hour'] = df['datetime_new_york'].dt.hour
df['hour_utc'] = df['hour'].apply(lambda x: pd.Timestamp(x, tz='UTC'))


In [13]:
# Example 11: Using pytz library for custom time zones
import pytz
df['custom_tz'] = df['datetime'].dt.tz_localize(pytz.timezone('Europe/Berlin'))


NonExistentTimeError: 2022-03-27 02:00:00

In [14]:
# Example 12: Time zone arithmetic
df['time_diff'] = df['datetime_new_york'] - df['datetime_new_york'].shift(1)


In [15]:
# Example 13: Converting time zone strings to time zones
df['timezone'] = df['datetime_new_york'].dt.tz.zone


In [16]:
# Example 14: Handling time zone conversion ambiguity
ambiguous_time = pd.Timestamp('2022-11-06 01:30:00', tz='US/Eastern')
ambiguous_time_dst = ambiguous_time.tz_localize(None)
ambiguous_time_eastern = ambiguous_time_dst.tz_localize('US/Eastern', ambiguous='NaT')


AmbiguousTimeError: Cannot infer dst time from 2022-11-06 01:30:00, try using the 'ambiguous' argument

In [17]:
# Example 15: Handling missing time zone conversion
missing_tz_time = pd.Timestamp('2022-07-01 12:00:00')
missing_tz_time_new_york = missing_tz_time.tz_localize('US/Eastern', nonexistent='shift_forward')


In [18]:
# Example 16: Using datetime accessor with time zones
df['weekday'] = df['datetime_new_york'].dt.day_name()


In [19]:
# Example 17: Time zone conversion with vectorized arithmetic
df['hour_diff'] = df['datetime_new_york'].dt.hour - df['datetime_london'].dt.hour


In [20]:
# Example 18: Handling time zones in multi-level columns
df_multi_col = df.set_index(['datetime_new_york', 'is_dst_spring'])['value'].unstack()


In [21]:
# Example 19: Time zone-aware arithmetic operations
df['time_diff_hours'] = (df['datetime_new_york'] - df['datetime_new_york'].shift(1)).dt.total_seconds() / 3600


In [22]:
# Example 20: Handling time zones with applymap
df_hour_map = df[['datetime_new_york', 'hour']].set_index('datetime_new_york').applymap(lambda x: x + 1)


In [23]:
# Example 21: Handling time zones with join
df_join = df.merge(df_from_csv, on='datetime_new_york')

ValueError: You are trying to merge on datetime64[ns, America/New_York] and object columns. If you wish to proceed you should use pd.concat

In [24]:
# Example 22: Working with time zones and timedelta
df['next_hour'] = df['datetime_new_york'] + pd.Timedelta(hours=1)

In [25]:
# Example 23: Handling time zones with boolean indexing
df_daylight = df[df['datetime_new_york'].dt.dst != pd.Timedelta(0)]


AttributeError: 'DatetimeProperties' object has no attribute 'dst'

In [26]:
# Example 24: Using tz_localize and tz_convert with different time zones
df['datetime_berlin'] = df['datetime'].dt.tz_localize('Europe/Berlin')
df['datetime_utc_berlin'] = df['datetime_berlin'].dt.tz_convert('UTC')


NonExistentTimeError: 2022-03-27 02:00:00

In [27]:
# Example 25: Using date_range with time zones
date_rng_new_york = pd.date_range(start='2022-01-01', end='2022-12-31', freq='D', tz='US/Eastern')


In [28]:
# Display a sample of the DataFrame
print(df.head())

             datetime  value              datetime_utc  \
0 2022-01-01 00:00:00      0 2022-01-01 00:00:00+00:00   
1 2022-01-01 01:00:00      1 2022-01-01 01:00:00+00:00   
2 2022-01-01 02:00:00      2 2022-01-01 02:00:00+00:00   
3 2022-01-01 03:00:00      3 2022-01-01 03:00:00+00:00   
4 2022-01-01 04:00:00      4 2022-01-01 04:00:00+00:00   

          datetime_new_york  is_dst_spring  is_dst_fall  \
0 2021-12-31 19:00:00-05:00          False        False   
1 2021-12-31 20:00:00-05:00          False        False   
2 2021-12-31 21:00:00-05:00          False        False   
3 2021-12-31 22:00:00-05:00          False        False   
4 2021-12-31 23:00:00-05:00          False        False   

            datetime_london            datetime_tokyo  \
0 2022-01-01 00:00:00+00:00 2022-01-01 09:00:00+09:00   
1 2022-01-01 01:00:00+00:00 2022-01-01 10:00:00+09:00   
2 2022-01-01 02:00:00+00:00 2022-01-01 11:00:00+09:00   
3 2022-01-01 03:00:00+00:00 2022-01-01 12:00:00+09:00   
4 2022-01-0

In [29]:
# Example 26: Handling time zones with timedelta
df['time_until_next_hour'] = df['next_hour'] - df['datetime_new_york']


In [30]:
# Example 27: Handling time zones with date offsets
df['next_business_day'] = df['datetime_new_york'] + pd.DateOffset(days=1)


NonExistentTimeError: 2022-03-13 02:00:00

In [31]:
# Example 28: Handling time zones with reindexing
df_reindexed = df.set_index('datetime_new_york').reindex(date_rng_new_york)

In [32]:
# Example 29: Handling time zones with rolling windows
rolling_window = df['value'].rolling('6H', closed='right').sum()


ValueError: window must be an integer 0 or greater

In [33]:
# Example 30: Handling time zones with expanding windows
expanding_window = df['value'].expanding(min_periods=1).sum()


In [34]:
# Example 31: Handling time zones with shifting
df['value_shifted'] = df['value'].shift(1)

In [35]:
# Example 32: Handling time zones with interpolate
df_interpolated = df.set_index('datetime_new_york').interpolate(method='time')


ValueError: Invalid fill method. Expecting pad (ffill) or backfill (bfill). Got time

In [36]:
# Example 33: Handling time zones with resample and aggregation
df_resampled_mean = df.set_index('datetime_new_york').resample('D')['value'].mean()


In [37]:
# Example 34: Handling time zones with resample and custom aggregation
def custom_agg(arr):
    return arr.max() - arr.min()

df_resampled_custom_agg = df.set_index('datetime_new_york').resample('D')['value'].apply(custom_agg)


In [38]:
# Example 35: Handling time zones with resample and rolling aggregation
df_resampled_rolling_sum = df.set_index('datetime_new_york').resample('D')['value'].rolling(window=3, min_periods=1).sum()


IndexError: index 365 is out of bounds for axis 0 with size 365

In [39]:
# Example 36: Handling time zones with time-based slicing
df_slice = df[(df['datetime_new_york'] > '2022-03-15') & (df['datetime_new_york'] < '2022-03-20')]


In [40]:
# Example 37: Handling time zones with between_time
df_between_time = df.set_index('datetime_new_york').between_time('10:00', '16:00')


In [41]:
# Example 38: Handling time zones with at_time
df_at_time = df.set_index('datetime_new_york').at_time('15:30')


In [42]:
# Example 39: Handling time zones with shift
df_shifted = df.set_index('datetime_new_york').shift(freq='2H')


In [43]:
# Example 40: Handling time zones with tshift
df_tshifted = df.set_index('datetime_new_york').tshift(freq='2H')


  df_tshifted = df.set_index('datetime_new_york').tshift(freq='2H')


In [44]:
# Example 41: Handling time zones with time zone-aware rolling windows
rolling_window_tz = df['value'].rolling('6H', closed='right', min_periods=1).sum()


ValueError: window must be an integer 0 or greater

In [45]:
# Example 42: Handling time zones with time zone-aware expanding windows
expanding_window_tz = df['value'].expanding('6H', closed='right', min_periods=1).sum()


TypeError: NDFrame.expanding() got an unexpected keyword argument 'closed'

In [46]:
# Example 43: Handling time zones with time zone-aware diff
df['diff_value'] = df['value'].diff()


In [47]:
# Example 44: Handling time zones with time zone-aware pct_change
df['pct_change_value'] = df['value'].pct_change()


In [48]:
# Example 45: Handling time zones with tz_localize and tz_convert in apply
df['next_hour_utc'] = df['datetime_new_york'].apply(lambda x: x + pd.Timedelta(hours=1)).dt.tz_localize('US/Eastern').dt.tz_convert('UTC')


TypeError: Already tz-aware, use tz_convert to convert.

In [49]:
# Example 46: Handling time zones with rolling correlation
rolling_corr = df['value'].rolling(window=5).corr(df['value_shifted'])


In [50]:
# Example 47: Handling time zones with time zone-aware rolling aggregation
df_rolling_mean_tz = df.set_index('datetime_new_york').rolling('3H', closed='right', min_periods=1)['value'].mean()


In [51]:
# Example 48: Handling time zones with time zone-aware expanding aggregation
df_expanding_sum_tz = df.set_index('datetime_new_york').expanding('3H', min_periods=1)['value'].sum()


TypeError: NDFrame.expanding() got multiple values for argument 'min_periods'

In [52]:
# Example 49: Handling time zones with time zone-aware ewm
df_ewm_tz = df.set_index('datetime_new_york')['value'].ewm(span=3, min_periods=1).mean()


In [53]:
# Example 50: Handling time zones with time zone-aware groupby
df_groupby_mean_tz = df.groupby(df['datetime_new_york'].dt.date)['value'].transform('mean')


In [54]:
print(df.head())

             datetime  value              datetime_utc  \
0 2022-01-01 00:00:00      0 2022-01-01 00:00:00+00:00   
1 2022-01-01 01:00:00      1 2022-01-01 01:00:00+00:00   
2 2022-01-01 02:00:00      2 2022-01-01 02:00:00+00:00   
3 2022-01-01 03:00:00      3 2022-01-01 03:00:00+00:00   
4 2022-01-01 04:00:00      4 2022-01-01 04:00:00+00:00   

          datetime_new_york  is_dst_spring  is_dst_fall  \
0 2021-12-31 19:00:00-05:00          False        False   
1 2021-12-31 20:00:00-05:00          False        False   
2 2021-12-31 21:00:00-05:00          False        False   
3 2021-12-31 22:00:00-05:00          False        False   
4 2021-12-31 23:00:00-05:00          False        False   

            datetime_london            datetime_tokyo  \
0 2022-01-01 00:00:00+00:00 2022-01-01 09:00:00+09:00   
1 2022-01-01 01:00:00+00:00 2022-01-01 10:00:00+09:00   
2 2022-01-01 02:00:00+00:00 2022-01-01 11:00:00+09:00   
3 2022-01-01 03:00:00+00:00 2022-01-01 12:00:00+09:00   
4 2022-01-0

In [55]:
# Example 51: Handling time zones with time zone-aware rolling apply
rolling_apply = df.set_index('datetime_new_york')['value'].rolling(window=5, min_periods=1).apply(lambda x: x.sum() / len(x))


In [56]:
# Example 52: Handling time zones with time zone-aware expanding apply
expanding_apply = df.set_index('datetime_new_york')['value'].expanding(min_periods=1).apply(lambda x: x.max() - x.min())


In [57]:
# Example 53: Handling time zones with time zone-aware rolling correlation
rolling_corr_tz = df.set_index('datetime_new_york')['value'].rolling(window=10, min_periods=1).corr(df['value_shifted'])


In [58]:
# Example 54: Handling time zones with time zone-aware expanding correlation
expanding_corr_tz = df.set_index('datetime_new_york')['value'].expanding(min_periods=1).corr(df['value_shifted'])


In [59]:
# Example 55: Handling time zones with time zone-aware rolling apply with custom function
def custom_rolling_apply(arr):
    return arr.mean() - arr.min()

rolling_apply_custom = df.set_index('datetime_new_york')['value'].rolling(window=5, min_periods=1).apply(custom_rolling_apply)
