# Timezones
While doing time series analysis, one should take into consideration the timezones. 

        

In [11]:
# Fetch the data and load it. 
# We have taken the data from Yahoo Finance website and downloaded the stock prices for Microsoft [ticker: MSFT] from 
# 20 August 2016 to 20 August 2017. 
import pandas as pd
df = pd.read_csv('MSFT.csv', header=0, parse_dates=True, index_col='Date')
df


# Observe that header=0, means that the first row entry should be considered as the header. 
#              parse_dates=True ensures that the Date field is formatted as YYYY-MM-DD
#              index_col='Date' ensures that the Date column is marked as the index/ row label. 


Unnamed: 0_level_0,Close_Price
Date,Unnamed: 1_level_1
2017-08-21 09:00:00,44.68
2017-08-21 09:15:00,45.57
2017-08-21 09:30:00,45.799999
2017-08-21 09:45:00,46.0
2017-08-21 10:00:00,45.939999
2017-08-21 10:15:00,45.48
2017-08-21 10:30:00,45.349998
2017-08-21 10:45:00,45.66
2017-08-21 11:00:00,45.5
2017-08-21 11:15:00,46.189999


# Confirm the index/ row label

In [8]:
df.index                              # Observe that the index has a data type of Datetime [Naive].
                                      # Datetime objects are of 2 types:
                                      # 1. Naive Datetime object [Unaware of the timezone information]
                                      # 2. Time zone aware datetime.       

DatetimeIndex(['2017-08-21 09:00:00', '2017-08-21 09:15:00',
               '2017-08-21 09:30:00', '2017-08-21 09:45:00',
               '2017-08-21 10:00:00', '2017-08-21 10:15:00',
               '2017-08-21 10:30:00', '2017-08-21 10:45:00',
               '2017-08-21 11:00:00', '2017-08-21 11:15:00',
               '2017-08-21 11:30:00', '2017-08-21 11:45:00',
               '2017-08-21 12:00:00'],
              dtype='datetime64[ns]', name='Date', freq=None)

# Converting Naive DatetimeIndex to Time zone aware DatetimeIndex

In [12]:
df = df.tz_localize(tz='US/Eastern')
df.index                                                     # Observe the timezone information added newly. 
                                                             # Observe that the US Eastern timezone is 4 hours behind the UTC
    

DatetimeIndex(['2017-08-21 09:00:00-04:00', '2017-08-21 09:15:00-04:00',
               '2017-08-21 09:30:00-04:00', '2017-08-21 09:45:00-04:00',
               '2017-08-21 10:00:00-04:00', '2017-08-21 10:15:00-04:00',
               '2017-08-21 10:30:00-04:00', '2017-08-21 10:45:00-04:00',
               '2017-08-21 11:00:00-04:00', '2017-08-21 11:15:00-04:00',
               '2017-08-21 11:30:00-04:00', '2017-08-21 11:45:00-04:00',
               '2017-08-21 12:00:00-04:00'],
              dtype='datetime64[ns, US/Eastern]', name='Date', freq=None)

# Converting US/Eastern Timezone to Berlin Timezone

In [16]:
df = df.tz_convert(tz='Europe/Berlin')
df.index                                                      # In Berlin its UTC + 2 hours. 

DatetimeIndex(['2017-08-21 15:00:00+02:00', '2017-08-21 15:15:00+02:00',
               '2017-08-21 15:30:00+02:00', '2017-08-21 15:45:00+02:00',
               '2017-08-21 16:00:00+02:00', '2017-08-21 16:15:00+02:00',
               '2017-08-21 16:30:00+02:00', '2017-08-21 16:45:00+02:00',
               '2017-08-21 17:00:00+02:00', '2017-08-21 17:15:00+02:00',
               '2017-08-21 17:30:00+02:00', '2017-08-21 17:45:00+02:00',
               '2017-08-21 18:00:00+02:00'],
              dtype='datetime64[ns, Europe/Berlin]', name='Date', freq=None)

# Convert to Calcutta Timezone

In [22]:
df = df.tz_convert(tz="Asia/Calcutta")
df                                                    # Observe that it is 5:30 min ahead of UTC.

Unnamed: 0_level_0,Close_Price
Date,Unnamed: 1_level_1
2017-08-21 18:30:00+05:30,44.68
2017-08-21 18:45:00+05:30,45.57
2017-08-21 19:00:00+05:30,45.799999
2017-08-21 19:15:00+05:30,46.0
2017-08-21 19:30:00+05:30,45.939999
2017-08-21 19:45:00+05:30,45.48
2017-08-21 20:00:00+05:30,45.349998
2017-08-21 20:15:00+05:30,45.66
2017-08-21 20:30:00+05:30,45.5
2017-08-21 20:45:00+05:30,46.189999


# View all possible supported time zones

In [23]:
from pytz import all_timezones
all_timezones                                              # View the summary of all possible timezones available. 

['Africa/Abidjan',
 'Africa/Accra',
 'Africa/Addis_Ababa',
 'Africa/Algiers',
 'Africa/Asmara',
 'Africa/Asmera',
 'Africa/Bamako',
 'Africa/Bangui',
 'Africa/Banjul',
 'Africa/Bissau',
 'Africa/Blantyre',
 'Africa/Brazzaville',
 'Africa/Bujumbura',
 'Africa/Cairo',
 'Africa/Casablanca',
 'Africa/Ceuta',
 'Africa/Conakry',
 'Africa/Dakar',
 'Africa/Dar_es_Salaam',
 'Africa/Djibouti',
 'Africa/Douala',
 'Africa/El_Aaiun',
 'Africa/Freetown',
 'Africa/Gaborone',
 'Africa/Harare',
 'Africa/Johannesburg',
 'Africa/Juba',
 'Africa/Kampala',
 'Africa/Khartoum',
 'Africa/Kigali',
 'Africa/Kinshasa',
 'Africa/Lagos',
 'Africa/Libreville',
 'Africa/Lome',
 'Africa/Luanda',
 'Africa/Lubumbashi',
 'Africa/Lusaka',
 'Africa/Malabo',
 'Africa/Maputo',
 'Africa/Maseru',
 'Africa/Mbabane',
 'Africa/Mogadishu',
 'Africa/Monrovia',
 'Africa/Nairobi',
 'Africa/Ndjamena',
 'Africa/Niamey',
 'Africa/Nouakchott',
 'Africa/Ouagadougou',
 'Africa/Porto-Novo',
 'Africa/Sao_Tome',
 'Africa/Timbuktu',
 'Africa/

# Using timezone in date_range()

In [36]:
dates = pd.date_range(start='01/01/2018', periods=10, freq='H')   # Here the start date is Jan 01, 2018   
print(dates)                                                      # Total number of periods to be generated from start date = 10
                                                                  # Frequency of Date generation = 'Hourly'     
                                                                  # Here we get Naive Datetime objects [Sans Timezone info]  

# Lets include the timezone information
print()
dates = pd.date_range(start='01/01/2018', periods=10, freq='H', tz='Europe/London')
print(dates)

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00', '2018-01-01 03:00:00',
               '2018-01-01 04:00:00', '2018-01-01 05:00:00',
               '2018-01-01 06:00:00', '2018-01-01 07:00:00',
               '2018-01-01 08:00:00', '2018-01-01 09:00:00'],
              dtype='datetime64[ns]', freq='H')

DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-01 01:00:00+00:00',
               '2018-01-01 02:00:00+00:00', '2018-01-01 03:00:00+00:00',
               '2018-01-01 04:00:00+00:00', '2018-01-01 05:00:00+00:00',
               '2018-01-01 06:00:00+00:00', '2018-01-01 07:00:00+00:00',
               '2018-01-01 08:00:00+00:00', '2018-01-01 09:00:00+00:00'],
              dtype='datetime64[ns, Europe/London]', freq='H')


# Using timezones supported by the OS
Until now, we have been using timezones supported by pytz. Now we need to use the timezones supported by the native OS

In [34]:
dates = pd.date_range(start='01/01/2018', periods=10, freq='H', tz='dateutil/Europe/London')
print(dates)


DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-01 01:00:00+00:00',
               '2018-01-01 02:00:00+00:00', '2018-01-01 03:00:00+00:00',
               '2018-01-01 04:00:00+00:00', '2018-01-01 05:00:00+00:00',
               '2018-01-01 06:00:00+00:00', '2018-01-01 07:00:00+00:00',
               '2018-01-01 08:00:00+00:00', '2018-01-01 09:00:00+00:00'],
              dtype='datetime64[ns, tzfile('GB-Eire')]', freq='H')


# Timezone arithmetics

In [48]:
# First construct a time series with a frequency of 30 min 
dates = pd.date_range(start='01/01/2019 09:00:00', periods = 10, freq='30min')
series = pd.Series(range(10), index=dates)
print(series)
print()
print()

# Now convert this series to Berlin timezone
bSeries = series.tz_localize(tz='Europe/Berlin')
print(bSeries)

# Verify whether the timezone is truely Berlin
bSeries.index                                                                                  # Observe the "dtype" parameter

# Next convert the same time series to Calcutta time zone
print()
cSeries = series.tz_localize(tz='Asia/Calcutta')
print(cSeries)


2019-01-01 09:00:00    0
2019-01-01 09:30:00    1
2019-01-01 10:00:00    2
2019-01-01 10:30:00    3
2019-01-01 11:00:00    4
2019-01-01 11:30:00    5
2019-01-01 12:00:00    6
2019-01-01 12:30:00    7
2019-01-01 13:00:00    8
2019-01-01 13:30:00    9
Freq: 30T, dtype: int64


2019-01-01 09:00:00+01:00    0
2019-01-01 09:30:00+01:00    1
2019-01-01 10:00:00+01:00    2
2019-01-01 10:30:00+01:00    3
2019-01-01 11:00:00+01:00    4
2019-01-01 11:30:00+01:00    5
2019-01-01 12:00:00+01:00    6
2019-01-01 12:30:00+01:00    7
2019-01-01 13:00:00+01:00    8
2019-01-01 13:30:00+01:00    9
Freq: 30T, dtype: int64

2019-01-01 09:00:00+05:30    0
2019-01-01 09:30:00+05:30    1
2019-01-01 10:00:00+05:30    2
2019-01-01 10:30:00+05:30    3
2019-01-01 11:00:00+05:30    4
2019-01-01 11:30:00+05:30    5
2019-01-01 12:00:00+05:30    6
2019-01-01 12:30:00+05:30    7
2019-01-01 13:00:00+05:30    8
2019-01-01 13:30:00+05:30    9
Freq: 30T, dtype: int64


In [50]:
# Lets add the 2 time zones
bSeries + cSeries

# Inference: We got some value(s) for some rows while others time series values were rendered NaN. 
# What happened behind the scenes is: It converted both the time series to UTC [Universal Coordinated Timezone]
# and then printed only those time series values where there was an overlap. 


2019-01-01 03:30:00+00:00    NaN
2019-01-01 04:00:00+00:00    NaN
2019-01-01 04:30:00+00:00    NaN
2019-01-01 05:00:00+00:00    NaN
2019-01-01 05:30:00+00:00    NaN
2019-01-01 06:00:00+00:00    NaN
2019-01-01 06:30:00+00:00    NaN
2019-01-01 07:00:00+00:00    NaN
2019-01-01 07:30:00+00:00    NaN
2019-01-01 08:00:00+00:00    9.0
2019-01-01 08:30:00+00:00    NaN
2019-01-01 09:00:00+00:00    NaN
2019-01-01 09:30:00+00:00    NaN
2019-01-01 10:00:00+00:00    NaN
2019-01-01 10:30:00+00:00    NaN
2019-01-01 11:00:00+00:00    NaN
2019-01-01 11:30:00+00:00    NaN
2019-01-01 12:00:00+00:00    NaN
2019-01-01 12:30:00+00:00    NaN
Freq: 30T, dtype: float64