In [None]:
import numpy as np
import pandas as pd
np.random.seed(12345)
import matplotlib.pyplot as plt
plt.rc("figure", figsize=(10, 6))
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_columns = 20
pd.options.display.max_rows = 20
pd.options.display.max_colwidth = 80
np.set_printoptions(precision=4, suppress=True)

In [None]:
import numpy as np
import pandas as pd

# Date and Time Data Types and Tools
 The Python standard library includes data types for date and time data, as well as
 calendar-related functionality. The datetime, time, and calendar modules are the
 main places to start. The datetime.datetime type, or simply datetime, is widely
 used:

In [None]:
from datetime import datetime
now = datetime.now()
now

print(now.year)
print(now.month)
print(now.day)


2024
11
5


 datetime stores both the date and time down to the microsecond. datetime.timedelta,
 or simply timedelta, represents the temporal difference between two datetime
 objects:

In [None]:
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
print(delta)
print(type(delta))
print(delta.days)
# delta.seconds

926 days, 15:45:00
<class 'datetime.timedelta'>
926


You can add (or subtract) a timedelta or multiple thereof to a datetime object to
 yield a new shifted object:

In [None]:
from datetime import timedelta
start = datetime(2011, 1, 7)
start + timedelta(12)
start - 2 * timedelta(12)

 # Converting Between String and Datetime
 You can format datetime objects and pandas Timestamp objects, which I’ll introduce
 later, as strings using str or the strftime method, passing a format specification:

In [None]:
stamp = datetime(2011, 1, 2)
print(str(stamp))
print(stamp.strftime("%Y-%Y-%m-%d.     %A"))

2011-01-02 00:00:00
2011-2011-01-02.     Sunday


 You can use many of the same format codes to convert strings to dates using
 datetime.strptime (but some codes, like %F, cannot be used):

In [None]:
value = "2011-01-03"
print(datetime.strptime(value, "%Y-%m-%d"))
datestrs = ["7/6/2011", "8/6/2011"]
[datetime.strptime(x, "%m/%d/%Y") for x in datestrs]

2011-01-03 00:00:00


[datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]

 datetime.strptime is one way to parse a date with a known format.
 pandas is generally oriented toward working with arrays of dates, whether used as
 an axis index or a column in a DataFrame. The pandas.to_datetime method parses
 many different kinds of date representations. Standard date formats like ISO 8601 can
 be parsed quickly:

In [None]:
# datestrs = ["2011-07-06 12:00:00", "2011-08-06 00:00:00"]
datestrs = ["2011-08-06 00:00:00-05:00", "2011-08-06 00:00:00-05:00"]
x = pd.to_datetime(datestrs)
print(x)
print(x.tz)

DatetimeIndex(['2011-08-06 00:00:00-05:00', '2011-08-06 00:00:00-05:00'], dtype='datetime64[ns, UTC-05:00]', freq=None)
UTC-05:00


It also handles values that should be considered missing (None, empty string, etc.):

In [None]:
idx = pd.to_datetime(datestrs + [None])
idx
idx[2]
pd.isna(idx)

 # Time Series Basics
 A basic kind of time series object in pandas is a Series indexed by timestamps, which
 is often represented outside of pandas as Python strings or datetime objects:

In [None]:
dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),
         datetime(2011, 1, 7), datetime(2011, 1, 8),
         datetime(2011, 1, 10), datetime(2011, 1, 12)]
ts = pd.Series(np.random.standard_normal(6), index=dates)
ts

Unnamed: 0,0
2011-01-02,-0.204708
2011-01-05,0.478943
2011-01-07,-0.519439
2011-01-08,-0.55573
2011-01-10,1.965781
2011-01-12,1.393406


Under the hood, these datetime objects have been put in a DatetimeIndex:

In [None]:
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

 Like other Series, arithmetic operations between differently indexed time series
 automatically align on the dates:

In [None]:
ts + ts[::2]

Unnamed: 0,0
2011-01-02,-0.409415
2011-01-05,
2011-01-07,-1.038877
2011-01-08,
2011-01-10,3.931561
2011-01-12,


 pandas stores timestamps using NumPy’s datetime64 data type at the nanosecond
 resolution:

In [None]:
ts.index.dtype

dtype('<M8[ns]')

 Scalar values from a DatetimeIndex are pandas Timestamp objects:

In [None]:
stamp = ts.index[0]
stamp

Timestamp('2011-01-02 00:00:00')

 # Indexing, Selection, Subsetting
 Time series behaves like any other Series when you are indexing and selecting data
 based on the label:

In [None]:
stamp = ts.index[2]
ts[stamp]

-0.5194387150567381

 As a convenience, you can also pass a string that is interpretable as a date:

In [None]:
ts["2011-01-10"]

1.9657805725027142

 For longer time series, a year or only a year and month can be passed to easily select
 slices of data (pandas.date_range is discussed in more detail in “Generating Date
 Ranges” on page 367):

In [None]:
longer_ts = pd.Series(np.random.standard_normal(1000),
                      index=pd.date_range("2000-01-01", periods=1000))
# print(longer_ts)
longer_ts["2001-01"]

Unnamed: 0,0
2001-01-01,-0.786588
2001-01-02,0.006836
2001-01-03,-0.479013
2001-01-04,-1.370790
2001-01-05,-0.206097
...,...
2001-01-27,-1.884929
2001-01-28,0.080282
2001-01-29,-0.039967
2001-01-30,-0.533834


 Here, the string "2001-01" is interpreted as a year and month and selects that time
 period (January 2001). This works for any other month as well:

In [None]:
longer_ts["2001-05"]

 Slicing with datetime objects works as well:

In [None]:
ts[datetime(2011, 1, 7):]
ts[datetime(2011, 1, 7):datetime(2011, 1, 10)]

Unnamed: 0,0
2011-01-07,-0.519439
2011-01-08,-0.55573
2011-01-10,1.965781


Because most time series data is ordered chronologically, you can slice with
timestamps not contained in a time series to perform a range query:

In [None]:
ts
ts["2011-01-06":"2012-01-11"]

Unnamed: 0,0
2011-01-07,-0.519439
2011-01-08,-0.55573
2011-01-10,1.965781
2011-01-12,1.393406


As before, you can pass a string date, datetime, or timestamp. Remember that slicing
 in this manner produces views on the source time series, like slicing NumPy arrays.
 This means that no data is copied, and modifications on the slice will be reflected in
 the original data.
 There is an equivalent instance method, truncate, that slices a Series between two
 dates:

In [None]:
ts.truncate(after="2011-01-09")

Unnamed: 0,0
2011-01-02,-0.204708
2011-01-05,0.478943
2011-01-07,-0.519439
2011-01-08,-0.55573


All of this holds true for DataFrame as well, indexing on its rows:

In [None]:
dates = pd.date_range("2000-01-01", periods=100, freq="W-WED")
long_df = pd.DataFrame(np.random.standard_normal((100, 4)),
                       index=dates,
                       columns=["Colorado", "Texas",
                                "New York", "Ohio"])
long_df.loc["2001-05"]

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,-2.149617,1.552855,0.405074,0.90038
2001-05-09,0.426804,0.060434,-1.964908,1.570305
2001-05-16,-0.875665,0.580321,0.746482,1.174756
2001-05-23,-0.567599,-0.803335,0.600198,1.367461
2001-05-30,-0.295445,-0.085571,0.777407,1.067479


 # Time Series with Duplicate Indices
 In some applications, there may be multiple data observations falling on a particular
 timestamp. Here is an example:

In [None]:
dates = pd.DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-02",
                          "2000-01-02", "2000-01-03"])
dup_ts = pd.Series(np.arange(5), index=dates)
dup_ts

Unnamed: 0,0
2000-01-01,0
2000-01-02,1
2000-01-02,2
2000-01-02,3
2000-01-03,4


 We can tell that the index is not unique by checking its is_unique property:

In [None]:
dup_ts.index.is_unique

False

 Indexing into this time series will now either produce scalar values or slices,
 depending on whether a timestamp is duplicated:

In [None]:
# dup_ts["2000-01-03"]  # not duplicated
dup_ts["2000-01-02"]  # duplicated

Unnamed: 0,0
2000-01-02,1
2000-01-02,2
2000-01-02,3


 Suppose you wanted to aggregate the data having nonunique timestamps. One way to
 do this is to use groupby and pass level=0 (the one and only level):

In [None]:
grouped = dup_ts.groupby(level=0)
print(grouped.mean())
print(grouped.count())

2000-01-01    0.0
2000-01-02    2.0
2000-01-03    4.0
dtype: float64
2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64


# Date Ranges, Frequencies, and Shifting
 Generic time series in pandas are assumed to be irregular; that is, they have no fixed
 frequency. For many applications this is sufficient. However, it’s often desirable to
 work relative to a fixed frequency, such as daily, monthly, or every 15 minutes, even
 if that means introducing missing values into a time series.

In [None]:
ts
resampler = ts.resample("D")
resampler

 The string "D" is interpreted as daily frequency.

 # Generating Date Ranges
 While I used it previously without explanation, pandas.date_range is responsible
 for generating a DatetimeIndex with an indicated length according to a particular
 frequency:

In [None]:
index = pd.date_range("2012-04-01", "2012-06-01")
index

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
      

By default, pandas.date_range generates daily timestamps. If you pass only a start or
 end date, you must pass a number of periods to generate:

In [None]:
# pd.date_range(start="2012-04-01", periods=20)
# pd.date_range(end="2012-06-01", periods=20)

DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')

 The start and end dates define strict boundaries for the generated date index. For
 example, if you wanted a date index containing the last business day of each month,
 you would pass the "BME" frequency (spelled "BM" in pandas versions before 2.2):

In [None]:
pd.date_range("2000-01-01", "2000-12-03", freq="BM")

  pd.date_range("2000-01-01", "2000-12-03", freq="BM")


DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BME')

 pandas.date_range by default preserves the time (if any) of the start or end
 timestamp:

In [None]:
pd.date_range("2012-05-02 12:56:31", periods=5)

DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',
               '2012-05-04 12:56:31', '2012-05-05 12:56:31',
               '2012-05-06 12:56:31'],
              dtype='datetime64[ns]', freq='D')

 Sometimes you will have start or end dates with time information but want to
 generate a set of timestamps normalized to midnight as a convention. To do this,
 there is a normalize option:

In [None]:
pd.date_range("2012-05-02 12:56:31", periods=5, normalize=True)

DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

# Frequencies and Date Offsets
 Frequencies in pandas are composed of a base frequency and a multiplier. Base
 frequencies are typically referred to by a string alias, like "ME" for month end or "h" for
 hourly (pandas versions before 2.2 spell these "M" and "H"). For each base frequency,
 there is an object referred to as a date offset. For
 example, hourly frequency can be represented with the Hour class:

In [None]:
from pandas.tseries.offsets import Hour, Minute
hour = Hour()
hour

<Hour>

You can define a multiple of an offset by passing an integer:

In [None]:
four_hours = Hour(4)
four_hours

<4 * Hours>

 In most applications, you would never need to explicitly create one of these objects;
 instead you’d use a string alias like "h" or "4h" (written "H" and "4H" in pandas
 versions before 2.2). Putting an integer before the base
 frequency creates a multiple:

In [None]:
# Lowercase "h" is the hourly alias; the uppercase "H" spelling is deprecated since pandas 2.2.
pd.date_range("2000-01-01", "2000-01-03 23:59", freq="4h")

  pd.date_range("2000-01-01", "2000-01-03 23:59", freq="4H")


DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4h')

 Many offsets can be combined by addition:

In [None]:
Hour(2) + Minute(30)

<150 * Minutes>

 Similarly, you can pass frequency strings, like "1h30min", that will effectively be
 parsed to the same expression:

In [None]:
pd.date_range("2000-01-01", periods=10, freq="1h30min")

Some frequencies describe points in time that are not evenly spaced. For example,
 "ME" (calendar month end) and "BME" (last business/weekday of month), spelled "M" and
 "BM" in pandas versions before 2.2, depend on the
 number of days in a month and, in the latter case, whether the month ends on a
 weekend or not. We refer to these as anchored offsets.

 Week of month dates
 One useful frequency class is “week of month,” starting with WOM. This enables you to
 get dates like the third Friday of each month:

In [None]:
monthly_dates = pd.date_range("2012-01-01", "2012-09-01", freq="WOM-3FRI")
list(monthly_dates)

[Timestamp('2012-01-20 00:00:00'),
 Timestamp('2012-02-17 00:00:00'),
 Timestamp('2012-03-16 00:00:00'),
 Timestamp('2012-04-20 00:00:00'),
 Timestamp('2012-05-18 00:00:00'),
 Timestamp('2012-06-15 00:00:00'),
 Timestamp('2012-07-20 00:00:00'),
 Timestamp('2012-08-17 00:00:00')]

 # Shifting (Leading and Lagging) Data
 Shifting refers to moving data backward and forward through time. Both Series and
 DataFrame have a shift method for doing naive shifts forward or backward, leaving
 the index unmodified:

In [None]:
ts = pd.Series(np.random.standard_normal(4),
               index=pd.date_range("2000-01-01", periods=4, freq="M"))
print(ts)
print(ts.shift(2))
# ts.shift(-2)

2000-01-31    0.703083
2000-02-29    0.075575
2000-03-31    0.700022
2000-04-30    1.416030
Freq: ME, dtype: float64
2000-01-31         NaN
2000-02-29         NaN
2000-03-31    0.703083
2000-04-30    0.075575
Freq: ME, dtype: float64


  index=pd.date_range("2000-01-01", periods=4, freq="M"))


 When we shift like this, missing data is introduced either at the start or the end of the
 time series.
 A common use of shift is computing consecutive percent changes in a time series or
 multiple time series as DataFrame columns. This is expressed as:
 ts / ts.shift(1) - 1
 Because naive shifts leave the index unmodified, some data is discarded. Thus if the
 frequency is known, it can be passed to shift to advance the timestamps instead of
 simply the data:

In [None]:
# Passing freq advances the timestamps by two month ends instead of moving the
# values, so no data is lost. "ME" replaces the "M" alias deprecated in pandas 2.2.
ts.shift(2, freq="ME")

  ts.shift(2, freq="M")


Unnamed: 0,0
2000-03-31,0.703083
2000-04-30,0.075575
2000-05-31,0.700022
2000-06-30,1.41603


 Other frequencies can be passed, too, giving you some flexibility in how to lead and
 lag the data:

In [None]:
ts.shift(3, freq="D")
# "min" is the minute alias; the older "T" spelling is deprecated since pandas 2.2.
ts.shift(1, freq="90min")

 The "90min" frequency means 90 minutes (pandas versions before 2.2 write it as "90T",
 where T stands for minutes). Note that the freq parameter here indicates the offset
 to apply to the timestamps, but it does not change the underlying frequency of the
 data, if any.

 # Shifting dates with offsets
 The pandas date offsets can also be used with datetime or Timestamp objects:

In [None]:
from pandas.tseries.offsets import Day, MonthEnd
now = datetime(2011, 11, 17)
now + 3 * Day()

Timestamp('2011-11-20 00:00:00')

 If you add an anchored offset like MonthEnd, the first increment will “roll forward” a
 date to the next date according to the frequency rule:

In [None]:
now + MonthEnd()
now + MonthEnd(2)

Timestamp('2011-12-31 00:00:00')

 Anchored offsets can explicitly “roll” dates forward or backward by simply using their
 rollforward and rollback methods, respectively:

In [None]:
offset = MonthEnd()
offset.rollforward(now)
offset.rollback(now)

 A creative use of date offsets is to use these methods with groupby:

In [None]:
ts = pd.Series(np.random.standard_normal(20),
               index=pd.date_range("2000-01-15", periods=20, freq="4D"))
print(ts)
ts.groupby(MonthEnd().rollforward).mean()

2000-01-15   -0.700676
2000-01-19   -0.301605
2000-01-23    0.028612
2000-01-27   -1.947259
2000-01-31    2.033981
2000-02-04   -0.138246
2000-02-08   -0.719689
2000-02-12    2.219406
2000-02-16   -1.659319
2000-02-20    0.877243
2000-02-24    0.540589
2000-02-28    0.045965
2000-03-03    1.437115
2000-03-07    0.280476
2000-03-11   -0.820854
2000-03-15   -1.568038
2000-03-19   -1.330763
2000-03-23   -1.827303
2000-03-27   -0.618024
2000-03-31    1.230260
Freq: 4D, dtype: float64


In [None]:
# Month-end resampling; "ME" replaces the "M" alias deprecated in pandas 2.2.
ts.resample("ME").mean()

# Time Zone Handling
 Working with time zones can be one of the most unpleasant parts of time series
 manipulation. As a result, many time series users choose to work with time series in
 coordinated universal time or UTC, which is the geography-independent international
 standard. Time zones are expressed as offsets from UTC; for example, New York is
 four hours behind UTC during daylight saving time (DST) and five hours behind the
 rest of the year.


  In Python, time zone information comes from the third-party pytz library
 (installable with pip or conda), which exposes the Olson database, a compilation of world
 time zone information. This is especially important for historical data because the
 DST transition dates (and even UTC offsets) have been changed numerous times
 depending on the regional laws. In the United States, the DST transition times have
 been changed many times since 1900!


 For detailed information about the pytz library, you’ll need to look at that library’s
 documentation. As far as this book is concerned, pandas wraps pytz’s functionality
 so you can ignore its API outside of the time zone names. Since pandas has a hard
 dependency on pytz, it isn’t necessary to install it separately. Time zone names can be
 found interactively and in the docs:

In [None]:
import pytz
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

 To get a time zone object from pytz, use pytz.timezone:

In [None]:
tz = pytz.timezone("America/New_York")
tz

<DstTzInfo 'America/New_York' LMT-1 day, 19:04:00 STD>

 Methods in pandas will accept either time zone names or these objects.
 # Time Zone Localization and Conversion
 By default, time series in pandas are time zone naive. For example, consider the
 following time series:

In [None]:
dates = pd.date_range("2012-03-09 09:30", periods=6)
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts

Unnamed: 0,0
2012-03-09 09:30:00,1.802494
2012-03-10 09:30:00,0.311956
2012-03-11 09:30:00,-0.631725
2012-03-12 09:30:00,-2.02892
2012-03-13 09:30:00,0.057322
2012-03-14 09:30:00,0.899711


 The index’s tz field is None:

In [None]:
print(ts.index.tz)

None


 Date ranges can be generated with a time zone set:

In [None]:
pd.date_range("2012-03-09 09:30", periods=10, tz="UTC")

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

 Conversion from naive to localized (reinterpreted as having been observed in a
 particular time zone) is handled by the tz_localize method:

In [None]:
ts
ts_utc = ts.tz_localize("UTC")
ts_utc
ts_utc.index

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

 Once a time series has been localized to a particular time zone, it can be converted to
 another time zone with tz_convert:

In [None]:
ts_utc.tz_convert("America/New_York")

Unnamed: 0,0
2012-03-09 04:30:00-05:00,-0.836888
2012-03-10 04:30:00-05:00,-0.837012
2012-03-11 05:30:00-04:00,-1.322403
2012-03-12 05:30:00-04:00,1.195259
2012-03-13 05:30:00-04:00,-1.85245
2012-03-14 05:30:00-04:00,-1.078315


 In the case of the preceding time series, which straddles a DST transition in the
 America/New_York time zone, we could localize to US Eastern time and convert to,
 say, UTC or Berlin time:

In [None]:
print(ts)
print(ts.index.tz)
ts_eastern = ts.tz_localize("America/New_York")
print(ts_eastern)
# ts_eastern.tz_convert("UTC")
# ts_eastern.tz_convert("Europe/Berlin")

2012-03-09 09:30:00   -0.836888
2012-03-10 09:30:00   -0.837012
2012-03-11 09:30:00   -1.322403
2012-03-12 09:30:00    1.195259
2012-03-13 09:30:00   -1.852450
2012-03-14 09:30:00   -1.078315
Freq: D, dtype: float64
None
2012-03-09 09:30:00-05:00   -0.836888
2012-03-10 09:30:00-05:00   -0.837012
2012-03-11 09:30:00-04:00   -1.322403
2012-03-12 09:30:00-04:00    1.195259
2012-03-13 09:30:00-04:00   -1.852450
2012-03-14 09:30:00-04:00   -1.078315
dtype: float64


 tz_localize and tz_convert are also instance methods on DatetimeIndex:

In [None]:
ts.index.tz_localize("Asia/Shanghai")

# Operations with Time Zone-Aware Timestamp Objects
 Similar to time series and date ranges, individual Timestamp objects similarly can
 be localized from naive to time zone-aware and converted from one time zone to
 another:

In [None]:
stamp = pd.Timestamp("2011-03-12 04:00")
stamp_utc = stamp.tz_localize("utc")
stamp_utc.tz_convert("America/New_York")

You can also pass a time zone when creating the Timestamp:

In [None]:
stamp_moscow = pd.Timestamp("2011-03-12 04:00", tz="Europe/Moscow")
stamp_moscow

 Time zone-aware Timestamp objects internally store a UTC timestamp value as
 nanoseconds since the Unix epoch (January 1, 1970), so changing the time zone does not
 alter the internal UTC value:

In [None]:
stamp_utc.value
stamp_utc.tz_convert("America/New_York").value

 When performing time arithmetic using pandas’s DateOffset objects, pandas
 respects daylight saving time transitions where possible. Here we construct
 timestamps that occur right before DST transitions (forward and backward). First, 30
 minutes before transitioning to DST:

In [None]:
stamp = pd.Timestamp("2012-03-11 01:30", tz="US/Eastern")
stamp
stamp + Hour()

 Then, 90 minutes before transitioning out of DST:

In [None]:
stamp = pd.Timestamp("2012-11-04 00:30", tz="US/Eastern")
stamp
stamp + 2 * Hour()

#  Operations Between Different Time Zones
 If two time series with different time zones are combined, the result will be UTC.
 Since the timestamps are stored under the hood in UTC, this is a straightforward
 operation and requires no conversion:

In [None]:
dates = pd.date_range("2012-03-07 09:30", periods=10, freq="B")
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts
ts1 = ts[:7].tz_localize("Europe/London")
ts2 = ts1[2:].tz_convert("Europe/Moscow")
result = ts1 + ts2
result.index

DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

 Operations between time zone-naive and time zone-aware data are not supported
 and will raise an exception.

# Periods and Period Arithmetic
 Periods represent time spans, like days, months, quarters, or years. The
 pandas.Period class represents this data type:

In [None]:
p = pd.Period("2011", freq="Y-DEC")
p

Period('2011', 'Y-DEC')

 In this case, the Period object represents the full time span from January 1, 2011,
 to December 31, 2011, inclusive. Conveniently, adding and subtracting integers from
 periods has the effect of shifting them by that many units of their frequency:

In [None]:
p + 5
p - 2

Period('2009', 'Y-DEC')

If two periods have the same frequency, their difference is the number of units
 between them as a date offset:

In [None]:
# Use the same "Y-DEC" spelling as the cell that defines `p`; the "A-DEC"
# alias is deprecated since pandas 2.2.
pd.Period("2014", freq="Y-DEC") - p

  pd.Period("2014", freq="A-DEC") - p


<3 * YearEnds: month=12>

 Regular ranges of periods can be constructed with the period_range function:

In [None]:
periods = pd.period_range("2000-01-01", "2000-06-30", freq="M")
periods

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]')

 The PeriodIndex class stores a sequence of periods and can serve as an axis index in
 any pandas data structure:

In [None]:
pd.Series(np.random.standard_normal(6), index=periods)

 If you have an array of strings, you can also use the PeriodIndex class, where all of its
 values are periods:

In [None]:
values = ["2001Q3", "2002Q2", "2003Q1"]
index = pd.PeriodIndex(values, freq="Q-DEC")
index

 # Period Frequency Conversion
 Periods and PeriodIndex objects can be converted to another frequency with their
 asfreq method. As an example, suppose we had an annual period and wanted to
 convert it into a monthly period either at the start or end of the year. This can be
 done like so:

In [None]:
p = pd.Period("2011", freq="Y-DEC")
p
p.asfreq("M", how="start")
print(p)
# p.asfreq("M", how="end")
# p.asfreq("M")

2011


 You can think of Period("2011", "Y-DEC") as being a sort of cursor pointing to a
 span of time, subdivided by monthly periods. See Figure 11-1 for an illustration of
 this. For a fiscal year ending on a month other than December, the corresponding
 monthly subperiods are different:

In [None]:
p = pd.Period("2011", freq="A-JUN")
p
p.asfreq("M", how="start")
p.asfreq("M", how="end")

 When you are converting from high to low frequency, pandas determines the
 superperiod, depending on where the subperiod “belongs.” For example, in Y-JUN
 frequency (spelled A-JUN in pandas versions before 2.2),
 the month Aug-2011 is actually part of the 2012 period:

In [None]:
p = pd.Period("Aug-2011", "M")
p.asfreq("A-JUN")

 Whole PeriodIndex objects or time series can be similarly converted with the same
 semantics:

In [None]:
periods = pd.period_range("2006", "2009", freq="A-DEC")
ts = pd.Series(np.random.standard_normal(len(periods)), index=periods)
ts
ts.asfreq("M", how="start")

 Here, the annual periods are replaced with monthly periods corresponding to the first
 month falling within each annual period. If we instead wanted the last business day
 of each year, we can use the "B" frequency and indicate that we want the end of the
 period:

In [None]:
ts.asfreq("B", how="end")

 # Quarterly Period Frequencies
 Quarterly data is standard in accounting, finance, and other fields. Much quarterly
 data is reported relative to a fiscal year end, typically the last calendar or business day
 of one of the 12 months of the year. Thus, the period 2012Q4 has a different meaning
 depending on fiscal year end. pandas supports all 12 possible quarterly frequencies as
 Q-JAN through Q-DEC:

In [None]:
p = pd.Period("2012Q4", freq="Q-JAN")
p

In the case of a fiscal year ending in January, 2012Q4 runs from November 2011
 through January 2012, which you can check by converting to daily frequency:

In [None]:
p.asfreq("D", how="start")
p.asfreq("D", how="end")

 Thus, it’s possible to do convenient period arithmetic; for example, to get the
 timestamp at 4 P.M. on the second-to-last business day of the quarter, you could do:

In [None]:
# Last business day of the quarter, minus one business day, converted to the
# first minute of that day, plus 16 * 60 minutes (4 P.M.).
# "min" replaces the minute alias "T" deprecated in pandas 2.2.
p4pm = (p.asfreq("B", how="end") - 1).asfreq("min", how="start") + 16 * 60
p4pm
p4pm.to_timestamp()

 The to_timestamp method returns the Timestamp at the start of the period by default.
 You can generate quarterly ranges using pandas.period_range. The arithmetic is
 identical, too:

In [None]:
periods = pd.period_range("2011Q3", "2012Q4", freq="Q-JAN")
ts = pd.Series(np.arange(len(periods)), index=periods)
ts
# Second-to-last business day of each quarter at hour 16 (4 P.M.).
# "h" replaces the hourly alias "H" deprecated in pandas 2.2; `how` is passed
# by keyword for readability.
new_periods = (periods.asfreq("B", how="end") - 1).asfreq("h", how="start") + 16
ts.index = new_periods.to_timestamp()
ts

 # Converting Timestamps to Periods (and Back)
 Series and DataFrame objects indexed by timestamps can be converted to periods
 with the to_period method:

In [None]:
dates = pd.date_range("2000-01-01", periods=3, freq="M")
ts = pd.Series(np.random.standard_normal(3), index=dates)
ts
pts = ts.to_period()
pts

 Since periods refer to nonoverlapping time spans, a timestamp can only belong to a
 single period for a given frequency. While the frequency of the new PeriodIndex is
 inferred from the timestamps by default, you can specify any supported frequency.  There is also no problem with
 having duplicate periods in the result:

In [None]:
dates = pd.date_range("2000-01-29", periods=6)
ts2 = pd.Series(np.random.standard_normal(6), index=dates)
ts2
ts2.to_period("M")

 To convert back to timestamps, use the to_timestamp method, which returns a
 DatetimeIndex:

In [None]:
pts = ts2.to_period()
pts
pts.to_timestamp(how="end")

 # Creating a PeriodIndex from Arrays
 Fixed frequency datasets are sometimes stored with time span information spread
 across multiple columns. For example, in this macroeconomic dataset, the year and
 quarter are in different columns:

In [None]:
data = pd.read_csv("examples/macrodata.csv")
data.head(5)
data["year"]
data["quarter"]

In [None]:
# Build a quarterly PeriodIndex from the separate year and quarter columns.
# PeriodIndex.from_fields replaces passing year=/quarter= to the PeriodIndex
# constructor, which is deprecated since pandas 2.2.
index = pd.PeriodIndex.from_fields(year=data["year"], quarter=data["quarter"],
                                   freq="Q-DEC")
index
data.index = index
data["infl"]

In [None]:
dates = pd.date_range("2000-01-01", periods=100)
ts = pd.Series(np.random.standard_normal(len(dates)), index=dates)
ts
ts.resample("M").mean()
ts.resample("M", kind="period").mean()

In [None]:
# Twelve one-minute observations. "min" replaces the minute alias "T"
# deprecated in pandas 2.2.
dates = pd.date_range("2000-01-01", periods=12, freq="min")
ts = pd.Series(np.arange(len(dates)), index=dates)
ts

In [None]:
ts.resample("5min").sum()

In [None]:
ts.resample("5min", closed="right").sum()

In [None]:
ts.resample("5min", closed="right", label="right").sum()

In [None]:
from pandas.tseries.frequencies import to_offset
result = ts.resample("5min", closed="right", label="right").sum()
result.index = result.index + to_offset("-1s")
result

In [None]:
ts = pd.Series(np.random.permutation(np.arange(len(dates))), index=dates)
ts.resample("5min").ohlc()

In [None]:
frame = pd.DataFrame(np.random.standard_normal((2, 4)),
                     index=pd.date_range("2000-01-01", periods=2,
                                         freq="W-WED"),
                     columns=["Colorado", "Texas", "New York", "Ohio"])
frame

In [None]:
df_daily = frame.resample("D").asfreq()
df_daily

In [None]:
frame.resample("D").ffill()

In [None]:
frame.resample("D").ffill(limit=2)

In [None]:
frame.resample("W-THU").ffill()

In [None]:
frame = pd.DataFrame(np.random.standard_normal((24, 4)),
                     index=pd.period_range("1-2000", "12-2001",
                                           freq="M"),
                     columns=["Colorado", "Texas", "New York", "Ohio"])
frame.head()
annual_frame = frame.resample("A-DEC").mean()
annual_frame

In [None]:
# Q-DEC: Quarterly, year ending in December
annual_frame.resample("Q-DEC").ffill()
annual_frame.resample("Q-DEC", convention="end").asfreq()

In [None]:
annual_frame.resample("Q-MAR").ffill()

In [None]:
N = 15
times = pd.date_range("2017-05-20 00:00", freq="1min", periods=N)
df = pd.DataFrame({"time": times,
                   "value": np.arange(N)})
df

In [None]:
df.set_index("time").resample("5min").count()

In [None]:
df2 = pd.DataFrame({"time": times.repeat(3),
                    "key": np.tile(["a", "b", "c"], N),
                    "value": np.arange(N * 3.)})
df2.head(7)

In [None]:
time_key = pd.Grouper(freq="5min")

In [None]:
resampled = (df2.set_index("time")
             .groupby(["key", time_key])
             .sum())
resampled
resampled.reset_index()

In [None]:
close_px_all = pd.read_csv("examples/stock_px.csv",
                           parse_dates=True, index_col=0)
close_px = close_px_all[["AAPL", "MSFT", "XOM"]]
close_px = close_px.resample("B").ffill()

In [None]:
close_px["AAPL"].plot()
close_px["AAPL"].rolling(250).mean().plot()

In [None]:
plt.figure()
std250 = close_px["AAPL"].pct_change().rolling(250, min_periods=10).std()
std250[5:12]
std250.plot()

In [None]:
expanding_mean = std250.expanding().mean()

In [None]:
plt.figure()

In [None]:
plt.style.use('grayscale')
close_px.rolling(60).mean().plot(logy=True)

In [None]:
close_px.rolling("20D").mean()

In [None]:
plt.figure()

In [None]:
aapl_px = close_px["AAPL"]["2006":"2007"]

ma30 = aapl_px.rolling(30, min_periods=20).mean()
ewma30 = aapl_px.ewm(span=30).mean()

# Give every series its own line style so the curves and legend entries can be
# told apart: solid for the price, dashed for the simple moving average, and
# dotted for the exponentially weighted moving average.
aapl_px.plot(style="k-", label="Price")
ma30.plot(style="k--", label="Simple Moving Avg")
ewma30.plot(style="k:", label="EW MA")
plt.legend()

In [None]:
plt.figure()

In [None]:
spx_px = close_px_all["SPX"]
spx_rets = spx_px.pct_change()
returns = close_px.pct_change()

In [None]:
corr = returns["AAPL"].rolling(125, min_periods=100).corr(spx_rets)
corr.plot()

In [None]:
plt.figure()

In [None]:
corr = returns.rolling(125, min_periods=100).corr(spx_rets)
corr.plot()

In [None]:
plt.figure()

In [None]:
from scipy.stats import percentileofscore
def score_at_2percent(x):
    """Return the percentile rank of the value 0.02 within the sample ``x``.

    Intended as a rolling-window callback: ``x`` is the window of values and
    the result is ``scipy.stats.percentileofscore(x, 0.02)``.
    """
    threshold = 0.02  # the score whose percentile rank is reported
    rank = percentileofscore(x, threshold)
    return rank

result = returns["AAPL"].rolling(250).apply(score_at_2percent)
result.plot()

In [None]:
pd.options.display.max_rows = PREVIOUS_MAX_ROWS