# Period and PeriodIndex

# Create the 2019 YEARLY TIME PERIOD

In [1]:
import pandas as pd
y = pd.Period('2019')                               # Period('2019', 'A-DEC'). Here "A-DEC" means "annual frequency, year ending in December"
y                                                   # Not echoed: a notebook only displays the last expression of a cell (dir(y) below)

# List every attribute and method available on the Period object
dir(y)

['__add__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__radd__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rsub__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__unicode__',
 '__weakref__',
 '_add_delta',
 '_from_ordinal',
 '_maybe_convert_freq',
 '_typ',
 'asfreq',
 'day',
 'dayofweek',
 'dayofyear',
 'days_in_month',
 'daysinmonth',
 'end_time',
 'freq',
 'freqstr',
 'hour',
 'is_leap_year',
 'minute',
 'month',
 'now',
 'ordinal',
 'quarter',
 'qyear',
 'second',
 'start_time',
 'strftime',
 'to_timestamp',
 'week',
 'weekday',
 'weekofyear',
 'year']

# Exploring the properties of Period object

In [2]:
y.start_time                                              # Timestamp('2019-01-01 00:00:00') -- first instant of the year

Timestamp('2019-01-01 00:00:00')

In [3]:
y.is_leap_year                                            # False -- 2019 is not a leap year

False

In [4]:
y.end_time                                                # Timestamp('2019-12-31 23:59:59.999999999') -- last nanosecond of the year

Timestamp('2019-12-31 23:59:59.999999999')

# Create a MONTHLY TIME PERIOD

In [5]:
m = pd.Period('2019-01', freq='M')                             # January 2019; freq='M' means monthly frequency
m


Period('2019-01', 'M')

In [6]:
m.start_time                                                  # First instant of the month: Timestamp('2019-01-01 00:00:00')

Timestamp('2019-01-01 00:00:00')

In [7]:
m.end_time                                                   # Last instant of the month: Timestamp('2019-01-31 23:59:59.999999999')

Timestamp('2019-01-31 23:59:59.999999999')

# Mathematical operations on Period Object

In [8]:
# Adding or subtracting an integer shifts a monthly period by that many months.
print(m)                                                     # 2019-01 (January 2019)
for month_shift in (1, -1, 12):                              # -> 2019-02, 2018-12, 2020-01
    print(m + month_shift)

2019-01
2019-02
2018-12
2020-01


# DAILY TIME PERIOD

In [9]:
# Daily period for 12 January 2019; integer arithmetic moves it in whole days.
d = pd.Period('2019-01-12', freq='D')
print(d, d + 1, sep='\n')                                    # 2019-01-12, then 2019-01-13

2019-01-12
2019-01-13


# HOURLY TIME PERIOD

In [10]:
# NOTE(review): newer pandas releases deprecate the uppercase 'H' alias in favour of 'h' -- confirm against the pandas version in use
h = pd.Period('2019-01-12 23:00:00', freq='H')               # freq='H' means hourly frequency
print(h)                                                     # 2019-01-12 23:00


2019-01-12 23:00


In [11]:
h.start_time                                                # Timestamp('2019-01-12 23:00:00') -- first instant of the hour

Timestamp('2019-01-12 23:00:00')

In [12]:
h.end_time                                                  # Timestamp('2019-01-12 23:59:59.999999999') -- last nanosecond of the hour

Timestamp('2019-01-12 23:59:59.999999999')

In [13]:
# Integer arithmetic on an hourly period moves it in whole hours;
# adding one hour to 23:00 rolls over to the next day.
print(h + 1, h - 6, sep='\n')                              # 2019-01-13 00:00, then 2019-01-12 17:00


2019-01-13 00:00
2019-01-12 17:00


# QUARTERLY TIME PERIOD
A year consists of 4 quarters, and each quarter comprises 3 months. Financial companies usually release their data quarterly.

In [14]:
# No freq is passed: the 'Q1' suffix makes this a quarterly period.
# Its freq is Q-DEC, i.e. quarterly frequency with the year ending in December.
q = pd.Period('2019Q1')
print(q, q.start_time, q.end_time, sep='\n')
# Printed, one value per line:
#   2019Q1
#   2019-01-01 00:00:00
#   2019-03-31 23:59:59.999999999


2019Q1
2019-01-01 00:00:00
2019-03-31 23:59:59.999999999


In [15]:
# Integer arithmetic on a quarterly period moves it in whole quarters.
for quarter_shift in (1, 3, -1):                         # -> 2019Q2, 2019Q4, 2018Q4
    print(q + quarter_shift)

2019Q2
2019Q4
2018Q4


In [16]:
# Most companies use a fiscal year that runs from January to December, but some
# (Walmart, for example) use a fiscal year that runs from February to January.
# The quarterly frequency has to be anchored accordingly.

In [17]:
# freq='Q-JAN' means quarterly frequency with the fiscal year ending in January.
# Fiscal 2019 therefore ends in January 2019, and its first quarter covers
# February 2018 through April 2018.
q_new = pd.Period('2019Q1', freq='Q-JAN')
print(q_new, q_new.start_time, q_new.end_time, sep='\n')
# Printed, one value per line:
#   2019Q1
#   2018-02-01 00:00:00
#   2018-04-30 23:59:59.999999999


2019Q1
2018-02-01 00:00:00
2018-04-30 23:59:59.999999999


In [18]:
# Convert the quarterly period to monthly frequency with asfreq().
# how='start' selects the first month of the quarter.
q.asfreq('M', how='start')

Period('2019-01', 'M')

# Using Period as the index of a DataFrame
An index made up of Period objects is a PeriodIndex.

In [19]:
# Quarterly PeriodIndex when both the start and the end are known.
period_idx = pd.period_range('2018', '2019', freq='Q')
print(period_idx)

# Quarterly PeriodIndex when only the start and the number of periods are known.
period_idx = pd.period_range('2018', periods=5, freq='Q')
print(period_idx)


PeriodIndex(['2018Q1', '2018Q2', '2018Q3', '2018Q4', '2019Q1'], dtype='period[Q-DEC]', freq='Q-DEC')
PeriodIndex(['2018Q1', '2018Q2', '2018Q3', '2018Q4', '2019Q1'], dtype='period[Q-DEC]', freq='Q-DEC')


# Converting PeriodIndex to DatetimeIndex

In [21]:
# Convert the PeriodIndex to a DatetimeIndex.
# Called with no arguments, each period is represented by its start timestamp.
dt_idx = period_idx.to_timestamp()
dt_idx


DatetimeIndex(['2018-01-01', '2018-04-01', '2018-07-01', '2018-10-01',
               '2019-01-01'],
              dtype='datetime64[ns]', freq='QS-OCT')

# Converting DatetimeIndex to PeriodIndex

In [23]:
# Convert the DatetimeIndex back to a PeriodIndex.
# No freq is passed here; the result shown below comes back as quarterly (Q-DEC).
period_idx = dt_idx.to_period()
period_idx


PeriodIndex(['2018Q1', '2018Q2', '2018Q3', '2018Q4', '2019Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

# WALMART Data Processing
Go to Yahoo Finance and download the stock data for Walmart [Ticker: WMT] > Financials > Quarterly Data > copy the data corresponding to 'Revenue', 'Expenses' and 'Profit'. 
In 'WMT.csv', the time periods (quarters) are stored as columns rather than as rows. 

### Our objective is to Transpose this table in such a way that the quarterly time periods form the index of the DataFrame + add 2 new columns showing the start and the end date of that quarter. 

### Also note that the fiscal year for Walmart is from Feb to Jan unlike the Jan to Dec fiscal period. 


In [28]:
import pandas as pd                                          # Already imported in the first cell; harmless repeat
df = pd.read_csv('WMTT.csv')                                 # NOTE(review): the text above calls this file 'WMT.csv' -- confirm the filename
df

Unnamed: 0,Line Item,2018Q1,2018Q2,2018Q3,2018Q4
0,Revenue,136267000,122690000,128028000,124894000
1,Expenses,102640000,91707000,95571000,93116000
2,Profit,33627000,30983000,32457000,31778000


In [31]:
# Make the "Line Item" column the index.
# The result is rebound to df (instead of using inplace=True) so the frame
# object displayed by the previous cell is not mutated behind the reader's back.
df = df.set_index('Line Item')
df

Unnamed: 0_level_0,2018Q1,2018Q2,2018Q3,2018Q4
Line Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Revenue,136267000,122690000,128028000,124894000
Expenses,102640000,91707000,95571000,93116000
Profit,33627000,30983000,32457000,31778000


In [33]:
# Transpose the table so the quarters become rows and the line items become columns
df = df.T
df

# One task achieved - the quarter labels now form the index

Line Item,Revenue,Expenses,Profit
2018Q1,136267000,102640000,33627000
2018Q2,122690000,91707000,30983000
2018Q3,128028000,95571000,32457000
2018Q4,124894000,93116000,31778000


In [34]:
# Inspect the index after transposing
df.index

# The index dtype is object: the quarter labels are still plain strings, not Period objects.

Index(['2018Q1', '2018Q2', '2018Q3', '2018Q4'], dtype='object')

In [40]:
# Convert the string index to a PeriodIndex.
# freq='Q-JAN' anchors the quarters to a fiscal year ending in January.
df.index = pd.PeriodIndex(df.index, freq='Q-JAN')
df.index

PeriodIndex(['2018Q1', '2018Q2', '2018Q3', '2018Q4'], dtype='period[Q-JAN]', freq='Q-JAN')

In [41]:
# Add a 'Start Date' column holding the first instant of each fiscal quarter.
# PeriodIndex exposes start_time as a vectorized property, so the whole column
# is computed at once instead of mapping a lambda over every Period.
df['Start Date'] = df.index.start_time
df


Line Item,Revenue,Expenses,Profit,Start Date
2018Q1,136267000,102640000,33627000,2017-02-01
2018Q2,122690000,91707000,30983000,2017-05-01
2018Q3,128028000,95571000,32457000,2017-08-01
2018Q4,124894000,93116000,31778000,2017-11-01


In [43]:
# Add an 'End Date' column holding the last instant of each fiscal quarter.
# PeriodIndex exposes end_time as a vectorized property, so the whole column
# is computed at once instead of mapping a lambda over every Period.
df['End Date'] = df.index.end_time
df


Line Item,Revenue,Expenses,Profit,Start Date,End Date
2018Q1,136267000,102640000,33627000,2017-02-01,2017-04-30
2018Q2,122690000,91707000,30983000,2017-05-01,2017-07-31
2018Q3,128028000,95571000,32457000,2017-08-01,2017-10-31
2018Q4,124894000,93116000,31778000,2017-11-01,2018-01-31


In [49]:
# Reorder the columns: the two date columns first, then the financial line items.
column_order = ['Start Date', 'End Date', 'Revenue', 'Expenses', 'Profit']
df = df.reindex(columns=column_order)
df

Line Item,Start Date,End Date,Revenue,Expenses,Profit
2018Q1,2017-02-01,2017-04-30,136267000,102640000,33627000
2018Q2,2017-05-01,2017-07-31,122690000,91707000,30983000
2018Q3,2017-08-01,2017-10-31,128028000,95571000,32457000
2018Q4,2017-11-01,2018-01-31,124894000,93116000,31778000
