## Manejo del tiempo en Pandas

### Timestamps

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a Timestamp from an ISO date string; no time is given, and the recorded output shows midnight.
pd.Timestamp('2017-09-01')

Timestamp('2017-09-01 00:00:00')

In [3]:
# Build a Timestamp from positional integers: year, month, day, hour, minute.
pd.Timestamp(2017,11,29,13,45)

Timestamp('2017-11-29 13:45:00')

In [4]:
# Parse a month/day/year string with a 12-hour clock; the recorded output is 2017-09-01 10:05:00.
pd.Timestamp('9/1/2017 10:05AM')

Timestamp('2017-09-01 10:05:00')

In [5]:
# Parse a compact date-time string by spelling out its layout with an explicit strftime-style format.
pd.to_datetime('20170901 100500', format='%Y%m%d %H%M%S')

Timestamp('2017-09-01 10:05:00')

### Period

In [6]:
# A month/year string yields a Period with monthly ('M') frequency, as the recorded output shows.
pd.Period('1/2016')

Period('2016-01', 'M')

In [7]:
# A month/day/year string yields a Period with daily ('D') frequency; the recorded output is 2016-03-05.
pd.Period('3/5/2016')

Period('2016-03-05', 'D')

### DatetimeIndex

In [8]:
# Series of three letters labelled by three consecutive days.
t1 = pd.Series(
    data=list('abc'),
    index=[pd.Timestamp(day) for day in ('2016-09-01', '2016-09-02', '2016-09-03')],
)
t1

2016-09-01    a
2016-09-02    b
2016-09-03    c
dtype: object

In [9]:
# Inspect the index type: a Series labelled with Timestamps ends up with a DatetimeIndex.
type(t1.index)

pandas.core.indexes.datetimes.DatetimeIndex

### Convirtiendo a DateTime

In [10]:
# Row labels deliberately use four different date formats; they are plain strings at this point.
d1 = ['2 June 2013', 'Aug 29, 2014', '2015-06-26', '7/12/16']
# A seeded generator keeps the sample values identical on every Restart & Run All
# (they will differ from values produced by earlier unseeded runs).
# numpy is imported as `np` in the notebook's top import cell.
ts3 = pd.DataFrame(np.random.RandomState(42).randint(10, 100, (4, 2)), index=d1, columns=list('ab'))
ts3

Unnamed: 0,a,b
2 June 2013,46,82
"Aug 29, 2014",67,74
2015-06-26,59,16
7/12/16,88,22


In [11]:
# Before conversion the string labels are held in a plain Index, not a DatetimeIndex.
type(ts3.index)

pandas.core.indexes.base.Index

In [12]:
# The labels mix several date formats. pandas >= 2.0 infers a single format from the
# first element and raises on labels that do not match it, so each label is parsed
# on its own and the results are collected into a DatetimeIndex.
# Re-running the cell is safe: already-converted Timestamps pass through unchanged.
ts3.index = pd.DatetimeIndex([pd.to_datetime(label) for label in ts3.index])
ts3.index, type(ts3.index)

(DatetimeIndex(['2013-06-02', '2014-08-29', '2015-06-26', '2016-07-12'], dtype='datetime64[ns]', freq=None),
 pandas.core.indexes.datetimes.DatetimeIndex)

### TimeDeltas

In [13]:
# Subtracting two Timestamps yields a Timedelta (2 days in the recorded output).
pd.Timestamp('9/3/2016')-pd.Timestamp('9/1/2016')

Timedelta('2 days 00:00:00')

In [14]:
# Subtracting two monthly Periods; the recorded output is 14, the number of months between them.
# NOTE(review): the return type may differ in other pandas versions — confirm.
pd.Period('5/2017') - pd.Period('3/2016')

14

In [15]:
# Adding a 12-day Timedelta moves the date forward and keeps the time of day (recorded output: 2016-09-14 08:10).
pd.Timestamp('9/2/2016 8:10AM') + pd.Timedelta('12D')

Timestamp('2016-09-14 08:10:00')

### Fechas en DataFrames

In [16]:
# Four consecutive calendar days starting on 2017-04-06.
dates = pd.date_range(start='2017-04-06', periods=4, freq='D')
dates

DatetimeIndex(['2017-04-06', '2017-04-07', '2017-04-08', '2017-04-09'], dtype='datetime64[ns]', freq='D')

In [17]:
# Four month-end dates on or after 2017-04-06.
# The frequency is passed as an offset object instead of the 'M' string alias:
# newer pandas deprecates 'M' in favour of 'ME', which older versions do not
# accept, whereas the MonthEnd offset works in both.
dates = pd.date_range('2017-04-06', periods=4, freq=pd.offsets.MonthEnd())
dates

DatetimeIndex(['2017-04-30', '2017-05-31', '2017-06-30', '2017-07-31'], dtype='datetime64[ns]', freq='M')

In [18]:
# Four Thursdays spaced two weeks apart, starting on 2017-04-06.
dates = pd.date_range(start='2017-04-06', periods=4, freq='2W-THU')
dates

DatetimeIndex(['2017-04-06', '2017-04-20', '2017-05-04', '2017-05-18'], dtype='datetime64[ns]', freq='2W-THU')

In [19]:
# Two columns of random sample counts indexed by `dates`.
# A seeded generator makes the values reproducible on every Restart & Run All
# (they will differ from values produced by earlier unseeded runs).
rng = np.random.RandomState(0)
df = pd.DataFrame({'Count 1': 100 + rng.randint(-5, 10, 4).cumsum(),  # running total of random steps
                   'Count 2': 120 + rng.randint(-5, 10, 4)}, index=dates)
df

Unnamed: 0,Count 1,Count 2
2017-04-06,104,127
2017-04-20,113,117
2017-05-04,115,125
2017-05-18,117,125


In [20]:
# Shifts the column's values down by one row; the first row has nothing above it and becomes NaN.
df['Count 1'].shift()

2017-04-06      NaN
2017-04-20    104.0
2017-05-04    113.0
2017-05-18    115.0
Freq: 2W-THU, Name: Count 1, dtype: float64

In [21]:
# Row-to-row difference for every column; the first row has no predecessor, so it is NaN.
df.diff()

Unnamed: 0,Count 1,Count 2
2017-04-06,,
2017-04-20,9.0,-10.0
2017-05-04,2.0,8.0
2017-05-18,2.0,0.0


In [22]:
# Name of the weekday for every index entry.
# `weekday_name` was removed from DatetimeIndex in pandas 1.0; `day_name()` is its replacement.
df.index.day_name()

Index(['Thursday', 'Thursday', 'Thursday', 'Thursday'], dtype='object')

In [23]:
# Partial-string indexing: select every row whose date falls in April 2017.
# `.loc` makes the row selection explicit; plain `df['2017-04']` is treated as a
# column lookup by pandas >= 2.0 and raises KeyError.
df.loc['2017-04']

Unnamed: 0,Count 1,Count 2
2017-04-06,104,127
2017-04-20,113,117


## Trabajo con series de tiempo: calcular variaciones con shift()

Cuando se trabaja con series de tiempo, es muy habitual agregar al dataframe una variable que indique cuánto se modificó determinado valor desde la última medición.

In [24]:
# Build the frame in a single step: sample times and the quantity seen at each one.
df = pd.DataFrame({
    'hora': ['10:30', '10:31', '10:38', '10:40', '10:41'],
    'cantidad': [20, 20, 9, 12, 12],
})
df

Unnamed: 0,hora,cantidad
0,10:30,20
1,10:31,20
2,10:38,9
3,10:40,12
4,10:41,12


In [25]:
# Put the previous row's quantity next to the current one; the first row has no predecessor (NaN).
df['cantidad_anterior'] = df['cantidad'].shift(periods=1)
df

Unnamed: 0,hora,cantidad,cantidad_anterior
0,10:30,20,
1,10:31,20,20.0
2,10:38,9,20.0
3,10:40,12,9.0
4,10:41,12,12.0


In [26]:
# Change since the previous measurement: current quantity minus the previous one.
df['variacion'] = df['cantidad'].sub(df['cantidad_anterior'])

In [27]:
# Display the frame, now including the 'variacion' column.
df

Unnamed: 0,hora,cantidad,cantidad_anterior,variacion
0,10:30,20,,
1,10:31,20,20.0,0.0
2,10:38,9,20.0,-11.0
3,10:40,12,9.0,3.0
4,10:41,12,12.0,0.0


### Variaciones con agrupamiento

Es muy habitual tener que calcular esto mismo, pero por cada individuo o categoría que interesa estudiar.

In [28]:
# Build the frame in a single step: one row per measurement, tagged with its operator.
df = pd.DataFrame({
    'operador': ['Q8', 'Q8', 'Q8', 'Q7', 'Q9', 'Q9'],
    'hora': ['10:30', '10:31', '10:32', '10:38', '10:40', '10:41'],
    'cantidad': [15, 20, 10, 9, 12, 12],
})
df

Unnamed: 0,operador,hora,cantidad
0,Q8,10:30,15
1,Q8,10:31,20
2,Q8,10:32,10
3,Q7,10:38,9
4,Q9,10:40,12
5,Q9,10:41,12


In [29]:
# Previous quantity within each operator: the shift is applied per group, so the
# first row of every operator has no predecessor and gets NaN.
# The built-in grouped shift replaces transform(lambda ...), giving the same result
# without calling a Python function per group.
# Relies on the rows already being in time order within each operator, as in the sample data.
df['cantidad_ant_operador'] = df.groupby('operador')['cantidad'].shift()

In [30]:
# Display the frame, now including the 'cantidad_ant_operador' column.
df

Unnamed: 0,operador,hora,cantidad,cantidad_ant_operador
0,Q8,10:30,15,
1,Q8,10:31,20,15.0
2,Q8,10:32,10,20.0
3,Q7,10:38,9,
4,Q9,10:40,12,
5,Q9,10:41,12,12.0


In [31]:
# Change since the same operator's previous measurement; NaN where the operator has no earlier row.
df['variacion_operador'] = df['cantidad'].sub(df['cantidad_ant_operador'])
df

Unnamed: 0,operador,hora,cantidad,cantidad_ant_operador,variacion_operador
0,Q8,10:30,15,,
1,Q8,10:31,20,15.0,5.0
2,Q8,10:32,10,20.0,-10.0
3,Q7,10:38,9,,
4,Q9,10:40,12,,
5,Q9,10:41,12,12.0,0.0
