# Time Series Basics

## Importing Time Series Data from CSV Files

In [1]:
import pandas as pd

In [16]:
# Load the hourly temperature file; the "datetime" column is parsed as
# timestamps and becomes the row index.
temp = pd.read_csv(
    "temp.csv",
    index_col="datetime",
    parse_dates=["datetime"],
)

In [17]:
# Preview the first five rows: timestamps form the index, LA and NY are the columns.
temp.head()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3


In [8]:
# Summarise index type, columns and null counts (LA is missing 2 of the 35064 readings).
temp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [15]:
# Scalar at integer position (row 0, column 0): the first LA reading.
temp.iat[0, 0]

11.7

In [6]:
# The row labels are a DatetimeIndex (dtype datetime64[ns], no frequency attached).
temp.index

DatetimeIndex(['2013-01-01 00:00:00', '2013-01-01 01:00:00',
               '2013-01-01 02:00:00', '2013-01-01 03:00:00',
               '2013-01-01 04:00:00', '2013-01-01 05:00:00',
               '2013-01-01 06:00:00', '2013-01-01 07:00:00',
               '2013-01-01 08:00:00', '2013-01-01 09:00:00',
               ...
               '2016-12-31 14:00:00', '2016-12-31 15:00:00',
               '2016-12-31 16:00:00', '2016-12-31 17:00:00',
               '2016-12-31 18:00:00', '2016-12-31 19:00:00',
               '2016-12-31 20:00:00', '2016-12-31 21:00:00',
               '2016-12-31 22:00:00', '2016-12-31 23:00:00'],
              dtype='datetime64[ns]', name='datetime', length=35064, freq=None)

In [7]:
# A single label taken from the index is a pandas Timestamp.
temp.index[0]

Timestamp('2013-01-01 00:00:00')

## Converting strings to datetime objects with pd.to_datetime()

In [18]:
import pandas as pd

In [22]:
# Re-read the file with default settings (no date parsing, no index column)
# so the raw "datetime" column can be inspected.
temp = pd.read_csv("temp.csv")

In [23]:
# With the defaults the frame has an integer index and "datetime" is an ordinary column.
temp.head()

Unnamed: 0,datetime,LA,NY
0,2013-01-01 00:00:00,11.7,-1.1
1,2013-01-01 01:00:00,10.7,-1.7
2,2013-01-01 02:00:00,9.9,-2.0
3,2013-01-01 03:00:00,9.3,-2.1
4,2013-01-01 04:00:00,8.8,-2.3


In [24]:
# "datetime" is reported as dtype object here, not as a datetime dtype.
temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35064 entries, 0 to 35063
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   datetime  35064 non-null  object 
 1   LA        35062 non-null  float64
 2   NY        35064 non-null  float64
dtypes: float64(2), object(1)
memory usage: 821.9+ KB


In [25]:
# Look at one raw value of the column (row label 0): it is a plain string.
temp.loc[0, "datetime"]

'2013-01-01 00:00:00'

In [26]:
# Convert the whole string column into a datetime64[ns] Series (not stored yet).
pd.to_datetime(temp["datetime"])

0       2013-01-01 00:00:00
1       2013-01-01 01:00:00
2       2013-01-01 02:00:00
3       2013-01-01 03:00:00
4       2013-01-01 04:00:00
                ...        
35059   2016-12-31 19:00:00
35060   2016-12-31 20:00:00
35061   2016-12-31 21:00:00
35062   2016-12-31 22:00:00
35063   2016-12-31 23:00:00
Name: datetime, Length: 35064, dtype: datetime64[ns]

In [27]:
# Convert the string column to timestamps and promote it to the index;
# set_index("datetime") removes the column from the data in the same step.
temp = (
    temp
    .assign(datetime=pd.to_datetime(temp["datetime"]))
    .set_index("datetime")
)

In [28]:
# Check the result: the timestamps are now the index and only LA / NY remain as columns.
temp.head()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3


In [29]:
# The index is now reported as a DatetimeIndex spanning 2013-01-01 to 2016-12-31.
temp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [30]:
# The first index label is a Timestamp after the manual conversion.
temp.index[0]

Timestamp('2013-01-01 00:00:00')

In [31]:
# A full "YYYY-MM-DD HH:MM:SS" string becomes a single Timestamp.
pd.to_datetime("2015-05-20 10:30:20")

Timestamp('2015-05-20 10:30:20')

In [32]:
# Compact form without separators; the time defaults to midnight.
pd.to_datetime("20150520")

Timestamp('2015-05-20 00:00:00')

In [33]:
# Slash-separated year/month/day is accepted as well.
pd.to_datetime("2015/05/20")

Timestamp('2015-05-20 00:00:00')

In [34]:
# Space-separated year month day parses to the same date.
pd.to_datetime("2015 05 20")

Timestamp('2015-05-20 00:00:00')

In [35]:
# "2015-20-05" cannot be read as year-month-day because there is no month 20.
# Catch the parsing error and display it, rather than leaving the call
# commented out, so the cell still runs under Restart & Run All.
try:
    pd.to_datetime("2015-20-05")
except ValueError as err:  # pandas/dateutil parsing errors derive from ValueError
    print(f"{type(err).__name__}: {err}")

In [36]:
# English month names are understood.
pd.to_datetime("2015 May 20")

Timestamp('2015-05-20 00:00:00')

In [37]:
# Month name first, then year, then day: still resolved to 2015-05-20.
pd.to_datetime("May 2015 20")

Timestamp('2015-05-20 00:00:00')

In [38]:
# An ordinal suffix ("20th") and a lower-case month name are handled too.
pd.to_datetime("2015 20th may")

Timestamp('2015-05-20 00:00:00')

In [39]:
# A list of date strings yields a DatetimeIndex; the two entries use different formats.
# NOTE(review): pandas 2.x infers one format from the first element, so a
# mixed-format list like this may need format="mixed" there — confirm against
# the installed pandas version.
pd.to_datetime(["2015-05-20", "Feb 20 2015"])

DatetimeIndex(['2015-05-20', '2015-02-20'], dtype='datetime64[ns]', freq=None)

In [40]:
# errors="coerce" turns entries that cannot be parsed ("Elephant") into NaT instead of raising.
# NOTE(review): on pandas 2.x the mixed formats may also coerce "Feb 20 2015"
# to NaT unless format="mixed" is passed — confirm against the installed version.
pd.to_datetime(["2015-05-20", "Feb 20 2015", "Elephant"], errors="coerce")

DatetimeIndex(['2015-05-20', '2015-02-20', 'NaT'], dtype='datetime64[ns]', freq=None)

## Indexing and Slicing Time Series

In [41]:
import pandas as pd

In [42]:
# Reload the file with the "datetime" column parsed and used as the index,
# so this section starts from a fresh DatetimeIndex-ed frame.
temp = pd.read_csv(
    "temp.csv",
    index_col="datetime",
    parse_dates=["datetime"],
)

In [43]:
# Quick look at the first five hourly rows.
temp.head()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3


In [44]:
# Confirm the DatetimeIndex and the two float64 columns before indexing.
temp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [45]:
# A full timestamp string selects exactly one row, returned as a Series.
temp.loc["2013-01-01 01:00:00"]

LA    10.7
NY    -1.7
Name: 2013-01-01 01:00:00, dtype: float64

In [46]:
# Partial-string indexing: a year alone selects every row of 2015.
temp.loc["2015"]

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01 00:00:00,3.8,-5.1
2015-01-01 01:00:00,4.4,-5.1
2015-01-01 02:00:00,3.2,-6.0
2015-01-01 03:00:00,1.2,-6.0
2015-01-01 04:00:00,0.2,-6.0
...,...,...
2015-12-31 19:00:00,16.0,8.1
2015-12-31 20:00:00,16.0,8.1
2015-12-31 21:00:00,16.4,7.9
2015-12-31 22:00:00,16.6,7.2


In [48]:
# Year-month selects all rows of May 2015.
temp.loc["2015-05"]

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-05-01 00:00:00,25.5,13.9
2015-05-01 01:00:00,25.7,13.9
2015-05-01 02:00:00,23.8,10.5
2015-05-01 03:00:00,22.0,10.2
2015-05-01 04:00:00,20.1,8.6
...,...,...
2015-05-31 19:00:00,25.4,25.5
2015-05-31 20:00:00,26.0,23.9
2015-05-31 21:00:00,24.9,22.5
2015-05-31 22:00:00,26.0,21.3


In [49]:
# A single date selects that day's rows: 24 hourly readings x 2 columns.
temp.loc["2015-05-20"].shape

(24, 2)

In [50]:
# A timestamp that exists in the index returns that one row as a Series.
temp.loc["2015-05-20 10:00:00"]

LA     7.8
NY    13.3
Name: 2015-05-20 10:00:00, dtype: float64

In [51]:
# The index only holds full-hour labels, so 10:30 does not exist. Catch the
# KeyError and display it, rather than leaving the lookup commented out, so
# the cell still runs under Restart & Run All.
try:
    temp.loc["2015-05-20 10:30:00"]
except KeyError as err:
    print(f"KeyError: {err}")

In [52]:
# Slice between two date strings; the end date is inclusive (rows of 2015-12-31 are returned).
temp.loc["2015-01-01" : "2015-12-31"]

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01 00:00:00,3.8,-5.1
2015-01-01 01:00:00,4.4,-5.1
2015-01-01 02:00:00,3.2,-6.0
2015-01-01 03:00:00,1.2,-6.0
2015-01-01 04:00:00,0.2,-6.0
...,...,...
2015-12-31 19:00:00,16.0,8.1
2015-12-31 20:00:00,16.0,8.1
2015-12-31 21:00:00,16.4,7.9
2015-12-31 22:00:00,16.6,7.2


In [53]:
# Selecting the year by partial string and slicing from Jan 1 through Dec 31
# give the same frame.
temp.loc["2015"].equals(temp.loc["2015-01-01":"2015-12-31"])

True

In [54]:
# A date-string slice may span a year boundary.
temp.loc["2015-04-15" : "2016-02-23"]

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-04-15 00:00:00,19.1,12.8
2015-04-15 01:00:00,20.0,12.0
2015-04-15 02:00:00,17.7,11.3
2015-04-15 03:00:00,15.5,11.2
2015-04-15 04:00:00,13.3,10.4
...,...,...
2016-02-23 19:00:00,23.4,2.6
2016-02-23 20:00:00,23.4,2.2
2016-02-23 21:00:00,26.8,2.2
2016-02-23 22:00:00,26.8,2.0


In [55]:
# Open-ended slice: everything from 2015-05-20 to the end of the data.
temp.loc["2015-05-20":]

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-05-20 00:00:00,17.7,19.8
2015-05-20 01:00:00,18.0,19.7
2015-05-20 02:00:00,16.6,19.0
2015-05-20 03:00:00,14.4,19.0
2015-05-20 04:00:00,13.3,19.7
...,...,...
2016-12-31 19:00:00,13.5,4.6
2016-12-31 20:00:00,13.2,5.7
2016-12-31 21:00:00,12.8,5.8
2016-12-31 22:00:00,12.3,5.7


In [56]:
# Open-start slice: everything up to and including the rows of 2015-05-20.
temp.loc[:"2015-05-20"]

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3
...,...,...
2015-05-20 19:00:00,17.7,18.1
2015-05-20 20:00:00,18.4,17.8
2015-05-20 21:00:00,18.0,17.8
2015-05-20 22:00:00,19.1,14.2


In [None]:
# NOTE(review): this cell has no recorded output. Presumably it shows that the
# label parser also accepts a day + month-name + year string — confirm by running.
temp.loc["20FEBRUARY2015"]

In [None]:
# Disabled by the author: .loc with a plain list of timestamp strings.
# NOTE(review): the following cells convert the labels with pd.to_datetime
# first; whether the raw string list is accepted depends on the pandas
# version — confirm before re-enabling.
#temp.loc[["2015-05-20 10:00:00", "2015-05-20 12:00:00"]]

In [None]:
# Build real Timestamp labels from the two strings so they can be used
# for list-based selection with .loc.
wanted_labels = ["2015-05-20 10:00:00", "2015-05-20 12:00:00"]
two_timestamps = pd.to_datetime(wanted_labels)
two_timestamps

In [None]:
# Select the rows whose index labels match the two Timestamps built above.
temp.loc[two_timestamps]

## Downsampling Time Series with resample()

In [57]:
import pandas as pd
import matplotlib.pyplot as plt
# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# later releases dropped the bare "seaborn" name, so pick whichever exists.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

In [58]:
# Reload the hourly data with a parsed DatetimeIndex; resample() needs a
# datetime-like index to form its time bins.
temp = pd.read_csv(
    "temp.csv",
    index_col="datetime",
    parse_dates=["datetime"],
)

In [59]:
# The source data is one row per hour.
temp.head()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3


In [60]:
# 35064 hourly rows from 2013-01-01 to 2016-12-31 before any downsampling.
temp.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35064 entries, 2013-01-01 00:00:00 to 2016-12-31 23:00:00
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LA      35062 non-null  float64
 1   NY      35064 non-null  float64
dtypes: float64(2)
memory usage: 821.8 KB


In [62]:
# Iterating a resampler yields (bin label, sub-frame) pairs; unpack the
# second pair and display its rows (the 2013-01-02 bucket).
day_label, day_frame = list(temp.resample("D"))[1]
day_frame

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-02 00:00:00,13.2,2.6
2013-01-02 01:00:00,11.8,2.7
2013-01-02 02:00:00,10.5,2.9
2013-01-02 03:00:00,9.5,2.9
2013-01-02 04:00:00,8.3,2.9
2013-01-02 05:00:00,8.0,3.5
2013-01-02 06:00:00,7.5,3.7
2013-01-02 07:00:00,7.1,3.5
2013-01-02 08:00:00,6.4,3.7
2013-01-02 09:00:00,6.0,3.6


In [63]:
# First 25 hourly rows (all of 2013-01-01 plus the first hour of 2013-01-02),
# useful for checking the daily aggregates by hand.
temp.head(25)

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 01:00:00,10.7,-1.7
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 03:00:00,9.3,-2.1
2013-01-01 04:00:00,8.8,-2.3
2013-01-01 05:00:00,8.7,-2.5
2013-01-01 06:00:00,6.9,-3.2
2013-01-01 07:00:00,7.8,-3.4
2013-01-01 08:00:00,6.7,-3.0
2013-01-01 09:00:00,6.6,-1.8


In [64]:
# Downsample to one row per calendar day by summing that day's hourly values.
temp.resample("D").sum()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01,212.6,-9.7
2013-01-02,222.8,77.0
2013-01-03,247.3,-58.2
2013-01-04,276.3,-49.7
2013-01-05,266.0,19.6
...,...,...
2016-12-27,291.7,253.9
2016-12-28,346.4,96.4
2016-12-29,385.1,31.5
2016-12-30,382.4,52.9


In [65]:
# Two-hour bins, keeping the first reading of each bin. The lower-case "h"
# alias is used because the upper-case "H" spelling is deprecated from pandas 2.2.
temp.resample("2h").first()

Unnamed: 0_level_0,LA,NY
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-01-01 00:00:00,11.7,-1.1
2013-01-01 02:00:00,9.9,-2.0
2013-01-01 04:00:00,8.8,-2.3
2013-01-01 06:00:00,6.9,-3.2
2013-01-01 08:00:00,6.7,-3.0
...,...,...
2016-12-31 14:00:00,12.7,-1.3
2016-12-31 16:00:00,12.6,1.1
2016-12-31 18:00:00,13.2,3.4
2016-12-31 20:00:00,13.2,5.7


In [None]:
# Weekly mean temperatures ("W" is the weekly offset alias with its default anchor day).
temp.resample("W").mean()

In [None]:
# Weekly mean with the week anchored on Wednesday instead of the default day.
temp.resample("W-Wed").mean()

In [None]:
# Monthly mean, labelled with the month-end date.
# NOTE(review): pandas 2.2+ prefers the alias "ME" for month-end and warns on
# "M" — confirm against the installed version before changing.
temp.resample("M").mean()

In [None]:
# Monthly mean, labelled with the month-start date ("MS").
temp.resample("MS").mean()

In [None]:
# resample() no longer accepts `loffset` (deprecated in pandas 1.1, removed
# in 2.0). Shifting the resulting index by 14 days yields the same mid-month
# labels (the 15th of each month) without touching the aggregated values.
temp.resample("MS").mean().shift(freq="14D")

In [None]:
# Quarterly mean, labelled with the quarter-end date.
# NOTE(review): pandas 2.2+ prefers "QE" for quarter-end and warns on "Q" —
# confirm against the installed version before changing.
temp.resample("Q").mean()

In [None]:
# Quarterly mean with quarters anchored so that the year ends in February.
# NOTE(review): same "Q" -> "QE" alias rename applies on pandas 2.2+ — confirm.
temp.resample("Q-Feb").mean()

In [None]:
# Yearly mean, labelled with the year-end date.
# NOTE(review): pandas 2.2+ prefers "YE" for year-end and warns on "Y" —
# confirm against the installed version before changing.
temp.resample("Y").mean()

In [None]:
# Yearly mean, labelled with the year-start date ("YS").
temp.resample("YS").mean()