#  Time series advanced operations

**Learning Objectives:**
  * Gain an introduction to advanced time series operations using the *pandas* library

## Library import

The following line imports the *pandas* library

In [2]:
import pandas as pd


## Data loading and DataFrame creation


The following example loads data from an external CSV file into a new `DataFrame`.

In [3]:
# Read the server-utilisation CSV straight from GitHub; `parse_dates` converts
# the 'datetime' column to datetime values while loading.
PanelDataFrame = pd.read_csv(
    'https://raw.githubusercontent.com/m-mehdi/pandas_tutorials/main/server_util.csv',
    parse_dates=['datetime'],
)



Let's display the first few records of the `DataFrame`:

In [4]:
# Preview the first five rows (the default row count, spelled out here).
PanelDataFrame.head(n=5)

Unnamed: 0,datetime,server_id,cpu_utilization,free_memory,session_count
0,2019-03-06 00:00:00,100,0.4,0.54,52
1,2019-03-06 01:00:00,100,0.49,0.51,58
2,2019-03-06 02:00:00,100,0.49,0.54,53
3,2019-03-06 03:00:00,100,0.44,0.56,49
4,2019-03-06 04:00:00,100,0.42,0.52,54


In [None]:
# Summarise the frame: index range, per-column non-null counts and dtypes,
# and the memory footprint.
PanelDataFrame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40800 entries, 0 to 40799
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   datetime         40800 non-null  datetime64[ns]
 1   server_id        40800 non-null  int64         
 2   cpu_utilization  40800 non-null  float64       
 3   free_memory      40800 non-null  float64       
 4   session_count    40800 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(2)
memory usage: 1.6 MB


## Time series DataFrame Operations: Indexing and sorting

### It is advisable to use timestamps as indexes if you need to perform time-related operations

In [5]:
# Use the timestamps as the index so that date-based selection and resampling
# can operate directly on the frame.
# The guard keeps this cell re-runnable: once 'datetime' has become the index
# it is no longer a column, so calling set_index('datetime') a second time
# would raise a KeyError.
if PanelDataFrame.index.name != 'datetime':
    PanelDataFrame = PanelDataFrame.set_index('datetime')

In [None]:
# Display the frame after the index change made in the previous cell.
PanelDataFrame

### It is also advisable to have the index sorted

In [6]:
# Order the rows by index value; the sorted frame is bound back to the same
# name rather than being sorted in place.
PanelDataFrame = PanelDataFrame.sort_index()

In [8]:
# Preview the first ten rows of the frame.
PanelDataFrame.head(n=10)

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-06,100,0.4,0.54,52
2019-03-06,135,0.5,0.55,55
2019-03-06,110,0.54,0.4,61
2019-03-06,136,0.58,0.4,64
2019-03-06,109,0.57,0.41,61
2019-03-06,137,0.76,0.21,78
2019-03-06,108,0.76,0.28,82
2019-03-06,138,0.43,0.57,48
2019-03-06,107,0.69,0.36,73
2019-03-06,139,0.74,0.29,78


## Time series DataFrame Operations: Shifting

In [11]:
# Move every column's values down by three rows while leaving the index as is:
# the first three rows become NaN.
# NOTE(review): the shift is positional, so `server_id` moves together with
# the measurements and rows sharing a timestamp are shifted across servers.
PanelDataFrame.shift(3).head(n=10)

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-06,,,,
2019-03-06,,,,
2019-03-06,,,,
2019-03-06,100.0,0.4,0.54,52.0
2019-03-06,135.0,0.5,0.55,55.0
2019-03-06,110.0,0.54,0.4,61.0
2019-03-06,136.0,0.58,0.4,64.0
2019-03-06,109.0,0.57,0.41,61.0
2019-03-06,137.0,0.76,0.21,78.0
2019-03-06,108.0,0.76,0.28,82.0


In [12]:
# A negative period moves the values up instead: each row now shows the data
# that sat three rows below it, and the index is left unchanged.
PanelDataFrame.shift(-3).head(n=10)

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-06,136.0,0.58,0.4,64.0
2019-03-06,109.0,0.57,0.41,61.0
2019-03-06,137.0,0.76,0.21,78.0
2019-03-06,108.0,0.76,0.28,82.0
2019-03-06,138.0,0.43,0.57,48.0
2019-03-06,107.0,0.69,0.36,73.0
2019-03-06,139.0,0.74,0.29,78.0
2019-03-06,140.0,0.66,0.36,74.0
2019-03-06,106.0,0.46,0.55,45.0
2019-03-06,141.0,0.53,0.39,56.0


In [None]:
# Select by a date-only label on the datetime index: every row whose timestamp
# falls on that day is returned (00:00 through 23:00 in the saved output).
# NOTE(review): the saved output shows `month`, `day` and
# `datetimeInAlternativeFormat` columns that no cell visible here creates —
# it looks stale; re-run the notebook from a fresh kernel to refresh it.
PanelDataFrame.loc['2019-04-08']

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count,month,day,datetimeInAlternativeFormat
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-04-08 00:00:00,106,0.44,0.62,49,4,8,04/08/2019
2019-04-08 00:00:00,112,0.72,0.29,81,4,8,04/08/2019
2019-04-08 00:00:00,100,0.43,0.54,51,4,8,04/08/2019
2019-04-08 00:00:00,137,0.75,0.28,83,4,8,04/08/2019
2019-04-08 00:00:00,110,0.61,0.40,62,4,8,04/08/2019
...,...,...,...,...,...,...,...
2019-04-08 23:00:00,128,0.64,0.41,64,4,8,04/08/2019
2019-04-08 23:00:00,127,0.67,0.33,78,4,8,04/08/2019
2019-04-08 23:00:00,126,0.71,0.33,73,4,8,04/08/2019
2019-04-08 23:00:00,123,0.71,0.22,83,4,8,04/08/2019


In [None]:
# Select a range of observations (a.k.a. slicing) between two date labels.
# The rows in the saved output run from 2019-03-06 to 2019-04-08, which lies
# entirely inside the requested range.
PanelDataFrame.loc[slice('2019-03-01', '2019-05-01')]

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count,month,day,datetimeInAlternativeFormat
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-03-06 00:00:00,100,0.40,0.54,52,3,6,03/06/2019
2019-03-06 00:00:00,135,0.50,0.55,55,3,6,03/06/2019
2019-03-06 00:00:00,110,0.54,0.40,61,3,6,03/06/2019
2019-03-06 00:00:00,136,0.58,0.40,64,3,6,03/06/2019
2019-03-06 00:00:00,109,0.57,0.41,61,3,6,03/06/2019
...,...,...,...,...,...,...,...
2019-04-08 23:00:00,128,0.64,0.41,64,4,8,04/08/2019
2019-04-08 23:00:00,127,0.67,0.33,78,4,8,04/08/2019
2019-04-08 23:00:00,126,0.71,0.33,73,4,8,04/08/2019
2019-04-08 23:00:00,123,0.71,0.22,83,4,8,04/08/2019


## Time series DataFrame Operations: Downsampling


In [14]:
# Downsample to weekly bins and average every column within each week.
PanelDataFrame.resample(rule="W").mean()

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-10,124.5,0.620683,0.37865,69.1385
2019-03-17,124.5,0.620794,0.379404,69.149167
2019-03-24,124.5,0.620655,0.379669,69.196667
2019-03-31,124.5,0.620218,0.378476,69.0875
2019-04-07,124.5,0.620451,0.379381,69.120357
2019-04-14,124.5,0.620942,0.380175,69.0975


In [15]:
# Downsample to monthly bins (labelled with the month-end date in the output)
# and average every column within each month.
# NOTE(review): pandas 2.2+ deprecates the "M" alias in favour of "ME"; "M" is
# kept here so the cell still runs on older pandas versions.
PanelDataFrame.resample(rule="M").mean()

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-31,124.5,0.62058,0.37908,69.143301
2019-04-30,124.5,0.620513,0.37948,69.1175


In [16]:
# Weekly bins again, this time keeping the largest value of each column.
PanelDataFrame.resample(rule="W").max()

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-10,149,0.89,0.68,96
2019-03-17,149,0.88,0.68,95
2019-03-24,149,0.88,0.66,94
2019-03-31,149,0.91,0.66,93
2019-04-07,149,0.88,0.66,94
2019-04-14,149,0.89,0.66,92


In [17]:
# Monthly totals for every column.
# NOTE(review): `server_id` is an identifier, so its monthly sum carries no
# meaning — consider dropping that column before aggregating.
# NOTE(review): pandas 2.2+ deprecates the "M" alias in favour of "ME".
PanelDataFrame.resample(rule="M").sum()

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-31,3884400,19362.1,11827.31,2157271
2019-04-30,1195200,5956.92,3643.01,663528


In [26]:
# Keep the weekly means in their own frame so they can be upsampled later.
DownSampledPanelDataFrame = PanelDataFrame.resample(rule="W").mean()

In [28]:
# Display the weekly-mean frame built in the previous cell.
DownSampledPanelDataFrame

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-10,124.5,0.620683,0.37865,69.1385
2019-03-17,124.5,0.620794,0.379404,69.149167
2019-03-24,124.5,0.620655,0.379669,69.196667
2019-03-31,124.5,0.620218,0.378476,69.0875
2019-04-07,124.5,0.620451,0.379381,69.120357
2019-04-14,124.5,0.620942,0.380175,69.0975


## Time series DataFrame Operations: Upsampling

In [31]:
# Upsample the weekly means to daily frequency; forward-fill repeats each
# weekly row on the following days until the next weekly row is reached.
DownSampledPanelDataFrame.resample(rule='D').ffill()

Unnamed: 0_level_0,server_id,cpu_utilization,free_memory,session_count
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-03-10,124.5,0.620683,0.37865,69.1385
2019-03-11,124.5,0.620683,0.37865,69.1385
2019-03-12,124.5,0.620683,0.37865,69.1385
2019-03-13,124.5,0.620683,0.37865,69.1385
2019-03-14,124.5,0.620683,0.37865,69.1385
2019-03-15,124.5,0.620683,0.37865,69.1385
2019-03-16,124.5,0.620683,0.37865,69.1385
2019-03-17,124.5,0.620794,0.379404,69.149167
2019-03-18,124.5,0.620794,0.379404,69.149167
2019-03-19,124.5,0.620794,0.379404,69.149167
