# Creating time series using the Poisson distribution to simulate hospital admissions
https://de.wikipedia.org/wiki/Poisson-Verteilung

In [1]:
import pandas as pd

import numpy as np

In [2]:
# Creating a time series index: 72 hours starting with midnight January 1st 2018
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.date_range.html

rng = pd.date_range(start='1/1/2018', periods=72, freq='H')

In [3]:
rng

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00', '2018-01-01 03:00:00',
               '2018-01-01 04:00:00', '2018-01-01 05:00:00',
               '2018-01-01 06:00:00', '2018-01-01 07:00:00',
               '2018-01-01 08:00:00', '2018-01-01 09:00:00',
               '2018-01-01 10:00:00', '2018-01-01 11:00:00',
               '2018-01-01 12:00:00', '2018-01-01 13:00:00',
               '2018-01-01 14:00:00', '2018-01-01 15:00:00',
               '2018-01-01 16:00:00', '2018-01-01 17:00:00',
               '2018-01-01 18:00:00', '2018-01-01 19:00:00',
               '2018-01-01 20:00:00', '2018-01-01 21:00:00',
               '2018-01-01 22:00:00', '2018-01-01 23:00:00',
               '2018-01-02 00:00:00', '2018-01-02 01:00:00',
               '2018-01-02 02:00:00', '2018-01-02 03:00:00',
               '2018-01-02 04:00:00', '2018-01-02 05:00:00',
               '2018-01-02 06:00:00', '2018-01-02 07:00:00',
               '2018-01-

In [4]:
# Create a time series using randomly created numbers from numpy.random with the time created above as index
# len(rng) means that number of random numbers matches length of rng = 72 records
ts = pd.DataFrame(np.random.randn(len(rng)), index=rng, columns=['Admissions'])

In [5]:
ts

Unnamed: 0,Admissions
2018-01-01 00:00:00,2.543270
2018-01-01 01:00:00,-0.372079
2018-01-01 02:00:00,0.676628
2018-01-01 03:00:00,-0.402541
2018-01-01 04:00:00,0.745556
...,...
2018-01-03 19:00:00,-0.249617
2018-01-03 20:00:00,1.485040
2018-01-03 21:00:00,-0.121483
2018-01-03 22:00:00,1.090969


In [6]:
# Using numpy.random to create Poisson distribution
# https://numpy.org/doc/stable/reference/random/generated/numpy.random.poisson.html
# First number: lambda = expected number of events on average, second number: Size = number of instances

np.random.poisson(5,10)

array([ 3, 11,  5,  3,  5,  3,  8,  9,  8,  2])

In [7]:
# Update the time series with poisson 

ts = pd.DataFrame(np.random.poisson(10, len(rng)), index=rng, columns=['Admissions'])

In [8]:
ts

Unnamed: 0,Admissions
2018-01-01 00:00:00,9
2018-01-01 01:00:00,14
2018-01-01 02:00:00,10
2018-01-01 03:00:00,5
2018-01-01 04:00:00,11
...,...
2018-01-03 19:00:00,14
2018-01-03 20:00:00,14
2018-01-03 21:00:00,16
2018-01-03 22:00:00,6


In [9]:
# Calculate average
ts.mean()

Admissions    10.194444
dtype: float64

In [14]:
# To select one time stamp
ts.loc['2018-01-01 02:00:00']

Admissions    10
Name: 2018-01-01 02:00:00, dtype: int32

In [15]:
# To selecct a range:
ts.loc['2018-01-02 00:00:00':'2018-01-02 23:00:00']

Unnamed: 0,Admissions
2018-01-02 00:00:00,8
2018-01-02 01:00:00,9
2018-01-02 02:00:00,8
2018-01-02 03:00:00,12
2018-01-02 04:00:00,6
2018-01-02 05:00:00,14
2018-01-02 06:00:00,8
2018-01-02 07:00:00,8
2018-01-02 08:00:00,6
2018-01-02 09:00:00,7


In [17]:
# Average of selected range
ts.loc['2018-01-02 00:00:00':'2018-01-02 23:00:00'].mean()

Admissions    8.708333
dtype: float64

In [20]:
# Grouping the data set by day, mean of the aggregation
ts.resample('D').mean()

Unnamed: 0,Admissions
2018-01-01,10.5
2018-01-02,8.708333
2018-01-03,11.375
