In [2]:
import pandas as pd
import numpy as np

# Chapter 2. Finding and Wrangling Time Series Data

## Where to Find Time Series Data


### Prepared Data Sets

The UCI Machine Learning Repository : https://archive.ics.uci.edu/ml/index.php

The UEA and UCR Time Series Classification Repository: http://www.timeseriesclassification.com/

Government time series data sets: 

https://www.ncdc.noaa.gov/cdo-web/datasets

https://www.bls.gov/

https://fred.stlouisfed.org/

https://www.cdc.gov/flu/weekly/fluviewinteractive.htm

Additional:

https://www.comp-engine.org/

https://cran.r-project.org/web/packages/Mcomp/index.html

https://github.com/carlanetto/M4comp2018

### A Worked Example: Assembling a Time Series Data Collection

In [9]:
emails = pd.read_csv('BookRepo-master/Ch02/data/emails.csv')
YearJoined = pd.read_csv('BookRepo-master/Ch02/data/year_joined.csv')
donations =  pd.read_csv('BookRepo-master/Ch02/data/donations.csv')

In [15]:
YearJoined.head(1)

Unnamed: 0,user,userStats,yearJoined
0,0,silver,2014


In [16]:
## python
YearJoined.groupby('user').count().groupby('userStats').count()

Unnamed: 0_level_0,yearJoined
userStats,Unnamed: 1_level_1
1,1000


In [18]:
emails.head(1)

Unnamed: 0,emailsOpened,user,week
0,3.0,1.0,2015-06-29 00:00:00


In [20]:
## python
emails[emails.emailsOpened < 1]

Unnamed: 0,emailsOpened,user,week


In [21]:
emails[emails.user == 998]

Unnamed: 0,emailsOpened,user,week
25464,1.0,998.0,2017-12-04 00:00:00
25465,3.0,998.0,2017-12-11 00:00:00
25466,3.0,998.0,2017-12-18 00:00:00
25467,3.0,998.0,2018-01-01 00:00:00
25468,3.0,998.0,2018-01-08 00:00:00
25469,2.0,998.0,2018-01-15 00:00:00
25470,3.0,998.0,2018-01-22 00:00:00
25471,2.0,998.0,2018-01-29 00:00:00
25472,3.0,998.0,2018-02-05 00:00:00
25473,3.0,998.0,2018-02-12 00:00:00


In [30]:
(max(emails[emails.user == 998].week))

'2018-05-28 00:00:00'

In [35]:
min(emails[emails.user == 998].week)

'2017-12-04 00:00:00'

In [None]:
# Span between the first and last recorded week for user 998, in weeks.
# The id column in emails.csv is `user`, and `week` is loaded as strings,
# so it has to be parsed to datetimes before the two can be subtracted.
weeks_998 = pd.to_datetime(emails.loc[emails.user == 998, 'week'])
(weeks_998.max() - weeks_998.min()).days / 7

In [36]:
emails[emails.user == 998].shape

(24, 3)

In [37]:
complete_idx = pd.MultiIndex.from_product((set(emails.week),
set(emails.user)))

In [None]:
# Re-index onto every (week, user) combination; combinations that have no
# row in `emails` are filled with 0. Parentheses make the multi-line method
# chain valid Python.
all_email = (
    emails.set_index(['week', 'user'])
    .reindex(complete_idx, fill_value = 0)
    .reset_index()
)


In [None]:
all_email.columns = ['week', 'member', 'EmailsOpened']


In [None]:
## python
# First and last recorded week per user. The id column is exposed as
# `member` because the cells that consume cutoff_dates read row['member'].
cutoff_dates = (
    emails.groupby('user').week
    .agg(['min', 'max'])
    .reset_index()
    .rename(columns = {'user': 'member'})
)


In [None]:
## python
# For each member, drop the filled-in rows that fall before their first
# or after their last recorded week.
for _, row in cutoff_dates.iterrows():
    member = row['member']
    start_date = row['min']
    end_date = row['max']
    is_member = all_email.member == member
    # one combined mask instead of chained boolean indexing on a filtered frame
    out_of_range = (all_email.week < start_date) | (all_email.week > end_date)
    all_email.drop(all_email[is_member & out_of_range].index, inplace=True)

In [None]:
## python
# NOTE(review): emails.csv and year_joined.csv call the id column `user`;
# if donations.csv does too, normalise it to `member` here. The rename is
# a no-op when there is no `user` column.
donations = donations.rename(columns = {'user': 'member'})
donations.timestamp = pd.to_datetime(donations.timestamp)
donations.set_index('timestamp', inplace = True)
# Weekly donation totals per member, on weeks anchored to Monday.
agg_don = donations.groupby('member').apply(
    lambda df: df.amount.resample("W-MON").sum().dropna())

In [None]:
## python
# Left-join each member's weekly e-mail rows with their weekly donation
# totals; weeks without a donation get amount 0.
# NOTE(review): assumes agg_don is a Series indexed by (member, timestamp)
# so that reset_index() yields member / timestamp / amount columns - confirm.
agg_donations = agg_don.reset_index()
merged_frames = []
for member, member_email in all_email.groupby('member'):
    member_donations = (
        agg_donations[agg_donations.member == member]
        .set_index('timestamp')[['amount']]
    )
    # weeks must be datetimes and must be the index for the index-on-index merge
    member_email = (
        member_email
        .assign(week = pd.to_datetime(member_email.week))
        .sort_values('week')
        .set_index('week')
    )
    df = pd.merge(member_email, member_donations, how = 'left',
                  left_index = True,
                  right_index = True).fillna(0)
    # all_email names the column 'EmailsOpened'; expose it as 'emailsOpened'
    df = df.reset_index().rename(columns = {'EmailsOpened': 'emailsOpened'})
    merged_frames.append(df[['member', 'week', 'emailsOpened', 'amount']])
# build the result once instead of growing a frame inside the loop
merged_df = pd.concat(merged_frames, ignore_index = True)

In [None]:
## python
# Work on an explicit copy so adding a column does not write into a slice
# of merged_df.
df = merged_df[merged_df.member == 998].copy()
# target is the amount from the previous row (shift(1) moves values down by one)
df['target'] = df.amount.shift(1)
df = df.fillna(0)
df

In [None]:
## python
# NOTE(review): the frames built earlier in this notebook carry `week` /
# `timestamp` columns, not `time` - confirm which frame and column this
# is meant for.
# shift(-1) pulls the NEXT row's time, so dt is current minus following.
df['dt'] = df.time - df.time.shift(-1)

## Cleaning Your Data

### Handling Missing Data

In [1]:
Missing data
Changing the frequency of a time series (that is, upsampling and
downsampling)
Smoothing data
Addressing seasonality in data
Preventing unintentional lookaheads

SyntaxError: invalid syntax (<ipython-input-1-761fdbb6bded>, line 1)

# a few specific ways to fill in numbers for those missing values:

Forward fill

Moving average

Interpolation

### LOOKAHEAD

The term lookahead is used in time series analysis to denote any

knowledge of the future. You shouldn’t have such knowledge when

designing, training, or evaluating a model. A lookahead is a way, through

data, to find out something about the future earlier than you ought to

know it.

### Downsampling:

downsampling is as simple as selecting

out every nth element

### Upsampling:

Irregular time series

Inputs sampled at different frequencies

Knowledge of time series dynamics

### Smoothing Data

Exponential smoothing

In [3]:
import pandas as pd

In [18]:
Columns = ['Date','Passengers']
# Forward slashes, as in the other read_csv calls in this notebook: the
# backslash form only resolves on Windows and depends on \C, \d and \A
# not being recognised escape sequences.
air = pd.read_csv('BookRepo-master/Ch02/data/AirPassengers.csv', header=None)
air.columns = Columns

In [19]:
air.head()

Unnamed: 0,Date,Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


In [20]:
# Kept as a record of the legacy API: this pandas has no `pd.ewma`
# (see the AttributeError output); the `.ewm(...).mean()` form is used instead.
air['Smooth.5'] = pd.ewma(air, alpha = .5).Passengers
air['Smooth.9'] = pd.ewma(air, alpha = .9).Passengers

AttributeError: module 'pandas' has no attribute 'ewma'

In [34]:
# Exponentially weighted mean of the passenger counts for two smoothing
# factors. Smooth the numeric column only: calling ewm().mean() on the whole
# frame also tries to average the string `Date` column (rejected by newer
# pandas) and would re-smooth the already added Smooth.5 column.
air['Smooth.5'] = air['Passengers'].ewm(alpha = .5).mean()
air['Smooth.9'] = air['Passengers'].ewm(alpha = .9).mean()

In [36]:
air.head(11)

Unnamed: 0,Date,Passengers,Smooth.5,Smooth.9
0,1949-01,112,112.0,112.0
1,1949-02,118,116.0,117.454545
2,1949-03,132,125.142857,130.558559
3,1949-04,129,127.2,129.155716
4,1949-05,121,124.0,121.815498
5,1949-06,135,129.587302,133.681562
6,1949-07,148,138.866142,146.568157
7,1949-08,148,143.45098,147.856816
8,1949-09,136,139.7182,137.185682
9,1949-10,119,129.348974,120.818568


Kalman and LOESS incorporate data both earlier and later in

time, so if you use these methods keep in mind the leak of information

backward in time, as well as the fact that they are usually not appropriate for

preparing data to be used in forecasting applications.

# Seasonal Data

# Time Zones

In [44]:
import datetime
import pytz


In [39]:
datetime.datetime.utcnow()

datetime.datetime(2019, 11, 27, 14, 41, 59, 738794)

In [40]:
datetime.datetime.now()

datetime.datetime(2019, 11, 27, 11, 42, 11, 368460)

In [42]:
datetime.datetime.now(datetime.timezone.utc)

datetime.datetime(2019, 11, 27, 14, 42, 21, 714051, tzinfo=datetime.timezone.utc)

In [45]:
western = pytz.timezone('US/Pacific')
western.zone

'US/Pacific'

In [46]:
loc_dt = western.localize(datetime.datetime(2018, 5, 15, 12, 34, 0))

In [47]:
loc_dt

datetime.datetime(2018, 5, 15, 12, 34, tzinfo=<DstTzInfo 'US/Pacific' PDT-1 day, 17:00:00 DST>)

In [48]:
pytz.common_timezones

['Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara', 'Africa/Bamako', 'Africa/Bangui', 'Africa/Banjul', 'Africa/Bissau', 'Africa/Blantyre', 'Africa/Brazzaville', 'Africa/Bujumbura', 'Africa/Cairo', 'Africa/Casablanca', 'Africa/Ceuta', 'Africa/Conakry', 'Africa/Dakar', 'Africa/Dar_es_Salaam', 'Africa/Djibouti', 'Africa/Douala', 'Africa/El_Aaiun', 'Africa/Freetown', 'Africa/Gaborone', 'Africa/Harare', 'Africa/Johannesburg', 'Africa/Juba', 'Africa/Kampala', 'Africa/Khartoum', 'Africa/Kigali', 'Africa/Kinshasa', 'Africa/Lagos', 'Africa/Libreville', 'Africa/Lome', 'Africa/Luanda', 'Africa/Lubumbashi', 'Africa/Lusaka', 'Africa/Malabo', 'Africa/Maputo', 'Africa/Maseru', 'Africa/Mbabane', 'Africa/Mogadishu', 'Africa/Monrovia', 'Africa/Nairobi', 'Africa/Ndjamena', 'Africa/Niamey', 'Africa/Nouakchott', 'Africa/Ouagadougou', 'Africa/Porto-Novo', 'Africa/Sao_Tome', 'Africa/Tripoli', 'Africa/Tunis', 'Africa/Windhoek', 'America/Adak', 'America/Anchorage', 'Amer

In [49]:
pytz.country_timezones('tr')

['Europe/Istanbul']

# Preventing Lookahead

In [51]:
"""
Intentionally introduce a lookahead and see how your model
behaves. Try various degrees of lookahead, so you have an idea how
it shifts accuracy. If you have some idea of the accuracy with
lookahead, you have an idea of what the ceiling on a real model
without unfair knowledge of the future will do. Remember that many
time series problems are extremely difficult, so a model with a
lookahead may seem great until you realize you are dealing with a
high-noise/low-signal data set.

Add features slowly, particularly features you might be processing,
so that you can look for jumps. One sign of a lookahead is when a
particular feature is unexpectedly good, and there isn’t a very good
explanation. At the top of your explanation list should always be
“lookahead.”
"""

'\nIntentionally introduce a lookahead and see how your model\nbehaves. Try various degrees of lookahead, so you have an idea how\nit shifts accuracy. If you have some idea of the accuracy with\nlookahead, you have an idea of what the ceiling on a real model\nwithout unfair knowledge of the future will do. Remember that many\ntime series problems are extremely difficult, so a model with a\nlookahead may seem great until you realize you are dealing with a\nhigh-noise/low-signal data set.\n\nAdd features slowly, particularly features you might be processing,\nso that you can look for jumps. One sign of a lookahead is when a\nparticular feature is unexpectedly good, and there isn’t a very good\nexplanation. At the top of your explanation list should always be\n“lookahead.”\n'

# Chapter 3. Exploratory Data Analysis for Time Series

## Familiar Methods

In [52]:
"""
You will want to address the
same exploratory questions you would ask about any new data set, such as:

Are any of the columns strongly correlated with one another?

What is the overall mean of an interesting variable? What is its
variance?

To answer these, you can use familiar techniques such as plotting, taking
summary statistics, applying histograms, and using targeted scatter plots.
"""

'\nYou will want to address the\nsame exploratory questions you would ask about any new data set, such as:\n\nAre any of the columns strongly correlated with one another?\n\nWhat is the overall mean of an interesting variable? What is its\nvariance?\n'

In [53]:
"""
What is the range of values you see, and do they vary by time period
or some other logical unit of analysis?

Does the data look consistent and uniformly measured, or does it
suggest changes in either measurement or behavior over time?

To answer these, you can use familiar techniques such as plotting, taking
summary statistics, applying histograms, and using targeted scatter plots.

"""

'\nWhat is the range of values you see, and do they vary by time period\nor some other logical unit of analysis?\n\nDoes the data look consistent and uniformly measured, or does it\nsuggest changes in either measurement or behavior over time?\n\n'

In [None]:
"""
In a time series context, a hist() of the difference of the data is often more
interesting than a hist() of the untransformed data
"""

## Stationary

In [None]:
"""
Tests for determining whether a process is stationary are called hypothesis
tests. The Augmented Dickey–Fuller (ADF) test is the most commonly used
metric to assess a time series for stationarity problems.

This test posits a null
hypothesis that a unit root is present in a time series
"""

In [2]:
"""the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test"""

'the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test'

In [None]:
"""A log transformation and a square root
transformation are two popular options, particularly in the case of changing
variance over time. """

In [None]:
USUALLY TWO ASSUMPTIONS FOR FORECAST MODELS, STATIONARITY AND NORMAL DISTRIBUTION

In [None]:
### Normality transformation

"""
A common one is the Box Cox transformation, which is
implemented in the R forecast package and in scipy.stats in Python
"""

In [None]:
""" 
Self-Correlation
"""

In [None]:
Spurious Correlations

https://www.tylervigen.com/spurious-correlations

In [None]:
COINTEGRATION

# Chapter 4. Simulation

In [4]:
import numpy as np
import pandas as pd

In [5]:
## membership status
# Simulate 1000 members: a join year and a membership tier each, drawn
# independently with the given probabilities.
years = [str(year) for year in range(2014, 2019)]
memberStatus = ['bronze', 'silver', 'gold', 'inactive']
memberYears = np.random.choice(years, size = 1000,
                               p = [0.1, 0.1, 0.15, 0.30, 0.35])
memberStats = np.random.choice(memberStatus, size = 1000,
                               p = [0.5, 0.3, 0.1, 0.1])
yearJoined = pd.DataFrame(dict(yearJoined = memberYears,
                               memberStats = memberStats))


In [8]:
## python
NUM_EMAILS_SENT_WEEKLY = 3
## we define several functions for different patterns
def never_opens(period_rng):
    """Behaviour of a member who opens nothing: always returns an empty list."""
    return []

def constant_open_rate(period_rng):
    """Draw weekly open counts with one fixed, randomly chosen open probability.

    Returns an array of len(period_rng) binomial draws with
    n = NUM_EMAILS_SENT_WEEKLY.
    """
    n, p = NUM_EMAILS_SENT_WEEKLY, np.random.uniform(0, 1)
    num_opened = np.random.binomial(n, p, len(period_rng))
    return num_opened

def increasing_open_rate(period_rng):
    """Open counts whose open probability grows by a random factor in [1.01, 1.30)."""
    return open_rate_with_factor_change(period_rng,
                                        np.random.uniform(1.01, 1.30))

def decreasing_open_rate(period_rng):
    """Open counts whose open probability shrinks by a random factor in [0.5, 0.99)."""
    return open_rate_with_factor_change(period_rng,
                                        np.random.uniform(0.5, 0.99))

def open_rate_with_factor_change(period_rng, fac):
    """Draw weekly open counts while the open probability is scaled by `fac`.

    The probability starts at a uniform random value and is multiplied by
    `fac` (clamped to [0, 1]) after every block of two periods. Roughly 10%
    of the periods, chosen at random, are then forced to zero opens.

    Returns [] for an empty `period_rng`, otherwise a float array of
    len(period_rng).
    """
    n_periods = len(period_rng)
    if n_periods < 1 :
        return []
    times = np.random.randint(0, n_periods, int(0.1 * n_periods))
    num_opened = np.zeros(n_periods)
    # Draw the starting probability once. Re-drawing it inside the loop
    # would discard the scaled value and make `fac` irrelevant.
    p = np.random.uniform(0, 1)
    for prd in range(0, n_periods, 2):
        # the last block holds a single period when n_periods is odd
        width = min(2, n_periods - prd)
        num_opened[prd:(prd + width)] = np.random.binomial(
            NUM_EMAILS_SENT_WEEKLY, p, width)
        p = max(min(1, p * fac), 0)
    # weeks in which nothing was opened regardless of the open rate
    num_opened[times] = 0
    return num_opened

In [None]:
## python
## donation behavior
def produce_donations(period_rng, member_behavior, num_emails,
                      use_id, member_join_year):
    """Simulate the donation events of one member.

    Parameters
    ----------
    period_rng : sequence of periods the member was active in; elements
        must expose ``.start_time``.
    member_behavior : not used by this function.
    num_emails : total number of e-mails the member opened.
    use_id : identifier stored in the ``member`` column.
    member_join_year : year the member joined; scales how often they give.

    Returns
    -------
    DataFrame with columns ``member``, ``amount`` and ``timestamp``
    (timestamp as string), one row per non-zero donation.
    """
    donation_amounts = np.array([0, 25, 50, 75, 100, 250, 500,
                                 1000, 1500, 2000])
    member_has = np.random.choice(donation_amounts)
    # share of all e-mails sent that the member opened
    email_fraction = num_emails / (NUM_EMAILS_SENT_WEEKLY * len(period_rng))
    member_gives = member_has * email_fraction
    # largest tier the member can afford, clamped so that idx + 1 stays in range
    member_gives_idx = np.where(member_gives >= donation_amounts)[0][-1]
    member_gives_idx = max(min(member_gives_idx,
                               len(donation_amounts) - 2),
                           1)
    num_times_gave = np.random.poisson(2) * (2018 - member_join_year)
    times = np.random.randint(0, len(period_rng), num_times_gave)
    # collect plain records and build the frame once
    records = []
    for n in range(num_times_gave):
        donation = donation_amounts[member_gives_idx
                                    + np.random.binomial(1, .3)]
        ts = str(period_rng[times[n]].start_time
                 + random_weekly_time_delta())
        records.append({'member': use_id,
                        'amount': donation,
                        'timestamp': ts})
    dons = pd.DataFrame(records, columns = ['member', 'amount', 'timestamp'])

    if dons.shape[0] > 0:
        dons = dons[dons.amount != 0]
        ## we don't report zero donation events as this would not
        ## be recorded in a real world database

    return dons


In [None]:
## python
def random_weekly_time_delta():
    """Return a random offset into a week as a ``pd.Timedelta``.

    The day is drawn from 0-6, the hour from 11-22, and minute and second
    from 0-59, each with ``np.random.choice``.
    """
    days_of_week = [d for d in range(7)]
    hours_of_day = [h for h in range(11, 23)]
    minute_of_hour = [m for m in range(60)]
    second_of_minute = [s for s in range(60)]
    # the parentheses keep the multi-line sum a single expression
    return (pd.Timedelta(str(np.random.choice(days_of_week))
                         + " days" ) +
            pd.Timedelta(str(np.random.choice(hours_of_day))
                         + " hours" ) +
            pd.Timedelta(str(np.random.choice(minute_of_hour))
                         + " minutes") +
            pd.Timedelta(str(np.random.choice(second_of_minute))
                         + " seconds"))


In [None]:
## python
behaviors = [never_opens,
             constant_open_rate,
             increasing_open_rate,
             decreasing_open_rate]
# The weights must be passed as `p=`: the third positional argument of
# np.random.choice is `replace`, so a positional list is never used as
# probabilities.
member_behaviors = np.random.choice(behaviors, 1000,
                                    p = [0.2, 0.5, 0.1, 0.2])
rng = pd.period_range('2015-02-14', '2018-06-01', freq = 'W')
# Per-member frames are collected in lists and concatenated once at the end.
email_frames = []
donation_frames = []
for idx in range(yearJoined.shape[0]):
    ## randomly generate the date when a member would have joined
    join_date = (pd.Timestamp(yearJoined.iloc[idx].yearJoined) +
                 pd.Timedelta(str(np.random.randint(0, 365)) +
                              ' days'))
    join_date = min(join_date, pd.Timestamp('2018-06-01'))

    ## member should not have action timestamps before joining
    # compare period start times, since Period vs Timestamp is not a
    # valid comparison in newer pandas
    member_rng = rng[rng.start_time > join_date]

    if len(member_rng) < 1:
        continue

    info = member_behaviors[idx](member_rng)
    if len(info) == len(member_rng):
        email_frames.append(pd.DataFrame(
            {'member': [idx] * len(info),
             'week': [str(r.start_time) for r in member_rng],
             'emailsOpened': info}))
        member_donations = produce_donations(member_rng, member_behaviors[idx],
                                             sum(info), idx, join_date.year)
        if not member_donations.empty:
            donation_frames.append(member_donations)

emails = (pd.concat(email_frames, ignore_index = True) if email_frames
          else pd.DataFrame(columns = ['member', 'week', 'emailsOpened']))
donations = (pd.concat(donation_frames, ignore_index = True) if donation_frames
             else pd.DataFrame(columns = ['member', 'amount', 'timestamp']))

In [None]:
## python
# NOTE(review): `df` needs `timestamp` and `amount` columns here
# (presumably the simulated `donations` frame) - confirm before running.
df.set_index(pd.to_datetime(df.timestamp), inplace = True)
df.sort_index(inplace = True)
# total donated amount per calendar month
df.groupby(pd.Grouper(freq='M')).amount.sum().plot()

## PAGE 159 -- Building a Simulation Universe That Runs Itself


In [None]:
## python
import numpy as np
def taxi_id_number(num_taxis):
    """Yield each id in 0..num_taxis-1 exactly once, in random order."""
    arr = np.arange(num_taxis)
    np.random.shuffle(arr)
    for i in range(num_taxis):
        yield arr[i]


In [None]:
## python
# Pull the first three ids from the generator.
ids = taxi_id_number(10)
print(next(ids))
print(next(ids))
print(next(ids))

In [None]:
## python
def shift_info():
    """Endlessly yield ``(shift_start, shift_end, third_value)`` tuples.

    Each tuple is built from one of three (start, value) pairs chosen with
    probabilities 0.25 / 0.5 / 0.25; the end is always start + 7.5.
    The consumer in this notebook uses the third value as the mean number
    of trips per shift.
    """
    start_times_and_freqs = [(0, 8), (8, 30), (16, 15)]
    indices = np.arange(len(start_times_and_freqs))
    while True:
        idx = np.random.choice(indices, p = [0.25, 0.5, 0.25])
        start = start_times_and_freqs[idx]
        yield (start[0], start[0] + 7.5, start[1])

In [None]:
## python
def taxi_process(taxi_id_generator, shift_info_generator):
    """Generate the TimePoint events of one taxi for a single shift.

    Takes the next id from `taxi_id_generator` and the next
    (start, end, mean_trips) tuple from `shift_info_generator`, then yields
    'start shift', alternating 'pick up ' / 'drop off ' events, and finally
    'end shift '. Times advance by Poisson-distributed gaps (in minutes)
    converted to hours.
    """
    taxi_id = next(taxi_id_generator)
    shift_start, shift_end, shift_mean_trips = next(shift_info_generator)
    # int(...) guarantees a plain integer for range() below
    actual_trips = int(round(np.random.normal(loc = shift_mean_trips,
                                              scale = 2)))
    average_trip_time = 6.5 / shift_mean_trips * 60
    # convert mean trip time to minutes
    between_events_time = 1.0 / (shift_mean_trips - 1) * 60
    # this is an efficient city where cabs are seldom unused
    time = shift_start
    yield TimePoint(taxi_id, 'start shift', time)
    deltaT = np.random.poisson(between_events_time) / 60
    time += deltaT
    for i in range(actual_trips):
        yield TimePoint(taxi_id, 'pick up ', time)
        deltaT = np.random.poisson(average_trip_time) / 60
        time += deltaT
        yield TimePoint(taxi_id, 'drop off ', time)
        deltaT = np.random.poisson(between_events_time) / 60
        time += deltaT
    deltaT = np.random.poisson(between_events_time) / 60
    time += deltaT
    yield TimePoint(taxi_id, 'end shift ', time)


In [None]:
## python
from dataclasses import dataclass
@dataclass
class TimePoint:
    """One simulation event; instances are ordered by their `time`."""
    taxi_id: int
    name: str
    time: float

    def __lt__(self, other):
        # only `time` takes part in ordering, which is what a priority queue needs
        return self.time < other.time


In [None]:
## python
import queue
class Simulator:
    """Collect the events of `num_taxis` taxi processes and replay them in time order."""

    def __init__(self, num_taxis):
        self._time_points = queue.PriorityQueue()
        taxi_id_generator = taxi_id_number(num_taxis)
        shift_info_generator = shift_info()
        self._taxis = [taxi_process(taxi_id_generator,
                                    shift_info_generator) for
                       i in range(num_taxis)]
        self._prepare_run()

    def _prepare_run(self):
        """Drain every taxi generator into the priority queue."""
        for t in self._taxis:
            # a for-loop stops on StopIteration only, so genuine errors
            # inside a generator are no longer silently swallowed
            for e in t:
                self._time_points.put(e)

    def run(self):
        """Print queued events in time order until one reaches time 24 or the queue is empty."""
        sim_time = 0
        while sim_time < 24:
            if self._time_points.empty():
                break
            p = self._time_points.get()
            sim_time = p.time
            print(p)

In [None]:
## python
# Build a 1000-taxi simulation and print its events in time order.
sim = Simulator(1000)
sim.run()


## 165 - A Physics Simulation


In [None]:
## python
### CONFIGURATION
## physical layout
N = 5 # width of lattice
M = 5 # height of lattice
## temperature settings
temperature = 0.5
BETA = 1 / temperature

In [None]:
def initRandState(N, M):
    """Return an (N, M) array whose entries are drawn at random from {-1, 1}."""
    block = np.random.choice([-1, 1], size = (N, M))
    return block

In [None]:
## python
def costForCenterState(state, i, j, n, m):
    """Sum of state[i, j] multiplied by each of its four lattice neighbours.

    `n` and `m` are the lattice dimensions used to wrap the neighbour
    indices around the edges.
    """
    centerS = state[i, j]
    neighbors = [((i + 1) % n, j), ((i - 1) % n, j),
                 (i, (j + 1) % m), (i, (j - 1) % m)]
    ## notice the % n because we impose periodic boundary cond
    ## ignore this if it doesn't make sense - it's merely a
    ## physical constraint on the system saying 2D system is like
    ## the surface of a donut
    interactionE = [state[x, y] * centerS for (x, y) in neighbors]
    return np.sum(interactionE)

In [None]:
## python
def magnetizationForState(state):
    """Return the sum of all entries of `state`."""
    return np.sum(state)

In [None]:
## python
def mcmcAdjust(state):
    """Pick one random lattice site and possibly flip its sign.

    The site is flipped when its cost is negative, and otherwise with
    probability exp(-cost * BETA). `state` is modified in place and also
    returned.
    """
    n = state.shape[0]
    m = state.shape[1]
    x, y = np.random.randint(0, n), np.random.randint(0, m)
    centerS = state[x, y]
    cost = costForCenterState(state, x, y, n, m)
    if cost < 0:
        centerS *= -1
    elif np.random.random() < np.exp(-cost * BETA):
        centerS *= -1
    state[x, y] = centerS
    return state


In [None]:
## python
def runState(state, n_steps, snapsteps = None):
    """Apply `mcmcAdjust` to `state` n_steps times and record the history.

    Parameters
    ----------
    state : lattice array; updated in place by mcmcAdjust.
    n_steps : number of single-site update steps.
    snapsteps : increasing step indices at which to save a copy of the
        state. Defaults to round(n_steps / (M * N * 100)) evenly spaced
        indices between 0 and n_steps.

    Returns
    -------
    (state, saved_states, magnet_hist), where magnet_hist holds the
    magnetization after every step.
    """
    if snapsteps is None:
        snapsteps = np.linspace(0, n_steps,
                                num = round(n_steps / (M * N * 100)),
                                dtype = np.int32)
    saved_states = []
    sp = 0
    magnet_hist = []
    for i in range(n_steps):
        state = mcmcAdjust(state)
        magnet_hist.append(magnetizationForState(state))
        # sp points at the next snapshot step still to be taken
        if sp < len(snapsteps) and i == snapsteps[sp]:
            saved_states.append(np.copy(state))
            sp += 1
    return state, saved_states, magnet_hist


In [None]:
## python
### RUN A SIMULATION
init_state = initRandState(N, M)
print(init_state)
# runState returns (state, saved_states, magnet_hist); unpack it so that
# final_state is the lattice itself rather than the whole tuple
final_state, saved_states, magnet_hist = runState(np.copy(init_state), 1000)

In [None]:
## python
## we collect each time series as a separate element in results list
results = []
for i in range(100):
    init_state = initRandState(N, M)
    final_state, states, magnet_hist = runState(init_state, 1000)
    results.append(magnet_hist)

## we plot each curve with some transparency so we can see
## curves that overlap one another
# NOTE(review): `plt` is not imported anywhere in the visible notebook;
# presumably it is matplotlib.pyplot and needs importing first - confirm.
for mh in results:
    plt.plot(mh,'r', alpha=0.2)

## Chapter 5. Storing Temporal Data


In [None]:
SQL, NOSQL, Flat File

In [None]:
Time series–specific databases and related monitoring tools

InfluxDB, Prometheus

# Chapter 6. Statistical Models for Time Series 

In [None]:
"""a linear regression assumes you have independently and identically distributed (iid) data"""

In [None]:
ACF

Ljung-Box test