# Time Series Machine Learning Part 1 Assignment

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

### Import the Netflix stock price data set (NFLX_data.csv).

In [2]:
df = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%205/NFLX_data.csv')

In [3]:
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    1259 non-null   object 
 1   open    1259 non-null   float64
 2   high    1259 non-null   float64
 3   low     1259 non-null   float64
 4   close   1259 non-null   float64
 5   volume  1259 non-null   int64  
 6   Name    1259 non-null   object 
dtypes: float64(4), int64(1), object(2)
memory usage: 69.0+ KB


Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,25.9635,26.28,25.7157,25.8528,25649820,NFLX
1,2013-02-11,25.5685,26.0071,24.9714,25.4128,29321782,NFLX
2,2013-02-12,25.8085,26.2228,25.1014,25.4214,34388802,NFLX
3,2013-02-13,25.8428,26.6285,25.6657,26.6098,40799094,NFLX
4,2013-02-14,26.7557,27.1214,26.3844,26.7714,31968685,NFLX


In [4]:
df['date'] = pd.to_datetime(df['date'])
df.columns = ['Date', 'Open', 'High', 'Low', 'Observed', 'Volume', 'Ticker']
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      1259 non-null   datetime64[ns]
 1   Open      1259 non-null   float64       
 2   High      1259 non-null   float64       
 3   Low       1259 non-null   float64       
 4   Observed  1259 non-null   float64       
 5   Volume    1259 non-null   int64         
 6   Ticker    1259 non-null   object        
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 69.0+ KB


### Transform the data by shifting the series and creating features that will allow us to forecast the price 30 days into the future from 90 days of daily history.

In [5]:
history = 90
future = 30

df.set_index('Date', inplace=True)
shifted = df.copy()[['Observed']]
shifted.columns=['t+0']

past_shifts = [x for x in range(1, history+1)]
future_shifts = [x for x in range(1, future)]

In [6]:
past_shifts

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90]

In [7]:
future_shifts

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29]

In [8]:
shifted.head()

Unnamed: 0_level_0,t+0
Date,Unnamed: 1_level_1
2013-02-08,25.8528
2013-02-11,25.4128
2013-02-12,25.4214
2013-02-13,26.6098
2013-02-14,26.7714


In [9]:
for shift in past_shifts:
  shifted[f't-{shift}'] = shifted['t+0'].shift(shift)

In [10]:
for shift in future_shifts:
  shifted[f't+{shift}'] = shifted['t+0'].shift(-shift)

In [11]:
shifted.dropna(inplace=True)
shifted.head()

Unnamed: 0_level_0,t+0,t-1,t-2,t-3,t-4,t-5,t-6,t-7,t-8,t-9,t-10,t-11,t-12,t-13,t-14,t-15,t-16,t-17,t-18,t-19,t-20,t-21,t-22,t-23,t-24,t-25,t-26,t-27,t-28,t-29,t-30,t-31,t-32,t-33,t-34,t-35,t-36,t-37,t-38,t-39,...,t-80,t-81,t-82,t-83,t-84,t-85,t-86,t-87,t-88,t-89,t-90,t+1,t+2,t+3,t+4,t+5,t+6,t+7,t+8,t+9,t+10,t+11,t+12,t+13,t+14,t+15,t+16,t+17,t+18,t+19,t+20,t+21,t+22,t+23,t+24,t+25,t+26,t+27,t+28,t+29
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
2013-06-19,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,30.7928,30.5357,30.96,...,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214,25.4128,25.8528,31.9314,30.9857,30.8,30.4143,30.3,30.71,30.1557,32.04,31.6371,31.5585,32.1571,33.3,35.3403,34.8314,34.8814,36.7515,36.8542,37.2114,38.2742,38.0585,37.7965,37.4228,35.7514,34.4714,35.2485,35.1871,34.9943,34.8228,34.9263
2013-06-20,31.9314,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,30.7928,30.5357,...,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214,25.4128,30.9857,30.8,30.4143,30.3,30.71,30.1557,32.04,31.6371,31.5585,32.1571,33.3,35.3403,34.8314,34.8814,36.7515,36.8542,37.2114,38.2742,38.0585,37.7965,37.4228,35.7514,34.4714,35.2485,35.1871,34.9943,34.8228,34.9263,35.5885
2013-06-21,30.9857,31.9314,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,30.7928,...,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214,30.8,30.4143,30.3,30.71,30.1557,32.04,31.6371,31.5585,32.1571,33.3,35.3403,34.8314,34.8814,36.7515,36.8542,37.2114,38.2742,38.0585,37.7965,37.4228,35.7514,34.4714,35.2485,35.1871,34.9943,34.8228,34.9263,35.5885,35.1685
2013-06-24,30.8,30.9857,31.9314,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,...,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,30.4143,30.3,30.71,30.1557,32.04,31.6371,31.5585,32.1571,33.3,35.3403,34.8314,34.8814,36.7515,36.8542,37.2114,38.2742,38.0585,37.7965,37.4228,35.7514,34.4714,35.2485,35.1871,34.9943,34.8228,34.9263,35.5885,35.1685,36.2628
2013-06-25,30.4143,30.8,30.9857,31.9314,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,...,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,30.3,30.71,30.1557,32.04,31.6371,31.5585,32.1571,33.3,35.3403,34.8314,34.8814,36.7515,36.8542,37.2114,38.2742,38.0585,37.7965,37.4228,35.7514,34.4714,35.2485,35.1871,34.9943,34.8228,34.9263,35.5885,35.1685,36.2628,36.5571


In [13]:
x = shifted[[f't-{shift}' for shift in past_shifts]]
x.head()

Unnamed: 0_level_0,t-1,t-2,t-3,t-4,t-5,t-6,t-7,t-8,t-9,t-10,t-11,t-12,t-13,t-14,t-15,t-16,t-17,t-18,t-19,t-20,t-21,t-22,t-23,t-24,t-25,t-26,t-27,t-28,t-29,t-30,t-31,t-32,t-33,t-34,t-35,t-36,t-37,t-38,t-39,t-40,...,t-51,t-52,t-53,t-54,t-55,t-56,t-57,t-58,t-59,t-60,t-61,t-62,t-63,t-64,t-65,t-66,t-67,t-68,t-69,t-70,t-71,t-72,t-73,t-74,t-75,t-76,t-77,t-78,t-79,t-80,t-81,t-82,t-83,t-84,t-85,t-86,t-87,t-88,t-89,t-90
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
2013-06-19,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,30.7928,30.5357,30.96,30.9985,...,23.2943,23.5228,23.8128,24.2485,25.2414,26.0614,27.04,27.1771,27.23,25.8271,25.9,25.9985,26.15,25.91,26.5128,26.4071,26.91,27.48,26.0157,25.7785,26.3857,25.9371,26.1343,25.9614,25.8871,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214,25.4128,25.8528
2013-06-20,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,30.7928,30.5357,30.96,...,24.1943,23.2943,23.5228,23.8128,24.2485,25.2414,26.0614,27.04,27.1771,27.23,25.8271,25.9,25.9985,26.15,25.91,26.5128,26.4071,26.91,27.48,26.0157,25.7785,26.3857,25.9371,26.1343,25.9614,25.8871,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214,25.4128
2013-06-21,31.9314,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,30.7928,30.5357,...,23.7243,24.1943,23.2943,23.5228,23.8128,24.2485,25.2414,26.0614,27.04,27.1771,27.23,25.8271,25.9,25.9985,26.15,25.91,26.5128,26.4071,26.91,27.48,26.0157,25.7785,26.3857,25.9371,26.1343,25.9614,25.8871,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098,25.4214
2013-06-24,30.9857,31.9314,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,30.7928,...,24.7157,23.7243,24.1943,23.2943,23.5228,23.8128,24.2485,25.2414,26.0614,27.04,27.1771,27.23,25.8271,25.9,25.9985,26.15,25.91,26.5128,26.4071,26.91,27.48,26.0157,25.7785,26.3857,25.9371,26.1343,25.9614,25.8871,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714,26.6098
2013-06-25,30.8,30.9857,31.9314,33.1871,32.69,32.7471,30.57,30.77,29.6628,30.6371,31.5614,31.46,31.1057,31.9228,32.1871,31.71,32.3214,31.8085,30.7628,30.5985,32.6771,32.3114,32.6514,33.87,34.2214,34.1428,33.8614,34.7714,33.4243,32.7684,31.0991,30.9157,29.8014,29.4643,30.0985,30.4928,30.6414,30.4157,30.8671,30.7161,...,24.7428,24.7157,23.7243,24.1943,23.2943,23.5228,23.8128,24.2485,25.2414,26.0614,27.04,27.1771,27.23,25.8271,25.9,25.9985,26.15,25.91,26.5128,26.4071,26.91,27.48,26.0157,25.7785,26.3857,25.9371,26.1343,25.9614,25.8871,27.0528,26.8685,26.3314,26.2985,25.6171,25.6943,26.7357,26.7314,28.0643,27.0731,26.7714


### Split the data into a training set and a testing set. Make the test set size 20%. Instantiate an AdaBoost model and fit it to the training set. Generate predictions for the test set. Evaluate the results using R-Squared, Mean Absolute Error, and Root Mean Squared Error metrics.

In [16]:
for future_shift in future_shifts:
  y = shifted[f't+{future_shift}']
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False)

  model = AdaBoostRegressor()
  model.fit(x_train, y_train)

  predictions = model.predict(x_test)
  print(f'R^2 for t+{future_shift}:', r2_score(y_test, predictions), 
        f'MAE for t+{future_shift}:', mean_absolute_error(y_test, predictions),
        f'RMSE for t+{future_shift}:', np.sqrt(mean_squared_error(y_test, predictions)))

R^2 for t+1: -2.432642991807367 MAE for t+1: 31.407513575386115 RMSE for +1: 36.972929604552576
R^2 for t+2: -2.4698137359773047 MAE for t+2: 31.432297156684825 RMSE for +2: 37.12690699899242
R^2 for t+3: -3.4451462061288325 MAE for t+3: 37.037494618380144 RMSE for +3: 41.97024431667798
R^2 for t+4: -3.4972453208036844 MAE for t+4: 37.48774738353783 RMSE for +4: 42.285789744261024
R^2 for t+5: -3.0838173911957103 MAE for t+5: 35.57141511924491 RMSE for +5: 40.45089756774565
R^2 for t+6: -2.7899047342858636 MAE for t+6: 33.920417244391984 RMSE for +6: 39.12718557176378
R^2 for t+7: -2.622823934918431 MAE for t+7: 33.084189264649595 RMSE for +7: 38.46609964521835
R^2 for t+8: -2.210005086390399 MAE for t+8: 30.307694675284708 RMSE for +8: 36.43802096898648
R^2 for t+9: -3.1915710872755207 MAE for t+9: 36.456614594594704 RMSE for +9: 41.822854477458584
R^2 for t+10: -2.976678584772164 MAE for t+10: 35.50210542119214 RMSE for +10: 40.93444813697182
R^2 for t+11: -3.2130764191475505 MAE for

### Visually examine the results by creating a scatter plot where the x axis represents the observed results and the y axis represents the predictions.

In [17]:
def iscatter(df, x, y, color=None, size=None, title=''):
    fig = px.scatter(df, x=x, y=y, color=color, size=size, 
                     title=title, template='none')
    
    fig.update_traces(marker_line_color='black', 
                  marker_line_width=1)

    fig.show()

In [18]:
results = pd.DataFrame([list(y_test), predictions], index=['Observed', 'Predicted']).T
iscatter(results, 'Observed', 'Predicted', title='Observed vs. Predicted')