# Time Series Machine Learning Part 1 Assignment

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

### Import the Netflix stock price data set (NFLX_data.csv).

In [2]:
df = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%205/NFLX_data.csv')

In [3]:
df

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,25.9635,26.2800,25.7157,25.8528,25649820,NFLX
1,2013-02-11,25.5685,26.0071,24.9714,25.4128,29321782,NFLX
2,2013-02-12,25.8085,26.2228,25.1014,25.4214,34388802,NFLX
3,2013-02-13,25.8428,26.6285,25.6657,26.6098,40799094,NFLX
4,2013-02-14,26.7557,27.1214,26.3844,26.7714,31968685,NFLX
...,...,...,...,...,...,...,...
1254,2018-02-01,266.4100,271.9500,263.3800,265.0700,9669011,NFLX
1255,2018-02-02,263.0000,270.6200,262.7100,267.4300,9123610,NFLX
1256,2018-02-05,262.0000,267.8990,250.0300,254.2600,11896053,NFLX
1257,2018-02-06,247.7000,266.7000,245.0000,265.7200,12595801,NFLX


### Transform the data by shifting the series and creating features that will allow us to forecast the price 30 days into the future from 90 days of daily history.

In [4]:
df = df[['date','close']]

In [5]:
history = 90
future = 30

shifts = [x + future for x in range(1, history+1)]
for shift in shifts:
  df[f't-{shift}'] = df['close'].shift(shift)
df = df.dropna()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



### Split the data into a training set and a testing set. Make the test set size 20%.

In [6]:
df.set_index('date', inplace=True)
x = df.drop(columns=['close'])
y = df['close']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=False)

### Instantiate an AdaBoost model and fit it to the training set.

In [7]:
model = AdaBoostRegressor()
model.fit(x_train,y_train)

AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
                  n_estimators=50, random_state=None)

### Generate predictions for the test set.

In [9]:
predictions = model.predict(x_test)
predictions

array([134.72860465, 134.94140496, 134.72860465, 134.72860465,
       134.72860465, 134.72860465, 137.0376    , 137.0376    ,
       134.94140496, 134.72860465, 120.2002947 , 124.7855303 ,
       124.7855303 , 124.7855303 , 129.73446512, 134.72860465,
       134.72860465, 129.73446512, 134.72860465, 114.39176471,
       114.39176471, 114.39176471, 114.39176471, 114.39176471,
       114.39176471, 114.39176471, 114.39176471, 114.39176471,
       114.39176471, 114.39176471, 116.19791667, 114.39176471,
       116.19791667, 116.19791667, 116.19791667, 116.19791667,
       116.74944444, 116.74944444, 116.74944444, 116.74944444,
       116.74944444, 116.74944444, 116.74944444, 116.74944444,
       116.74944444, 116.74944444, 116.74944444, 116.74944444,
       116.74944444, 117.68      , 116.74944444, 116.74944444,
       117.68      , 116.74944444, 116.74944444, 116.74944444,
       117.68      , 117.68      , 117.68      , 117.68      ,
       117.68      , 117.68      , 117.68      , 117.68

### Evaluate the results using R-Squared, Mean Absolute Error, and Root Mean Squared Error metrics.

In [7]:
print('R-Squared', r2_score(y_test, predictions))
print('Mean Absolute Error:', mean_absolute_error(y_test, predictions))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, predictions)))

### Visually examine the results by creating a scatter plot where the x axis represents the observed results and the y axis represents the predictions.

In [11]:
def iscatter(df, x, y, color=None, size=None, title=''):
    fig = px.scatter(df, x=x, y=y, color=color, size=size, 
                     title=title, template='none')
    
    fig.update_traces(marker_line_color='black', 
                  marker_line_width=1)

    fig.show()
    
results = pd.DataFrame([list(y_test), predictions], index=['close', 'Predicted']).T
iscatter(results, 'close', 'Predicted', title='Observed vs. Predicted')