# Time Series Machine Learning Part 1 Assignment

In [75]:
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.ensemble import AdaBoostRegressor as ABR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

### Import the Netflix stock price data set (NFLX_data.csv).

In [76]:
NFLX = 'https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%205/NFLX_data.csv'
data = pd.read_csv(NFLX)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    1259 non-null   object 
 1   open    1259 non-null   float64
 2   high    1259 non-null   float64
 3   low     1259 non-null   float64
 4   close   1259 non-null   float64
 5   volume  1259 non-null   int64  
 6   Name    1259 non-null   object 
dtypes: float64(4), int64(1), object(2)
memory usage: 69.0+ KB


In [77]:
data['date'] = pd.to_datetime(data['date'])
data = data[['date','close']] 
print(data.info())
data.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1259 non-null   datetime64[ns]
 1   close   1259 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 19.8 KB
None


Unnamed: 0,date,close
0,2013-02-08,25.8528
1,2013-02-11,25.4128
2,2013-02-12,25.4214
3,2013-02-13,26.6098
4,2013-02-14,26.7714


### Transform the data by shifting the series and creating features that will allow us to forecast the price 30 days into the future from 90 days of daily history.

In [78]:
history = 90
future = 30
shifts = [x + future for x in range(1, history + 1)]
for shift in shifts:
  data['t-'+ str(shift)] = data['close'].shift(shift)
data.dropna(inplace=True)

### Split the data into a training set and a testing set. Make the test set size 20%.

In [79]:
data.columns

Index(['date', 'close', 't-31', 't-32', 't-33', 't-34', 't-35', 't-36', 't-37',
       't-38', 't-39', 't-40', 't-41', 't-42', 't-43', 't-44', 't-45', 't-46',
       't-47', 't-48', 't-49', 't-50', 't-51', 't-52', 't-53', 't-54', 't-55',
       't-56', 't-57', 't-58', 't-59', 't-60', 't-61', 't-62', 't-63', 't-64',
       't-65', 't-66', 't-67', 't-68', 't-69', 't-70', 't-71', 't-72', 't-73',
       't-74', 't-75', 't-76', 't-77', 't-78', 't-79', 't-80', 't-81', 't-82',
       't-83', 't-84', 't-85', 't-86', 't-87', 't-88', 't-89', 't-90', 't-91',
       't-92', 't-93', 't-94', 't-95', 't-96', 't-97', 't-98', 't-99', 't-100',
       't-101', 't-102', 't-103', 't-104', 't-105', 't-106', 't-107', 't-108',
       't-109', 't-110', 't-111', 't-112', 't-113', 't-114', 't-115', 't-116',
       't-117', 't-118', 't-119', 't-120'],
      dtype='object')

In [80]:
x = data.drop(['date', 'close'], axis=1)
y = data['close']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.3, shuffle=False)

### Instantiate an AdaBoost model and fit it to the training set.

In [81]:
model = ABR()
model.fit(x_train, y_train)


AdaBoostRegressor(base_estimator=None, learning_rate=1.0, loss='linear',
                  n_estimators=50, random_state=None)

### Generate predictions for the test set.

In [82]:
predictions = model.predict(x_test)

### Evaluate the results using R-Squared, Mean Absolute Error, and Root Mean Squared Error metrics.

In [83]:
print('R-Squared:', r2_score(y_test, predictions))
print('Mean Absolute Error:', mean_absolute_error(y_test, predictions))
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(y_test, predictions)))

R-Squared: -2.7331307771891447
Mean Absolute Error: 57.95961635211553
Root Mean Squared Error: 67.80091319958004


### Visually examine the results by creating a scatter plot where the x axis represents the observed results and the y axis represents the predictions.

In [84]:
def iscatter(df, x, y, color=None, size=None, title=''):
    fig = px.scatter(df, x=x, y=y, color=color, size=size, 
                     title=title, template='none')
    
    fig.update_traces(marker_line_color='black', 
                  marker_line_width=1)

    fig.show()

In [85]:
iscatter(data, y_test, predictions, title='Close vs Predicted' )

In [86]:
#history = 90
#future = 31 

#past_shifts = [x for x in range(1, history + 1)]
#future_shifts = [x for x in range(1, future)]

#shifted = data.copy()[['date','close']]
#shifted.columns = ['date','t+0']

#for shift in past_shifts:
#  shifted[f't-{shift}'] = shifted['t+0'].shift(shift)

#for shift in future_shifts:
#  shifted['t+' + str(shift)] = shifted['t+0'].shift(-shift)
#shifted.head()
#shifted.dropna(inplace=True)