[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/lazysasha/assets-price-prediction/blob/main/workshop/basic_exercises.ipynb)

In [None]:
import tensorflow as tf

# Make sure we are running TensorFlow 2.0 (prints the installed version):
print(tf.__version__)

# Read and prepare dataset
First, we define helper functions to plot a time series and to read the data from a CSV file:

In [None]:
import csv
from datetime import datetime, timedelta
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras

def plot_series(time, series, format="-", start=0, end=None, title="Asset price history", legend=[]):
    """Plot the ``[start:end]`` window of ``series`` against the same window of ``time``.

    Uses the pyplot interface only; no figure is created and nothing is shown
    here, so the caller is expected to do that.

    Args:
        time: Sliceable sequence of x-axis values.
        series: Sliceable sequence of y-axis values.
        format: Format string passed as the third argument to ``plt.plot``.
        start: Index of the first element to draw.
        end: Index one past the last element to draw; ``None`` means "to the end".
        title: Text passed to ``plt.title``.
        legend: Labels passed to ``plt.legend``. The default list is never
            mutated by this function.
    """
    # One slice object keeps the x and y windows identical.
    window = slice(start, end)
    plt.plot(time[window], series[window], format)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.title(title)
    plt.legend(legend)
    plt.grid(True)

def read_data(filepath='sample_data/BTC-USD.csv'):
    """Exercise template: load dates and closing prices from a CSV file.

    This is a skeleton. The ``YOUR_CODE_HERE`` placeholders and the bare
    ``return`` must be completed before the function can be used.

    Args:
        filepath: Path of the comma-delimited file to read. Its first row is
            skipped as a header.

    Returns:
        Once completed: a pair ``(time_step, prices)``, each wrapped into a
        numpy array (see the TODO on the ``return`` line).
    """
    # Date layout year-month-day; meant to be used when parsing the `Date` cell below.
    DATE_FORMAT = "%Y-%m-%d"
    time_step = []
    prices = []
    with open(filepath) as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader) # skip header
        for row in reader:
            # Exercise 1: Parse csv file with your data
            ### START CODE HERE ### (≈ 3 lines of code)
            if YOUR_CODE_HERE: # TODO: the condition must be True for rows we keep, i.e. rows whose 'Close' cell is NOT the string 'null' (Yahoo Finance specifics).
                time_step.append(YOUR_CODE_HERE)    # TODO: convert the string value from the `Date` cell into a date object and append it to the `time_step` array.
                                                    # Hint: use datetime.strptime() with DATE_FORMAT to parse a string into a datetime: https://www.programiz.com/python-programming/datetime/strptime
                                                    # Hint: Use the date() method to extract only the date part from the datetime object.
                prices.append(YOUR_CODE_HERE)       # TODO: append the 'Close' price to the `prices` array. Hint: make sure it is converted to float.
            ### END CODE HERE ###

    ### START CODE HERE ### (≈ 1 line of code)
    return # TODO: return a pair of the `time_step` and `prices` arrays, each of them wrapped into a numpy array. See https://numpy.org/doc/stable/user/basics.creation.html for reference
    ### END CODE HERE ###

# Load the dataset: `time` and `series` receive the pair returned by read_data().
### START CODE HERE ### (≈ 1 line of code)
time, series = # TODO: call the read_data() function with the path to your dataset as its argument
### END CODE HERE ###

# Plot the complete series.
plt.figure(figsize=(10, 6))
plot_series(time, series)
plt.show()

# Once the exercise above is implemented, you should see a price chart of your asset.

Now that we have the time series loaded in, let's split it into a training and validation set, so we can start forecasting:

In [None]:
# Exercise 1: Split dataset into training and validation sets
### START CODE HERE ### (≈ 5 lines of code)
split_time =    # TODO: calculate the index that splits the `time` array 80% / 20% into training / validation parts.
                # For example: if there are 100 elements in the `time` array, `split_time` should be set to 80.
                # Hint: Use the len(my_arr) function to get the length of an array. Make sure the result is converted to int.
time_train =    # TODO: extract a subarray with the first `split_time` elements of the `time` array.
                # Hint: Check out numpy array slice notation: https://stackoverflow.com/questions/509211/understanding-slice-notation
x_train =       # TODO: extract a subarray with the first `split_time` elements of the `series` array.
time_valid =    # TODO: extract a subarray with the remaining elements (from index `split_time` onwards) of the `time` array.
x_valid =       # TODO: extract a subarray with the remaining elements (from index `split_time` onwards) of the `series` array.
### END CODE HERE ###

# Plot training data:
plt.figure(figsize=(10, 6))
plot_series(time_train, x_train, title="Training set")
plt.show()

# Plot validation data:
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid, title="Validation set")
plt.show()

# Naive Forecast

In [None]:
# Exercise 2: Implement Naive forecast
# The forecast for each day is simply the value of the previous day.
### START CODE HERE ### (≈ 1 line of code)
naive_forecast =    # TODO: 'calculate' the naive forecast based on the `series` array
                    # Hint: It can be done just by slicing the `series` array from `split_time-1` up to, but not including, the last element,
                    # so that `naive_forecast` has exactly as many elements as `x_valid` (needed for the plot and the metrics below).
### END CODE HERE ###

# Plot naive forecast vs. actual data
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)
plot_series(time_valid, naive_forecast, title="Naive prediction", legend=["Actual", "Forecast"])

Let's zoom in on the start of the validation period:

In [None]:
plt.figure(figsize=(10, 6))
# Actual values: validation elements 0..49.
plot_series(time_valid, x_valid, start=0, end=50)
# Forecast: elements 1..50, i.e. a window shifted by one step relative to the actual values above.
plot_series(time_valid, naive_forecast, start=1, end=51, title="Naive prediction (zoomed in)", legend=["Actual", "Forecast"])

You can see that the naive forecast lags 1 step behind the time series.

Now let's compute the mean squared error and the mean absolute error between the forecasts and the actual values in the validation period:

In [None]:
# Exercise 2: Implement Naive forecast (print MSE and MAE values)
print("MSE:")
### START CODE HERE ### (≈ 1 line of code)
print(YOUR_CODE_HERE)   # TODO: calculate the mean squared error between x_valid and naive_forecast and convert it into a numpy value
                        # Hint: use `keras.metrics.mean_squared_error`
                        # Hint: See example: https://www.tensorflow.org/api_docs/python/tf/keras/losses/MSE
                        # Hint: use numpy() to convert the resulting tensor into a NumPy scalar
### END CODE HERE ###

print("MAE:")
### START CODE HERE ### (≈ 1 line of code)
print(YOUR_CODE_HERE)   # TODO: calculate the mean absolute error between x_valid and naive_forecast and convert it into a numpy value
                        # Hint: use `keras.metrics.mean_absolute_error`
                        # Hint: See example: https://www.tensorflow.org/api_docs/python/tf/keras/losses/MAE
                        # Hint: use numpy() to convert the resulting tensor into a NumPy scalar
### END CODE HERE ###

We will use the results of the naive forecast as our baseline to compare other models against.

# Moving Average
Now let's implement a moving average forecast. First, we define a function that calculates the moving average of a given time series:

In [None]:
# Exercise 3: Implement Moving Average forecast
def moving_average_forecast(series, window_size):
    """Exercise template: forecast each value as the mean of the preceding window.

    This is a skeleton. The ``YOUR_CODE_HERE`` placeholder and the bare
    ``return`` must be completed before the function can be used.

    The loop visits every start index ``time`` in
    ``range(len(series) - window_size)``. Per the TODO, the mean of
    ``series[time:time + window_size]`` is appended for each one, so element
    ``i`` of the result is the forecast for position ``i + window_size`` of
    ``series``.

    If window_size=1, then this is equivalent to naive forecast.

    Args:
        series: Sequence of values to average over.
        window_size: Number of consecutive values in each averaging window.

    Returns:
        Once completed: the ``forecast`` list wrapped into a numpy array.
    """
    forecast = []
    for time in range(len(series) - window_size):
        ### START CODE HERE ### (≈ 2 lines of code)
        forecast.append(YOUR_CODE_HERE) # TODO: calculate the mean of the `series` subarray between `time` and `time + window_size` and append it to the forecast
    return # TODO: return the `forecast` array wrapped into a numpy array. See https://numpy.org/doc/stable/user/basics.creation.html for reference
    ### END CODE HERE ###


In [None]:
# With a window of 30, element i of the forecast corresponds to position i + 30 of `series`,
# so slicing from `split_time - 30` keeps the forecasts for positions `split_time` onwards,
# which lines them up with `x_valid`.
moving_avg = moving_average_forecast(series, 30)[split_time - 30:]

# Plot actual validation data and the moving average forecast on the same figure.
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)
plot_series(time_valid, moving_avg)

Let's print the performance metrics:

In [None]:
print("MSE:")
# Exercise 3: Implement Moving Average forecast (print MSE and MAE values)
### START CODE HERE ### (≈ 1 line of code)
print(YOUR_CODE_HERE)   # TODO: calculate the mean squared error between x_valid and moving_avg and convert it into a numpy value
                        # Hint: use `keras.metrics.mean_squared_error`
                        # Hint: See example: https://www.tensorflow.org/api_docs/python/tf/keras/losses/MSE
                        # Hint: use numpy() to convert the resulting tensor into a NumPy scalar
### END CODE HERE ###

print("MAE:")
### START CODE HERE ### (≈ 1 line of code)
print(YOUR_CODE_HERE)   # TODO: calculate the mean absolute error between x_valid and moving_avg and convert it into a numpy value
                        # Hint: use `keras.metrics.mean_absolute_error`
                        # Hint: See example: https://www.tensorflow.org/api_docs/python/tf/keras/losses/MAE
                        # Hint: use numpy() to convert the resulting tensor into a NumPy scalar
### END CODE HERE ###