# Introduction

> Notebook setup

### Notebook functions and setup
The following functions are used throughout the notebook to reduce duplication and keep the code DRY.

In [4]:
%load_ext autoreload
%autoreload 2


#eventually move all but essential imports local to function blocks.
#import datetime
import logging
#import math
import os
import sys
#from datetime import datetime, timedelta

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#import requests
import seaborn as sns
import ta
from sklearn import svm
from sklearn.metrics import (explained_variance_score, mean_squared_error,
                             r2_score)
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from ta import add_all_ta_features

# Put the parent of the current working directory on sys.path *before* the
# project-local imports below. Previously this ran after them, so it could not
# help those imports resolve on a fresh kernel.
# NOTE(review): presumably the `src` package lives in that parent directory
# (notebook started from a sub-folder of the project) -- confirm.
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

from src.features.build_features import StockTechnicals  # noqa: E402
from src.models.backtest_strategy import TradeHoldStrategy  # noqa: E402
from src.visualization.visualize import DisplayTicker  # noqa: E402


# Route log records to stdout so they appear in the cell output.
# Use logging.DEBUG instead of logging.INFO to see all messages.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# Logger shared by the helper functions defined in this notebook.
logger = logging.getLogger('NOTEBOOK_LOGGER')


# functions...

## pulling data.
def pull_data_files(data_path, symbols, start, end, timeout=30):
    """
    Download daily price history for each symbol and save it as a CSV file.

    data_path : string, directory the CSV files are written to (created if missing)
    symbols : iterable of strings, ticker symbols as expected by the provider
    start : value sent to the provider as `period1` (callers pass epoch seconds)
    end : value sent to the provider as `period2` (callers pass epoch seconds)
    timeout : seconds to wait on the server before giving up (default 30)

    One file per symbol is written to '<data_path>/<symbol>.csv'; a '^' in the
    symbol is replaced by '_' in the file name (e.g. ^GSPC -> _GSPC.csv).

    Raises requests.HTTPError when the provider answers with an error status,
    so an error page is never stored as if it were price data.
    """
    import os
    import requests
    os.makedirs(data_path, exist_ok=True)
    parms = { 'period1': start, 'period2':end, 'interval': '1d', 'events': 'history' }
    base_url = 'https://query1.finance.yahoo.com/v7/finance/download/{}'
    for s in symbols:
        # Sanitise only the symbol: data_path must stay exactly as it was
        # created by os.makedirs above, even if it contains a '^'.
        filename = '{}/{}.csv'.format(data_path, s.replace('^', '_'))

        # stream=True plus the context manager writes the body chunk by chunk
        # and always releases the connection, even when an error is raised.
        with requests.get(base_url.format(s), params=parms,
                          stream=True, timeout=timeout) as r:
            logger.debug('calling: {}'.format(r.url))

            # Fail before opening the file so an existing CSV is not
            # overwritten by an error response.
            r.raise_for_status()

            with open(filename, 'wb') as fd:
                for chunk in r.iter_content(chunk_size=8192):
                    fd.write(chunk)

    logger.info('files downloaded to {}'.format(data_path))


def get_epoch_date(y,m,d):
    """
    Return midnight of the given calendar date as whole epoch seconds.

    y, m, d : int, year, month and day
    The date is built as a naive datetime, so it is interpreted in the local
    time zone of the machine running the notebook.
    Returns an int (the fractional part of the timestamp is truncated).
    """
    from datetime import datetime as _datetime
    from math import trunc

    midnight = _datetime(y, m, d)
    return trunc(midnight.timestamp())


def convert_data_add_features(stock_ticker_csv, start_date, end_date, close_feature='Close'):
    """
    Load a ticker CSV, keep a date window and add technical-indicator features.

    stock_ticker_csv : csv file with a 'Date' column and a closing-price column
    start_date : string, first day to keep, formatted 'YYYY-MM-DD'
    end_date : string, last day to keep, formatted 'YYYY-MM-DD'
    close_feature : string, name of the closing-price column (default 'Close')

    Returns a tuple (data_df, org_dates):
      data_df   : DataFrame with 'Date' converted to matplotlib date numbers,
                  the original dates kept in 'Pretty Date', an 'index' column
                  of row positions, and the added columns 'SMA_20', 'bb_bbm',
                  'bb_bbh', 'bb_bbl', 'MACD' and 'MACD signal'
      org_dates : Series with the original (unconverted) dates, for plotting

    Raises ValueError if start_date or end_date does not match 'YYYY-MM-DD'.
    """
    from datetime import datetime
    start_date = datetime.strptime(start_date, '%Y-%m-%d')
    end_date = datetime.strptime(end_date, '%Y-%m-%d')

    # Load the CSV File
    stock_ticker = pd.read_csv(stock_ticker_csv, index_col=['Date'], parse_dates=['Date'])

    # sort_index returns a new frame, so the result has to be assigned; the
    # date-range slice below relies on the index being in chronological order.
    stock_ticker = stock_ticker.sort_index()
    stock_ticker = stock_ticker.loc[start_date:end_date]

    # The first reset moves 'Date' from the index into a column; the second adds
    # an 'index' column holding the row positions. Both are kept so the returned
    # columns match what this function has always produced.
    data_df = stock_ticker.reset_index().reset_index()
    org_dates = data_df['Date']
    data_df['Pretty Date'] = data_df['Date']

    # Converting Dates to Numbers - SVR doesn't work with dates
    data_df['Date'] = data_df['Date'].map(mdates.date2num)

    # NOTE(review): n / ndev / n_fast / n_slow are the keyword names used by
    # older `ta` releases; newer releases renamed them (window, window_dev,
    # window_fast, window_slow) -- confirm against the installed version.
    indicator_bb = ta.volatility.BollingerBands(close=data_df[close_feature], n=20, ndev=2)
    indicator_SMA20 = ta.trend.SMAIndicator(close=data_df[close_feature],n=20, fillna=True)
    indicator_MACD = ta.trend.MACD(close=data_df[close_feature],n_fast=5, n_slow=30, fillna=True)

    # Features added to original data
    data_df['SMA_20'] = indicator_SMA20.sma_indicator()
    data_df['bb_bbm'] = indicator_bb.bollinger_mavg()
    data_df['bb_bbh'] = indicator_bb.bollinger_hband()
    data_df['bb_bbl'] = indicator_bb.bollinger_lband()
    data_df['MACD'] = indicator_MACD.macd()
    data_df['MACD signal'] = indicator_MACD.macd_signal()

    # Return DF with Dates converted to numerical format and the original Dates for plotting
    return data_df, org_dates




The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Download the data from the Market Data Provider

> Note: this uses a local data path setting

In [5]:


## local directory the downloaded CSV files are written to -- later cells read from it
local_data_path = './.data'

## download window, expressed in epoch time
start_epoch = get_epoch_date(2008, 3, 30)
end_epoch = get_epoch_date(2020, 3, 1)

# our symbols as needed by the provider. NOTE ^GSPC becomes _GSPC
symbols = ['MSFT', 'AAPL', 'GOOG', '^GSPC', 'AMZN']

# the actual pull
pull_data_files(
    data_path=local_data_path,
    symbols=symbols,
    start=start_epoch,
    end=end_epoch,
)

INFO:NOTEBOOK_LOGGER:files downloaded to ./.data


## Feature enrichment

This step converts dates to serial numbers and adds several features:

- SMA 20
- Bollinger Bands over a 20-day window:
    - Moving Average
    - High and Low band
- MACD


In [7]:

## date window and file pattern for the modeling data frames
start_date = '2019-01-31'
end_date = '2020-03-31'

file_target = '{}/{}.csv'


def _load_ticker_frame(file_symbol):
    """Return (feature frame, original dates) for one downloaded ticker CSV."""
    csv_path = file_target.format(local_data_path, file_symbol)
    return convert_data_add_features(csv_path, start_date, end_date)


# one (frame, dates) pair per ticker; '_GSPC' is the on-disk name of ^GSPC
MSFT, MSFT_dates = _load_ticker_frame('MSFT')
AAPL, AAPL_dates = _load_ticker_frame('AAPL')
AMZN, AMZN_dates = _load_ticker_frame('AMZN')
GOOG, GOOG_dates = _load_ticker_frame('GOOG')
_GSPC, _GSPC_dates = _load_ticker_frame('_GSPC')

# Basic Exploratory Analysis

>TODO: what kind of diagrams, tables, etc. to put here?

# Feature Wrangling and Setup

# Model Creation and Tuning

# Results and Comparison

# Summary