# Forecasting prices

## Overview

Train several models to see what's the better performance.

Models:
* Decision Tree Regresion
* KNN
* Bagging
* Boosting

## Import libraries

In [5]:
import datetime as dt
import os
import pandas as pd
import numpy as np
import csv

# Sklearn
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Add plotly for interactive charts
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools



## Initial variables

In [6]:
symbol = "AABA"

## Reading data from CSV file

In [7]:
# Get values from cvs file
filename = "data/" + symbol + ".csv"
portf_value = pd.read_csv(filename, sep=",", header=None)

In [8]:
# Normalize the prices Dataframe
normed = portf_value.copy()

In [9]:
normed['date'] = portf_value.index
normed.set_index('date', inplace=True)

normed.columns.values[1] = symbol

# Convert price column to float
normed[symbol] = pd.to_numeric(normed[symbol], errors='coerce').fillna(0)

In [10]:
normed.info()
normed.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 128 entries, 0 to 127
Data columns (total 2 columns):
0       128 non-null object
AABA    128 non-null float64
dtypes: float64(1), object(1)
memory usage: 3.0+ KB


Unnamed: 0_level_0,0,AABA
date,Unnamed: 1_level_1,Unnamed: 2_level_1
0,date,0.0
1,2018-07-09,75.610001
2,2018-07-10,75.970001
3,2018-07-11,73.760002
4,2018-07-12,74.610001


## Getting indicator values

In [14]:
# Momentum
price = normed[symbol]
window = 10
momentum = pd.Series(np.nan, index=price.index)
momentum.iloc[window:] = (price.iloc[window:] / price.values[:-window]) - 1


In [15]:
# Rolling Mean
rolling_mean = price.rolling(window).mean()

In [16]:
# SMA
sma = price / rolling_mean - 1

In [17]:
# RSI
def get_RSI(prices, n=14):
    deltas = np.diff(prices)
    seed = deltas[:n+1]
    up = seed[seed>=0].sum()/n
    down = -seed[seed<0].sum()/n
    rs = up/down
    rsi = np.zeros_like(prices)
    rsi[:n] = 100. - 100./(1.+rs)

    for i in range(n, len(prices)):
        delta = deltas[i-1] # cause the diff is 1 shorter
        if delta>0:
            upval = delta
            downval = 0.
        else:
            upval = 0.
            downval = -delta
        up = (up*(n-1) + upval)/n
        down = (down*(n-1) + downval)/n
        rs = up/down
        rsi[i] = 100. - 100./(1.+rs)
    return rsi

rsi = get_RSI(price)

# Clean nan values
normed = normed.fillna(0)

# Sort dataframe by index
normed.sort_index()

Unnamed: 0_level_0,0,AABA
date,Unnamed: 1_level_1,Unnamed: 2_level_1
0,date,0.000000
1,2018-07-09,75.610001
2,2018-07-10,75.970001
3,2018-07-11,73.760002
4,2018-07-12,74.610001
5,2018-07-13,74.480003
6,2018-07-16,74.660004
7,2018-07-17,75.360001
8,2018-07-18,74.660004
9,2018-07-19,73.360001
