## Load and prep columns

In [13]:
import xgboost as xgb
import numpy as np
import pandas as pd
from xgboost.sklearn import XGBRegressor
from xgboost import plot_importance
from matplotlib import pyplot
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from scipy import stats

# IPython magic: set the inline plotting backend's figure format to 'retina'.
%config InlineBackend.figure_format = 'retina'

# Allow up to 200 rows and 200 columns when a DataFrame is displayed.
pd.options.display.max_rows = 200
pd.options.display.max_columns = 200


# Columns selected from the CSV as model inputs, in selection order.
# The names are the CSV header names verbatim -- including the 'percebt...'
# spelling -- so they must not be "corrected" here.
data_columns = (
    # Identifier, quote date and daily quote fields
    [
        'symbol', 'quoteDate', 'adjustedPrice', 'volume', 'previousClose',
        'change', 'changeInPercent',
    ]
    # 52-week range, 200-day average and 52-week change
    + [
        '52WeekHigh', '52WeekLow',
        'changeFrom52WeekHigh', 'changeFrom52WeekLow',
        'percebtChangeFrom52WeekHigh', 'percentChangeFrom52WeekLow',
        'Price200DayAverage', 'Price52WeekPercChange',
    ]
    # Volatility over 1 to 52 week windows
    + [
        '1WeekVolatility', '2WeekVolatility', '4WeekVolatility',
        '8WeekVolatility', '12WeekVolatility', '26WeekVolatility',
        '52WeekVolatility',
    ]
    # Bollinger prediction / type (string-valued in the sample rows)
    + [
        '4WeekBollingerPrediction', '4WeekBollingerType',
        '12WeekBollingerPrediction', '12WeekBollingerType',
    ]
    # 'allord'-prefixed columns
    + [
        'allordpreviousclose', 'allordchange',
        'allorddayshigh', 'allorddayslow',
        'allordpercebtChangeFrom52WeekHigh',
        'allordpercentChangeFrom52WeekLow',
    ]
    # 'asx'-prefixed columns
    + [
        'asxpreviousclose', 'asxchange',
        'asxdayshigh', 'asxdayslow',
        'asxpercebtChangeFrom52WeekHigh',
        'asxpercentChangeFrom52WeekLow',
    ]
    # Dividend columns
    + ['exDividendDate', 'exDividendPayout']
    # Remaining columns, original order kept.  The meaning of the series-style
    # codes (e.g. '640106_A3597525W', 'GRCPAIAD') is not documented in this file.
    + [
        '640106_A3597525W', 'AINTCOV', 'AverageVolume',
        'BookValuePerShareYear', 'CashPerShareYear', 'DPSRecentYear',
        'EBITDMargin', 'EPS', 'EPSGrowthRate10Years', 'EPSGrowthRate5Years',
        'FIRMMCRT', 'FXRUSD', 'Float',
        'GRCPAIAD', 'GRCPAISAD', 'GRCPBCAD', 'GRCPBCSAD', 'GRCPBMAD',
        'GRCPNRAD', 'GRCPRCAD',
        'H01_GGDPCVGDP', 'H01_GGDPCVGDPFY', 'H05_GLFSEPTPOP',
        'IAD', 'LTDebtToEquityQuarter', 'LTDebtToEquityYear', 'MarketCap',
        'NetIncomeGrowthRate5Years', 'NetProfitMarginPercent',
        'OperatingMargin', 'PE', 'PriceToBook',
        'ReturnOnAssets5Years', 'ReturnOnAssetsTTM', 'ReturnOnAssetsYear',
        'ReturnOnEquity5Years', 'ReturnOnEquityTTM', 'ReturnOnEquityYear',
        'RevenueGrowthRate10Years', 'RevenueGrowthRate5Years',
        'TotalDebtToAssetsQuarter', 'TotalDebtToAssetsYear',
        'TotalDebtToEquityQuarter', 'TotalDebtToEquityYear',
        'bookValue', 'earningsPerShare', 'ebitda', 'epsEstimateCurrentYear',
        'marketCapitalization', 'peRatio', 'pegRatio', 'pricePerBook',
        'pricePerEpsEstimateCurrentYear', 'pricePerEpsEstimateNextYear',
        'pricePerSales',
    ]
)


# Short horizon key -> target column name.
#   '<weeks>'   selects 'Future<weeks>WeekReturn'
#   '<weeks>ra' selects 'Future<weeks>WeekRiskAdjustedReturn'
# All plain-return keys come first, followed by the risk-adjusted ones.
returns = {
    '{}{}'.format(weeks, key_suffix): 'Future{}Week{}'.format(weeks, column_suffix)
    for key_suffix, column_suffix in (('', 'Return'), ('ra', 'RiskAdjustedReturn'))
    for weeks in (1, 2, 4, 8, 12, 26, 52)
}

# Load data
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so a column cannot end up with mixed types.
# NOTE(review): the truncated warning under this cell looks like pandas'
# mixed-type DtypeWarning -- confirm it is gone after re-running.
raw_data = pd.read_csv('data/companyQuotes-20170417-001.csv', low_memory=False)
raw_data.head(5)

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,symbol,quoteDate,lastTradePriceOnly,adjustedPrice,volume,daysHigh,daysLow,previousClose,change,changeInPercent,52WeekHigh,52WeekLow,changeFrom52WeekHigh,changeFrom52WeekLow,percebtChangeFrom52WeekHigh,percentChangeFrom52WeekLow,Price200DayAverage,Price52WeekPercChange,1WeekVolatility,2WeekVolatility,4WeekVolatility,8WeekVolatility,12WeekVolatility,26WeekVolatility,52WeekVolatility,4WeekBollingerBandLower,4WeekBollingerBandUpper,4WeekBollingerPrediction,4WeekBollingerType,12WeekBollingerBandLower,12WeekBollingerBandUpper,12WeekBollingerPrediction,12WeekBollingerType,allordpreviousclose,allordchange,allorddayshigh,allorddayslow,allordpercebtChangeFrom52WeekHigh,allordpercentChangeFrom52WeekLow,asxpreviousclose,asxchange,asxdayshigh,asxdayslow,asxpercebtChangeFrom52WeekHigh,asxpercentChangeFrom52WeekLow,exDividendDate,exDividendPayout,640106_A3597525W,AINTCOV,AverageVolume,Beta,BookValuePerShareYear,CashPerShareYear,DPSRecentYear,EBITDMargin,EPS,EPSGrowthRate10Years,EPSGrowthRate5Years,FIRMMCRT,FXRUSD,Float,GRCPAIAD,GRCPAISAD,GRCPBCAD,GRCPBCSAD,GRCPBMAD,GRCPNRAD,GRCPRCAD,H01_GGDPCVGDP,H01_GGDPCVGDPFY,H05_GLFSEPTPOP,IAD,LTDebtToEquityQuarter,LTDebtToEquityYear,MarketCap,NetIncomeGrowthRate5Years,NetProfitMarginPercent,OperatingMargin,PE,PriceToBook,QuoteLast,ReturnOnAssets5Years,ReturnOnAssetsTTM,ReturnOnAssetsYear,ReturnOnEquity5Years,ReturnOnEquityTTM,ReturnOnEquityYear,RevenueGrowthRate10Years,RevenueGrowthRate5Years,TotalDebtToAssetsQuarter,TotalDebtToAssetsYear,TotalDebtToEquityQuarter,TotalDebtToEquityYear,bookValue,earningsPerShare,ebitda,epsEstimateCurrentYear,marketCapitalization,peRatio,pegRatio,pricePerBook,pricePerEpsEstimateCurrentYear,pricePerEpsEstimateNextYear,pricePerSales,Future1WeekDividend,Future1WeekPrice,Future1WeekReturn,Future1WeekRiskAdjustedReturn,Future2WeekDividend,Future2WeekPrice,Future2WeekReturn,Future2WeekRiskAdjustedReturn,Future4WeekDividend,Future4WeekPrice,Future4WeekReturn,Future4WeekRiskAdjustedReturn,Future8WeekDividend
,Future8WeekPrice,Future8WeekReturn,Future8WeekRiskAdjustedReturn,Future12WeekDividend,Future12WeekPrice,Future12WeekReturn,Future12WeekRiskAdjustedReturn,Future26WeekDividend,Future26WeekPrice,Future26WeekReturn,Future26WeekRiskAdjustedReturn,Future52WeekDividend,Future52WeekPrice,Future52WeekReturn,Future52WeekRiskAdjustedReturn
0,CCA,2016-06-30,0.56,0.56,1876700,0.77,0.535,0.765,-0.205,-0.267974,0.56,0.56,,,,,,,,,,,,,,0.56,0.56,Steady,Within,0.56,0.56,Steady,Within,5221.0,41.299805,5324.399902,5221.0,-0.082183,0.08396,5142.399902,39.100097,5247.600098,5142.399902,-0.098884,0.079135,,,108.2,3828.0,,,0.24,-0.213,,,-30.8,,,1.77,0.7426,63500000.0,87.307409,89.9579,87.425456,92.706255,87.037799,85.756318,96.070719,412937.0,1.227102,61.092677,,,,36000000.0,,,-4147.2,,,,,,,,,-67.4,,,,,,,,,,,,,,,,,,,0.55,-1.785714,-7.688946,,0.65,16.071429,2.693578,,0.64,14.285714,2.260607,,0.61,8.928571,1.801456,,0.54,-3.571429,-19.327464,,0.56,,,,,,
1,CCA,2016-07-01,0.59,0.59,985000,0.625,0.5,0.515,0.075,0.145631,0.59,0.56,0.0,0.03,0.0,0.053571,,,0.015,0.015,0.015,0.015,0.015,0.015,0.015,0.545,0.605,Steady,Within,0.545,0.605,Steady,Within,5310.399902,89.399902,5356.5,5310.399902,-0.066467,0.10252,5233.399902,91.0,5278.899902,5233.399902,-0.082938,0.098231,,,108.2,3828.0,,,0.24,-0.213,0.0,,-30.8,,,1.77,0.746,63500000.0,87.307409,89.9579,87.425456,92.706255,87.037799,85.756318,96.070719,412937.0,1.227102,61.092677,,,,36000000.0,,,-4147.2,0.0,,,,,,,,-67.4,,,,,,0.0,,,,,,,,,,,,0.0,0.54,-8.474576,-33.621753,0.0,0.65,10.169492,1.644524,0.0,0.67,13.559322,2.167481,0.0,0.6,1.694915,0.343165,0.0,0.52,-11.864407,-65.487513,0.0,0.56,-5.084746,-25.146688,,,,
2,CCA,2016-07-04,0.59,0.59,389500,0.595,0.555,0.565,0.025,0.044248,0.59,0.56,0.0,0.03,0.0,0.053571,,,0.014142,0.014142,0.014142,0.014142,0.014142,0.014142,0.014142,0.551716,0.608284,Steady,Within,0.551716,0.608284,Steady,Within,5327.100098,16.700196,5365.200195,5303.100098,-0.063532,0.105988,5246.600098,13.200196,5281.799805,5218.5,-0.080625,0.101001,,,108.2,3828.0,,,0.24,-0.213,0.0,,-30.8,,,1.77,0.7506,63500000.0,90.544594,91.341998,90.991309,92.573106,89.67533,88.864985,100.033999,417044.0,1.12484,61.122687,,,,36000000.0,,,-4147.2,0.0,,,,,,,,-67.4,,,,,,0.0,,,,,,,,,,,,0.0,0.66,11.864407,1.980153,0.0,0.73,23.728814,3.26557,0.0,0.68,15.254237,2.464773,0.0,0.57,-3.389831,-16.909912,0.0,0.56,-5.084746,-28.202774,0.0,0.56,-5.084746,-25.16475,,,,
3,CCA,2016-07-05,0.475,0.475,288500,0.59,0.475,0.59,-0.115,-0.194915,0.59,0.475,-0.115,0.0,-0.194915,0.0,,,0.047087,0.047087,0.047087,0.047087,0.047087,0.047087,0.047087,0.459576,0.647924,Steady,Within,0.459576,0.647924,Steady,Within,5365.200195,38.100097,5365.899902,5306.899902,-0.056834,0.113898,5281.799805,35.199707,5282.299805,5221.100098,-0.074456,0.108388,,,108.2,3828.0,,,0.24,-0.213,0.0,,-30.8,,,1.77,0.7517,63500000.0,90.544594,91.341998,90.991309,92.573106,89.67533,88.864985,100.033999,417044.0,1.12484,61.122687,,,,36000000.0,,,-4147.2,0.0,,,,,,,,-67.4,,,,,,0.0,,,,,,,,,,,,0.0,0.66,38.947368,7.392109,0.0,0.7,47.368421,7.96558,0.0,0.68,43.157895,8.978208,0.0,0.6,26.315789,6.024019,0.0,0.58,22.105263,4.17463,0.0,0.6,26.315789,5.414882,,,,
4,CCA,2016-07-06,0.575,0.575,578900,0.58,0.52,0.52,0.055,0.105769,0.59,0.475,-0.015,0.1,-0.025424,0.210526,,,0.042965,0.042965,0.042965,0.042965,0.042965,0.042965,0.042965,0.47207,0.64393,Steady,Within,0.47207,0.64393,Steady,Within,5312.799805,-52.40039,5312.799805,5237.799805,-0.066046,0.103019,5228.0,-53.799805,5228.0,5148.700195,-0.083884,0.097098,,,108.2,3828.0,,,0.24,-0.213,0.0,,-30.8,,,1.77,0.7436,63500000.0,90.544594,91.341998,90.991309,92.573106,89.67533,88.864985,100.033999,417044.0,1.12484,61.122687,,,,36000000.0,,,-4147.2,0.0,,,,,,,,-67.4,,,,,,0.0,,,,,,,,,,,,0.0,0.66,14.782609,2.619778,0.0,0.69,20.0,3.48737,0.0,0.6,4.347826,0.940744,0.0,0.6,4.347826,1.010167,0.0,0.54,-6.086957,-32.554628,0.0,0.62,7.826087,1.607076,,,,


## Apply filter for specific symbols

In [None]:
# Keep only the rows belonging to a small, hand-picked set of symbols
include_symbols = [
    'BHP', 'CBA', 'AOU', 'AYS', 'ATT', 'A01',
    'BUD', 'AAP', 'AIV', 'AIB', '4DS',
]
symbol_is_included = raw_data['symbol'].isin(include_symbols)
reduced_data = raw_data.loc[symbol_is_included]
# Report how many rows survived the filter
print(reduced_data.shape[0])

In [None]:
# Use the symbol-filtered subset as the working data set.
# NOTE(review): the "Set-up learning data" cell reassigns filtered_data from
# raw_data, so on a top-to-bottom run this assignment is overwritten -- confirm
# whether the symbol filter is meant to reach the model.
filtered_data = reduced_data

## Set-up learning data

In [14]:
# Set target column
# Key '8' in `returns` maps to the 'Future8WeekReturn' column.
target_column = returns['8']


In [17]:
# Target column first, followed by every feature column
all_columns = [target_column] + data_columns
print(all_columns)

# Drop the rows that have no value for the target, then keep only the
# target and feature columns.
# NOTE(review): this starts again from raw_data, so whatever an earlier cell
# assigned to filtered_data is replaced here.
filtered_data = raw_data.dropna(subset=[target_column])[all_columns]

print(filtered_data.dtypes)


['Future8WeekReturn', 'symbol', 'quoteDate', 'adjustedPrice', 'volume', 'previousClose', 'change', 'changeInPercent', '52WeekHigh', '52WeekLow', 'changeFrom52WeekHigh', 'changeFrom52WeekLow', 'percebtChangeFrom52WeekHigh', 'percentChangeFrom52WeekLow', 'Price200DayAverage', 'Price52WeekPercChange', '1WeekVolatility', '2WeekVolatility', '4WeekVolatility', '8WeekVolatility', '12WeekVolatility', '26WeekVolatility', '52WeekVolatility', '4WeekBollingerPrediction', '4WeekBollingerType', '12WeekBollingerPrediction', '12WeekBollingerType', 'allordpreviousclose', 'allordchange', 'allorddayshigh', 'allorddayslow', 'allordpercebtChangeFrom52WeekHigh', 'allordpercentChangeFrom52WeekLow', 'asxpreviousclose', 'asxchange', 'asxdayshigh', 'asxdayslow', 'asxpercebtChangeFrom52WeekHigh', 'asxpercentChangeFrom52WeekLow', 'exDividendDate', 'exDividendPayout', '640106_A3597525W', 'AINTCOV', 'AverageVolume', 'BookValuePerShareYear', 'CashPerShareYear', 'DPSRecentYear', 'EBITDMargin', 'EPS', 'EPSGrowthRate10

## Run auto-ml

In [None]:
from auto_ml import Predictor
# NOTE(review): get_boston_dataset is not used in this cell; the import is kept
# only in case a later cell relies on it.
from auto_ml.utils import get_boston_dataset

# Split the data frame roughly 70 / 30 into train and test rows.
# A dedicated, seeded generator makes the split identical on every
# Restart & Run All without touching numpy's global random state.
# NOTE(review): rows are split at random, so quotes of the same symbol on
# neighbouring dates can land on both sides -- consider a date-based split.
split_rng = np.random.RandomState(42)
msk = split_rng.rand(len(filtered_data)) < 0.7
df_train = filtered_data[msk]
df_test = filtered_data[~msk]

# Describe the non-numeric columns for auto_ml.  The output column follows
# target_column so that changing the target only has to be done in one place.
column_descriptions = {
    target_column: 'output',
    'symbol': 'categorical',
    'quoteDate': 'date',
    '4WeekBollingerPrediction': 'categorical',
    '4WeekBollingerType': 'categorical',
    '12WeekBollingerPrediction': 'categorical',
    '12WeekBollingerType': 'categorical',
    'exDividendDate': 'date',
}


ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)

ml_predictor.train(df_train)

# Score against the actual target values.  The previous `df_test.MEDV` was a
# leftover from the Boston-housing example; this data set has no MEDV column.
ml_predictor.score(df_test, df_test[target_column])