In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import talib as ta

In [2]:
stocks = pd.read_csv('../data/clean/top_stocks.csv', parse_dates=True)

stocks = stocks.reset_index().set_index(['symbol', 'date']).sort_index().reset_index()

In [3]:
stocks_by_symbol = stocks.groupby('symbol')

In [4]:
symbols = stocks.symbol.unique()

In [5]:
COLUMNS = set()

In [6]:
def technical_indicator(stock, name, func, normalize, *args, **kwargs):
    if 'normalize_by' not in kwargs:
        kwargs['normalize_by'] = args[0]
        
    stock.loc[:, name] = func(*args) / (kwargs['normalize_by'] if normalize else 1)
    COLUMNS.add(name)
    
def technical_indicator_time_periods(stock, name, func, timeperiods, normalize=False, *args, **kwargs):
    if 'normalize_by' not in kwargs:
        kwargs['normalize_by'] = args[0]
    
    for tp in timeperiods:
        column = f'{name}_{tp}'
        stock.loc[:, column] = func(*args, timeperiod=tp) / (kwargs['normalize_by'] if normalize else 1)
        COLUMNS.add(column)

## Overlap Studies

In [7]:
def bollinger_bands(stock, timeperiods, close):
    for tp in timeperiods:
        column_upper = f'bb_upper_{tp}'
        column_middle = f'bb_middle_{tp}'
        column_lower = f'bb_lower_{tp}'
        column_width = f'bb_width_{tp}'
        
        upper, middle, lower = ta.BBANDS(close, timeperiod=tp)
        
        stock.loc[:, column_upper] = upper / close
        stock.loc[:, column_middle] = middle / close
        stock.loc[:, column_lower] = lower / close
        stock.loc[:, column_width] = (upper - lower) / close
        
        COLUMNS.update([column_upper, column_middle, column_lower, column_width])

In [8]:
def double_exponential_moving_average(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'dema', ta.DEMA, timeperiods, True, close)

In [9]:
def exponential_moving_average(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'ema', ta.EMA, timeperiods, True, close)

In [10]:
def hilbert_transform_trendline(stock, close):
    technical_indicator(stock, 'ht_trendline', ta.HT_TRENDLINE, True, close)

In [11]:
def kaufman_adaptive_moving_average(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'kama', ta.KAMA, timeperiods, True, close)

In [12]:
def moving_average(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'ma', ta.MA, timeperiods, True, close)

In [13]:
def mesa_adaptive_moving_average(stock, close):
    mama, fama = ta.MAMA(close, fastlimit=0.9, slowlimit=0.1)

    stock.loc[:, 'mama'] = mama / close
    stock.loc[:, 'fama'] = fama / close

    COLUMNS.update(['mama', 'fama'])

In [14]:
def midpoint_over_period(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'midpoint', ta.MIDPOINT, timeperiods, True, close)

In [15]:
def midpoint_price_over_period(stock, timeperiods, high, low, close):
    technical_indicator_time_periods(stock, 'midprice', ta.MIDPRICE, timeperiods, True, high, low, normalize_by=close)

In [16]:
def parabolic_sar(stock, high, low, close):
    technical_indicator(stock, 'sar', ta.SAR, True, high, low, normalize_by=close)

In [17]:
def triple_exponential_moving_average_t3(stock, timeperiods, close):
    for tp in timeperiods:
        column = f't3_{tp}'
        stock.loc[:, column] = ta.T3(close, timeperiod=tp, vfactor=0.7) / close
        COLUMNS.add(column)

In [18]:
def triple_exponential_moving_average(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'tema', ta.TEMA, timeperiods, True, close)

In [19]:
def triangular_moving_average(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'trima', ta.TRIMA, timeperiods, True, close)

In [20]:
def weighted_moving_average(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'wma', ta.WMA, timeperiods, True, close)

## Momentum Indicators

In [21]:
def average_directional_movement_index(stock, timeperiods, high, low, close):
    technical_indicator_time_periods(stock, 'adx', ta.ADX, timeperiods, False, high, low, close)
    technical_indicator_time_periods(stock, 'adxr', ta.ADXR, timeperiods, False, high, low, close)

In [22]:
def absolute_price_oscillator(stock, close):
    technical_indicator(stock, 'apo', ta.APO, True, close)

In [23]:
def balance_of_power(stock, open_, high, low, close):
    technical_indicator(stock, 'bop', ta.BOP, False, open_, high, low, close)

In [24]:
def commodity_channel_index(stock, timeperiods, high, low, close):
    technical_indicator_time_periods(stock, 'cci', ta.CCI, timeperiods, False, high, low, close)

In [25]:
def chande_momentum_oscillator(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'cmo', ta.CMO, timeperiods, False, close)

In [26]:
def directional_movement(stock, timeperiods, high, low, close):
    technical_indicator_time_periods(stock, 'plus_dm', ta.PLUS_DM, timeperiods, True, high, low, normalize_by=close)
    technical_indicator_time_periods(stock, 'minus_dm', ta.MINUS_DM, timeperiods, True, high, low, normalize_by=close)
    technical_indicator_time_periods(stock, 'dx', ta.DX, timeperiods, False, high, low, close)

In [27]:
def directional_indicator(stock, timeperiods, high, low, close):
    technical_indicator_time_periods(stock, 'plus_di', ta.PLUS_DI, timeperiods, False, high, low, close)
    technical_indicator_time_periods(stock, 'minus_di', ta.MINUS_DI, timeperiods, False, high, low, close)

In [28]:
def moving_average_convergence_divergence(stock, close):
    line, hist, signal = ta.MACD(close)

    stock.loc[:, 'macd_line'] = line / close
    stock.loc[:, 'macd_hist'] = hist / close
    stock.loc[:, 'macd_signal'] = signal / close

    COLUMNS.update(['macd_line', 'macd_hist', 'macd_signal'])

In [29]:
def money_flow_index(stock, timeperiods, high, low, close, volume):
    technical_indicator_time_periods(stock, 'mfi', ta.MFI, timeperiods, False, high, low, close, volume)

In [30]:
def momentum(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'mom', ta.MOM, timeperiods, True, close)

In [31]:
def percentage_price_oscillator(stock, close):
    technical_indicator(stock, 'ppo', ta.PPO, True, close)

In [32]:
def rate_of_change(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'roc', ta.ROC, timeperiods, False, close)
    technical_indicator_time_periods(stock, 'rocp', ta.ROCP, timeperiods, False, close)
    technical_indicator_time_periods(stock, 'rocr', ta.ROCR, timeperiods, False, close)

In [33]:
def relative_strength_index(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'rsi', ta.RSI, timeperiods, False, close)

In [34]:
def stochastic(stock, high, low, close):
    slow_k, slow_d = ta.STOCH(high, low, close)
    fast_k, fast_d = ta.STOCHF(high, low, close)
    rsi_k, rsi_d = ta.STOCHRSI(close)

    stock.loc[:, 'slow_k'] = slow_k
    stock.loc[:, 'slow_d'] = slow_d
    stock.loc[:, 'fast_k'] = fast_k
    stock.loc[:, 'fast_d'] = fast_d
    stock.loc[:, 'rsi_k'] = rsi_k
    stock.loc[:, 'rsi_d'] = rsi_d

    COLUMNS.update(['slow_k', 'slow_d', 'fast_k', 'fast_d', 'rsi_k', 'rsi_d'])

In [35]:
def stochastic_rsi(stock, timeperiods, close):
    

    stock.loc[:, 'slow_k'] = slow_k
    stock.loc[:, 'slow_d'] = slow_d
    stock.loc[:, 'fast_k'] = fast_k
    stock.loc[:, 'fast_d'] = fast_d

    COLUMNS.update(['slow_k', 'slow_d', 'fast_k', 'fast_d'])

In [36]:
def one_day_rate_of_change_of_a_triple_smooth_ema(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'trix', ta.TRIX, timeperiods, True, close)

In [37]:
def ultimate_oscillator(stock, high, low, close):
    technical_indicator(stock, 'ult_osc', ta.ULTOSC, False, high, low, close)

In [38]:
def williams_percent_r(stock, timeperiods, high, low, close):
    technical_indicator_time_periods(stock, 'will_r', ta.WILLR, timeperiods, False, high, low, close)

## Volume Indicators

In [39]:
def chaikin_ad(stock, high, low, close, volume):
    technical_indicator(stock, 'chaikin_ad_line', ta.AD, True, high, low, close, volume, normalize_by=volume)
    technical_indicator(stock, 'chaikin_ad_osc', ta.ADOSC, True, high, low, close, volume, normalize_by=volume)

In [40]:
def on_balance_volume(stock, close, volume):
    technical_indicator(stock, 'obv', ta.OBV, True, close, volume, normalize_by=volume)

## Volatility Indicators

In [41]:
def normalized_average_true_range(stock, timeperiods, high, low, close):
    technical_indicator_time_periods(stock, 'natr', ta.NATR, timeperiods, False, high, low, close)

In [42]:
def true_range(stock, high, low, close):
    technical_indicator(stock, 'trange', ta.TRANGE, True, high, low, close, normalize_by=close)

## Price Transform

In [43]:
def average_price(stock, open_, high, low, close):
    technical_indicator(stock, 'avg_price', ta.AVGPRICE, True, open_, high, low, close, normalize_by=close)

In [44]:
def median_price(stock, high, low, close):
    technical_indicator(stock, 'median_price', ta.MEDPRICE, True, high, low, normalize_by=close)

In [45]:
def typical_price(stock, high, low, close):
    technical_indicator(stock, 'typical_price', ta.TYPPRICE, True, high, low, close, normalize_by=close)

In [46]:
def weighted_close_price(stock, high, low, close):
    technical_indicator(stock, 'wcl_price', ta.WCLPRICE, True, high, low, close, normalize_by=close)

## Cycle Indicators

In [47]:
def dominant_cycle_period(stock, close):
    technical_indicator(stock, 'ht_dcperiod', ta.HT_DCPERIOD, False, close)

In [48]:
def dominant_cycle_phase(stock, close):
    technical_indicator(stock, 'ht_dcphase', ta.HT_DCPHASE, False, close)

In [49]:
def phasor_components(stock, close):
    inphase, quadrature = ta.HT_PHASOR(close)
    
    stock.loc[:, 'ht_phasor_inphase'] = inphase
    stock.loc[:, 'ht_phasor_quadrature'] = quadrature
    
    COLUMNS.update(['ht_phasor_inphase', 'ht_phasor_quadrature'])

In [50]:
def sine_wave(stock, close):
    sine, lead = ta.HT_SINE(close)
    
    stock.loc[:, 'ht_sine'] = sine
    stock.loc[:, 'ht_sine_lead'] = lead
    
    COLUMNS.update(['ht_sine', 'ht_sine_lead'])

In [51]:
def trend_mode(stock, close):
    technical_indicator(stock, 'ht_trendmode', ta.HT_TRENDMODE, False, close)

## Pattern Recognition

In [52]:
def pattern_recognition(stock, open_, high, low, close):
    technical_indicator(stock, 'two_crows', ta.CDL2CROWS, False, open_, high, low, close)
    technical_indicator(stock, 'three_black_crows', ta.CDL3BLACKCROWS, False, open_, high, low, close)
    technical_indicator(stock, 'three_inside_up_down', ta.CDL3INSIDE, False, open_, high, low, close)
    technical_indicator(stock, 'three_line_strike', ta.CDL3LINESTRIKE, False, open_, high, low, close)
    technical_indicator(stock, 'three_outside_up_down', ta.CDL3OUTSIDE, False, open_, high, low, close)
    technical_indicator(stock, 'three_stars_in_the_south', ta.CDL3STARSINSOUTH, False, open_, high, low, close)
    technical_indicator(stock, 'three_advancing_white_soldiers', ta.CDL3WHITESOLDIERS, False, open_, high, low, close)
    technical_indicator(stock, 'abandoned_baby', ta.CDLABANDONEDBABY, False, open_, high, low, close)
    technical_indicator(stock, 'advance_block', ta.CDLADVANCEBLOCK, False, open_, high, low, close)
    technical_indicator(stock, 'belt_hold', ta.CDLBELTHOLD, False, open_, high, low, close)
    technical_indicator(stock, 'breakaway', ta.CDLBREAKAWAY, False, open_, high, low, close)
    technical_indicator(stock, 'closing_marubozu', ta.CDLCLOSINGMARUBOZU, False, open_, high, low, close)
    technical_indicator(stock, 'concealing_baby_swallow', ta.CDLCONCEALBABYSWALL, False, open_, high, low, close)
    technical_indicator(stock, 'counterattack', ta.CDLCOUNTERATTACK, False, open_, high, low, close)
    technical_indicator(stock, 'dark_cloud_cover', ta.CDLDARKCLOUDCOVER, False, open_, high, low, close)
    technical_indicator(stock, 'doji', ta.CDLDOJI, False, open_, high, low, close)
    technical_indicator(stock, 'doji_star', ta.CDLDOJISTAR, False, open_, high, low, close)
    technical_indicator(stock, 'dragonfly_doji', ta.CDLDRAGONFLYDOJI, False, open_, high, low, close)
    technical_indicator(stock, 'engulfing_patter', ta.CDLENGULFING, False, open_, high, low, close)
    technical_indicator(stock, 'evening_doji_star', ta.CDLEVENINGDOJISTAR, False, open_, high, low, close)
    technical_indicator(stock, 'evening_star', ta.CDLEVENINGSTAR, False, open_, high, low, close)
    technical_indicator(stock, 'up_down_gap_side_by_side_white_lines', ta.CDLGAPSIDESIDEWHITE, False, open_, high, low, close)
    technical_indicator(stock, 'gravestone_doji', ta.CDLGRAVESTONEDOJI, False, open_, high, low, close)
    technical_indicator(stock, 'hammer', ta.CDLHAMMER, False, open_, high, low, close)
    technical_indicator(stock, 'hanging_man', ta.CDLHANGINGMAN, False, open_, high, low, close)
    technical_indicator(stock, 'harami_pattern', ta.CDLHARAMI, False, open_, high, low, close)
    technical_indicator(stock, 'harami_cross_pattern', ta.CDLHARAMICROSS, False, open_, high, low, close)
    technical_indicator(stock, 'high_wave_candle', ta.CDLHIGHWAVE, False, open_, high, low, close)
    technical_indicator(stock, 'hikkake_pattern', ta.CDLHIKKAKE, False, open_, high, low, close)
    technical_indicator(stock, 'modified_hikkake_pattern', ta.CDLHIKKAKEMOD, False, open_, high, low, close)
    technical_indicator(stock, 'homing_pigeon', ta.CDLHOMINGPIGEON, False, open_, high, low, close)
    technical_indicator(stock, 'identical_three_crows', ta.CDLIDENTICAL3CROWS, False, open_, high, low, close)
    technical_indicator(stock, 'in_neck_pattern', ta.CDLINNECK, False, open_, high, low, close)
    technical_indicator(stock, 'inverted_hammer', ta.CDLINVERTEDHAMMER, False, open_, high, low, close)
    technical_indicator(stock, 'kicking', ta.CDLKICKING, False, open_, high, low, close)
    technical_indicator(stock, 'kicking_by_length', ta.CDLKICKINGBYLENGTH, False, open_, high, low, close)
    technical_indicator(stock, 'ladder_bottom', ta.CDLLADDERBOTTOM, False, open_, high, low, close)
    technical_indicator(stock, 'long_legged_doji', ta.CDLLONGLEGGEDDOJI, False, open_, high, low, close)
    technical_indicator(stock, 'long_line_candle', ta.CDLLONGLINE, False, open_, high, low, close)
    technical_indicator(stock, 'marubozu', ta.CDLMARUBOZU, False, open_, high, low, close)
    technical_indicator(stock, 'matching_low', ta.CDLMATCHINGLOW, False, open_, high, low, close)
    technical_indicator(stock, 'mat_hold', ta.CDLMATHOLD, False, open_, high, low, close)
    technical_indicator(stock, 'morning_doji_star', ta.CDLMORNINGDOJISTAR, False, open_, high, low, close)
    technical_indicator(stock, 'morning_star', ta.CDLMORNINGSTAR, False, open_, high, low, close)
    technical_indicator(stock, 'on_neck_pattern', ta.CDLONNECK, False, open_, high, low, close)
    technical_indicator(stock, 'piercing_pattern', ta.CDLPIERCING, False, open_, high, low, close)
    technical_indicator(stock, 'rickshaw_man', ta.CDLRICKSHAWMAN, False, open_, high, low, close)
    technical_indicator(stock, 'rising_falling_three_methods', ta.CDLRISEFALL3METHODS, False, open_, high, low, close)
    technical_indicator(stock, 'separating_lines', ta.CDLSEPARATINGLINES, False, open_, high, low, close)
    technical_indicator(stock, 'shooting_star', ta.CDLSHOOTINGSTAR, False, open_, high, low, close)
    technical_indicator(stock, 'short_line_candle', ta.CDLSHORTLINE, False, open_, high, low, close)
    technical_indicator(stock, 'spinning_top', ta.CDLSPINNINGTOP, False, open_, high, low, close)
    technical_indicator(stock, 'stalled_pattern', ta.CDLSTALLEDPATTERN, False, open_, high, low, close)
    technical_indicator(stock, 'stick_sandwich', ta.CDLSTICKSANDWICH, False, open_, high, low, close)
    technical_indicator(stock, 'takuri', ta.CDLTAKURI, False, open_, high, low, close)
    technical_indicator(stock, 'tasuki_gap', ta.CDLTASUKIGAP, False, open_, high, low, close)
    technical_indicator(stock, 'thrusting_pattern', ta.CDLTHRUSTING, False, open_, high, low, close)
    technical_indicator(stock, 'tristar_pattern', ta.CDLTRISTAR, False, open_, high, low, close)
    technical_indicator(stock, 'unique_three_river', ta.CDLUNIQUE3RIVER, False, open_, high, low, close)
    technical_indicator(stock, 'upside_gap_two_crows', ta.CDLUPSIDEGAP2CROWS, False, open_, high, low, close)
    technical_indicator(stock, 'upside_downside_gap_three_methods', ta.CDLXSIDEGAP3METHODS, False, open_, high, low, close)

## Statistic Functions

In [53]:
def beta(stock, timeperiods, high, low):
    technical_indicator_time_periods(stock, 'beta', ta.BETA, timeperiods, False, high, low)

In [54]:
def pearsons_correlation_coefficient(stock, timeperiods, high, low):
    technical_indicator_time_periods(stock, 'correl', ta.CORREL, timeperiods, False, high, low)

In [55]:
def linear_regression(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'linear_regression', ta.LINEARREG, timeperiods, True, close)
    technical_indicator_time_periods(stock, 'linear_regression_angle', ta.LINEARREG_ANGLE, timeperiods, False, close)
    technical_indicator_time_periods(stock, 'linear_regression_intercept', ta.LINEARREG_INTERCEPT, timeperiods, True, close)
    technical_indicator_time_periods(stock, 'linear_regression_slope', ta.LINEARREG_SLOPE, timeperiods, True, close)

In [56]:
def standard_deviation(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'std', ta.STDDEV, timeperiods, True, close)

In [57]:
def time_series_forecast(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'tsf', ta.TSF, timeperiods, True, close)

In [58]:
def variance(stock, timeperiods, close):
    technical_indicator_time_periods(stock, 'var', ta.VAR, timeperiods, True, close)

## Compute Indicators

In [59]:
def compute_indicators_test(stock):
    
    open_ = stock.loc[:, 'open']
    high = stock.loc[:, 'high']
    low = stock.loc[:, 'low']
    close = stock.loc[:, 'close']
    volume = stock.loc[:, 'volume']
    
    timeperiods = [5, 10, 20, 30, 50, 100]
    
    # Overlap Studies
    
    bollinger_bands(stock, timeperiods, close)
    
    double_exponential_moving_average(stock, timeperiods, close)
    
    exponential_moving_average(stock, timeperiods, close)
    
    hilbert_transform_trendline(stock, close)
    
    kaufman_adaptive_moving_average(stock, timeperiods, close)
    
    moving_average(stock, timeperiods, close)
    
    mesa_adaptive_moving_average(stock, close)
    
    midpoint_over_period(stock, timeperiods, close)
    
    midpoint_price_over_period(stock, timeperiods, high, low, close)
    
    parabolic_sar(stock, high, low, close)
    
    triple_exponential_moving_average_t3(stock, timeperiods, close)
    
    triple_exponential_moving_average(stock, timeperiods, close)
    
    triangular_moving_average(stock, timeperiods, close)
    
    weighted_moving_average(stock, timeperiods, close)
    
    # Momentum Indicators
    
    average_directional_movement_index(stock, timeperiods, high, low, close)
    
    absolute_price_oscillator(stock, close)
    
    balance_of_power(stock, open_, high, low, close)
    
    commodity_channel_index(stock, timeperiods, high, low, close)
        
    chande_momentum_oscillator(stock, timeperiods, close)
    
    directional_movement(stock, timeperiods, high, low, close)
    
    directional_indicator(stock, timeperiods, high, low, close)
    
    moving_average_convergence_divergence(stock, close)
    
    money_flow_index(stock, timeperiods, high, low, close, volume)
    
    momentum(stock, timeperiods, close)
    
    percentage_price_oscillator(stock, close)
    
    rate_of_change(stock, timeperiods, close)
    
    relative_strength_index(stock, timeperiods, close)
    
    stochastic(stock, high, low, close)
    
    one_day_rate_of_change_of_a_triple_smooth_ema(stock, timeperiods, close)
    
    ultimate_oscillator(stock, high, low, close)
    
    williams_percent_r(stock, timeperiods, high, low, close)
    
    # Volume Indicators
    
    chaikin_ad(stock, high, low, close, volume)
    
    on_balance_volume(stock, close, volume)
    
    # Volatility Indicators
    
    normalized_average_true_range(stock, timeperiods, high, low, close)
    
    true_range(stock, high, low, close)
    
    # Price Transform
    
    average_price(stock, open_, high, low, close)
    
    median_price(stock, high, low, close)
    
    typical_price(stock, high, low, close)
    
    weighted_close_price(stock, high, low, close)
    
    # Cycle Indicators
    
    dominant_cycle_period(stock, close)
    
    dominant_cycle_phase(stock, close)
    
    phasor_components(stock, close)
    
    sine_wave(stock, close)
    
    trend_mode(stock, close)
    
    # Pattern Recognition
    
    # pattern_recognition(stock, open_, high, low, close)
    
    # Statistic Functions
    
    beta(stock, timeperiods, high, low)
    
    pearsons_correlation_coefficient(stock, timeperiods, high, low)
    
    linear_regression(stock, timeperiods, close)
    
    standard_deviation(stock, timeperiods, close)
    
    time_series_forecast(stock, timeperiods, close)
    
    variance(stock, timeperiods, close)
    
    # Forecast
    stock.loc[:, 'forecast'] = -close.diff(-1) / close
    
    # Buy Signal
    stock.loc[:, 'buy_signal'] = (stock.loc[:, 'forecast'] > 0).astype(float)
    
    return stock

In [60]:
groups = np.empty(len(symbols), dtype=object)

for i, symbol in tqdm(enumerate(symbols)):
    groups[i] = stocks_by_symbol.get_group(symbol)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [61]:
stocks = np.empty(len(groups), dtype=object)

for i, group in tqdm(enumerate(groups)):
    stocks[i] = compute_indicators_test(group)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s





In [62]:
stocks = pd.concat(stocks).replace([np.inf, -np.inf], np.nan).dropna()

In [63]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import chi2, SelectKBest

In [64]:
stocks.set_index('date', inplace=True)

In [65]:
COLUMNS = list(COLUMNS)

In [66]:
def plot_classifier(clf, X_train, y_train, X_test, y_test):    
    
    y_pred_train = clf.predict_proba(X_train)[:, 1]
    fpr_train, tpr_train, _ = roc_curve(y_train, y_pred_train)
    
    y_pred = clf.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_pred)
    
    plt.plot([0, 1], [0, 1], 'k--', label='Coin Flip')
    plt.plot(fpr_train, tpr_train, label='Training Data')
    plt.plot(fpr, tpr, label='Testing Data')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    plt.legend()
    plt.show()
     
    print(f'Train AUC: {roc_auc_score(y_train, y_pred_train)}')
    print(f'Test AUC: {roc_auc_score(y_test, y_pred)}')
    print(f'Test Accuracy: {accuracy_score(y_test, clf.predict(X_test))}')

In [67]:
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import MinMaxScaler, QuantileTransformer
# from imblearn.under_sampling import RandomUnderSampler

# X = stocks[COLUMNS]
# y = stocks['buy_signal']
    
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8)

In [68]:
# rus = RandomUnderSampler()
# X_train, y_train = rus.fit_resample(X_train, y_train)

In [69]:
# from sklearn.preprocessing import MinMaxScaler, QuantileTransformer

# scaler = QuantileTransformer(output_distribution='normal')
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

In [70]:
# from xgboost import XGBClassifier

# clf = XGBClassifier(n_jobs=-1, verbosity=1)

# clf.fit(X_train_scaled, y_train)

# plot_classifier(clf, X_train_scaled, y_train, X_test_scaled, y_test)

In [71]:
# importances = clf.feature_importances_
# indices = np.argsort(importances)[::-1]
# [(COLUMNS[i], importances[i]) for i in indices]

In [72]:
import json

with open('../models/columns.json', 'w+') as column_file:
    json.dump(list(COLUMNS), column_file)

In [73]:
stocks.to_hdf('../data/clean/all_stocks.h5', key='daily')