In [1]:
import pandas as pd
from collections import Counter
import numpy as np
from sklearn import svm, neighbors
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import HTML

In [2]:
# Emit an HTML snippet whose script hides every `div.input` element once the
# document is ready, and defines code_toggle() so the link at the bottom can
# show/hide them again. The script uses `$`, so it presumably relies on jQuery
# being loaded by the rendering front end — TODO confirm for the target viewer.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')

In [63]:
def process_data_for_labels(ticker, hm_days=7):
    """Load the closing-price table for the ticker's index and add forward returns.

    The exchange suffix of ``ticker`` selects which CSV of joined closes is
    read: '.T' Nikkei 225, '.L' FTSE 100, '.BO' BSE, '.NS' NIFTY, anything else
    S&P 500. For every horizon ``i`` in ``1..hm_days`` a column named
    ``'<ticker>_<i>d'`` is added, holding the fractional change between the
    ticker's value ``i`` rows ahead and its value in the current row.

    Parameters
    ----------
    ticker : str
        Column name of the stock to label; must be a column of the selected CSV.
    hm_days : int, optional
        Number of forward horizons to compute (default 7).

    Returns
    -------
    tickers : list of str
        Column names of the CSV as loaded, before the horizon columns are added.
    df : pandas.DataFrame
        The loaded table with NaN replaced by 0, plus the horizon columns.

    Raises
    ------
    KeyError
        If ``ticker` ` is not a column of the selected CSV.
    """
    # (suffix, index label used in the progress message, CSV of joined closes)
    index_by_suffix = (
        ('.T', 'Nikkei 225', 'nikkei225_joined_closes.csv'),
        ('.L', 'FTSE 100', 'ftse_joined_closes.csv'),
        ('.BO', 'BSE', 'bse_joined_closes.csv'),
        ('.NS', 'NIFTY', 'nifty_joined_closes.csv'),
    )
    index_name, csv_path = 'S&P 500', 'snp500_joined_closes.csv'
    for suffix, name, path in index_by_suffix:
        # Match the suffix at the end only, so a dot elsewhere in the symbol
        # cannot route the ticker to the wrong index.
        if ticker.endswith(suffix):
            index_name, csv_path = name, path
            break

    print('Start correlate and predict ' + ticker + ' against ' + index_name + ' components')
    # Read exactly one file: the one belonging to the selected index.
    df = pd.read_csv(csv_path, index_col=0)

    tickers = df.columns.values.tolist()
    if ticker not in tickers:
        raise KeyError('ticker {!r} not found in {}'.format(ticker, csv_path))

    df.fillna(0, inplace=True)

    base = df[ticker]
    for i in range(1, hm_days + 1):
        # Rows where the base value is 0 produce inf/NaN here; NaN is zeroed
        # below, inf is left in place for the caller to handle.
        df['{}_{}d'.format(ticker, i)] = (base.shift(-i) - base) / base

    df.fillna(0, inplace=True)
    return tickers, df

In [64]:
def buy_sell_hold(*args):
    """Turn a sequence of fractional price changes into a trading signal.

    The changes are scanned in the order given. The first one whose magnitude
    exceeds 2% decides the result: 1 if it is a rise, -1 if it is a fall.
    If no change exceeds the threshold (or no arguments are given) the
    result is 0.
    """
    threshold = 0.02
    decisive_signals = (
        1 if change > threshold else -1
        for change in args
        if change > threshold or change < -threshold
    )
    # The generator is lazy, so only the earliest qualifying change is evaluated.
    return next(decisive_signals, 0)

In [65]:
def extract_featuresets(ticker):
    """Build the feature matrix and label vector for one ticker.

    Labels come from ``buy_sell_hold`` applied row-wise to the ticker's seven
    forward-change columns produced by ``process_data_for_labels``. Features
    are the row-to-row percentage changes of every column originally loaded.

    Returns
    -------
    X : numpy.ndarray
        Percentage changes of all loaded columns, with inf and NaN set to 0.
    y : numpy.ndarray
        Values of the ``'<ticker>_target'`` column.
    df : pandas.DataFrame
        The working table after rows containing +/-inf were dropped.
    """
    tickers, df = process_data_for_labels(ticker)

    target_col = '{}_target'.format(ticker)
    horizon_cols = ['{}_{}d'.format(ticker, day) for day in range(1, 8)]

    # zip(*columns) walks the seven horizon columns in lockstep, one row at a time.
    horizon_rows = zip(*(df[col] for col in horizon_cols))
    df[target_col] = [buy_sell_hold(*row) for row in horizon_rows]

    labels_as_text = list(map(str, df[target_col].values.tolist()))
    print('Data spread:', Counter(labels_as_text))

    # Zero the NaNs first, then turn +/-inf into NaN so dropna removes those rows.
    df = df.fillna(0).replace([np.inf, -np.inf], np.nan).dropna()

    df_vals = df[tickers].pct_change().replace([np.inf, -np.inf], 0).fillna(0)

    X = df_vals.values
    y = df[target_col].values
    return X, y, df


In [66]:
def do_ml(ticker):
    """Train a voting classifier on the ticker's featuresets and report accuracy.

    The data from ``extract_featuresets`` is split 75/25 into train and test
    sets. A ``VotingClassifier`` combining a linear SVC, a k-nearest-neighbours
    classifier and a random forest is fitted on the training part and evaluated
    on the held-out part. The accuracy, the counts of predicted classes and the
    ticker are printed.

    The signature is kept to the single ``ticker`` parameter because the
    notebook passes this function to ``interact``, which builds one widget per
    parameter.

    Parameters
    ----------
    ticker : str
        Ticker symbol forwarded to ``extract_featuresets``.

    Returns
    -------
    float
        Fraction of test samples whose predicted class equals the true class.
    """
    # Fixed seed so the split and the seeded estimators, and therefore the
    # reported accuracy, are the same on every run.
    seed = 42

    X, y, _ = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=seed)

    clf = VotingClassifier([
        ('lsvc', svm.LinearSVC(random_state=seed)),
        ('knn', neighbors.KNeighborsClassifier()),
        ('rfor', RandomForestClassifier(random_state=seed)),
    ])
    clf.fit(X_train, y_train)

    # Predict once and derive the accuracy from those predictions instead of
    # calling score(), which would run a second prediction pass.
    predictions = clf.predict(X_test)
    confidence = float(np.mean(predictions == y_test))

    print('accuracy:', confidence)
    print('predicted class counts:', Counter(predictions))
    print(ticker)
    print()
    return confidence

Usage: Enter a ticker symbol as listed on Yahoo Finance
    
    Nikkei Index: Ticker 7203.T for Toyota
        
    S&P 500 Index: Ticker AAPL for Apple
        
    FTSE 100 Index: Ticker HSBA.L for HSBC bank
        
    BSE Index: Ticker INFY.BO for Infosys
    
    Nifty Index: Ticker INFY.NS for Infosys

Result: If the stock goes up more than 2% within the next 7 days, it's a BUY (BUY -> 1)
        
        If the stock goes down more than 2% within the next 7 days, it's a SELL (SELL -> -1)
        
        It's a HOLD if it does not move more than 2% in the next 7 days (HOLD -> 0)
        
        Accuracy is the fraction of test samples predicted correctly (0.5 means 50%); the higher the accuracy, the more reliable the prediction

In [67]:
# Use an explicit Text widget with continuous_update=False so do_ml runs when
# the ticker is committed (Enter / focus loss) rather than on every keystroke,
# which would retrain the models for each partial symbol typed.
interact(do_ml, ticker=widgets.Text(value='AAPL', description='ticker', continuous_update=False));

interactive(children=(Text(value='AAPL', description='ticker'), Output()), _dom_classes=('widget-interact',))