In [1]:
import requests
import pandas as pd
import numpy as np
import seaborn as sns
from bs4 import BeautifulSoup
import json
import talib
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline

In [37]:
def get_datadf(symbol):
    raw_data = requests.get("https://query1.finance.yahoo.com/v8/finance/chart/"+ symbol +"?period1=1194546600&period2=1510165800&interval=1d").text
    raw_data = json.loads(raw_data)
    df = raw_data[u'chart'][u'result'][0][u'indicators'][u'quote'][0]
    df['unadjclose'] = raw_data[u'chart'][u'result'][0][u'indicators'][u'unadjclose'][0][u'unadjclose']
    df['adjclose'] = raw_data[u'chart'][u'result'][0][u'indicators'][u'adjclose'][0][u'adjclose']
    df['timestamp'] = raw_data[u'chart'][u'result'][0][u'timestamp']
    data = pd.DataFrame.from_dict(df)
    data['sym'] = symbol
    data.index = pd.to_datetime(data['timestamp'], unit='s').values
    data = data.dropna()
    data.drop(['unadjclose','timestamp'],axis=1, inplace=True)
    return standardize_adjclose(data)

In [4]:
def standardize_adjclose(data):
    ratio = data.adjclose / data.close
    data.open = data.open * ratio
    data.close = data.close * ratio
    data.high = data.high * ratio
    data.low = data.low * ratio
    data.drop(['adjclose'], axis=1, inplace=True)
    return data

In [5]:
def returns(arr):
    arr_trans = [0] * 5
    arr_trans[0] = [0] * len(arr.columns)
    arr_trans[1:] = (arr.iloc[1:,].values - arr.iloc[:-1,].values) / arr.iloc[:-1].values
    arr_trans[0] = np.array([0.] * len(arr.columns))
    return arr_trans

def standardize(arr):
    return (arr - arr.mean()) / arr.std()

## Indicators

In [6]:
def getIndicators(df):
    indicators = pd.DataFrame()
    macdfast, macdslow, signal, rsitime=12, 26, 9, 14
    macd12, macdsignal12, macdhist12 = talib.MACD(df.close.values, fastperiod=macdfast, slowperiod=macdslow, signalperiod=signal)
    rsi = talib.RSI(df.close.values, timeperiod=rsitime)
    indicators.loc[:,'rsi14'] = rsi
    
    macdfast, macdslow, signal, rsitime=6, 13, 4, 9
    macd6, macdsignal6, macdhist6 = talib.MACD(df.close.values, fastperiod=macdfast, slowperiod=macdslow, signalperiod=signal)    
    rsi = talib.RSI(df.close.values, timeperiod=rsitime)
    indicators.loc[:,'rsi9'] = rsi
    
    rsitime=5
    rsi = talib.RSI(df.close.values, timeperiod=rsitime)
    indicators.loc[:,'rsi5'] = rsi
    
    
    indicators.loc[:,'macd12'] = macd12
    indicators.loc[:,'macdhist12'] = macdhist12 
    
    indicators.loc[:,'macd6'] = macd6
    indicators.loc[:,'macdhist6'] = macdhist6     
    
    indicators.loc[:,'ema10'] = talib.EMA(df.close.values, timeperiod=10)
    indicators.loc[:,'ema20'] = talib.EMA(df.close.values, timeperiod=20)
    indicators.loc[:,'ema30'] = talib.EMA(df.close.values, timeperiod=30)
    indicators.loc[:,'ema40'] = talib.EMA(df.close.values, timeperiod=40)
    indicators.loc[:,'ema50'] = talib.EMA(df.close.values, timeperiod=50)
    
    indicators.loc[:,'arrondown7'], indicators.loc[:,'arronup7'] = talib.AROON(df.high.values, df.low.values, timeperiod=7)
    indicators.loc[:,'arrondown13'], indicators.loc[:,'arronup13'] = talib.AROON(df.high.values, df.low.values, timeperiod=13)
    
    upperband, middleband, lowerband = talib.BBANDS(df.close.values, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
    indicators.loc[:,'bbindic'] = (df.close.values - lowerband) / (upperband - lowerband)
    
    indicators.index = df.index
    return indicators

## Patterns

In [7]:
def getPatterns(df):
    patterns = pd.DataFrame()
    patterns['marubuzo'] = talib.CDLXSIDEGAP3METHODS(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['spinningtop'] = talib.CDLSPINNINGTOP(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['doji'] = talib.CDLDOJI(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['hammer'] = talib.CDLHAMMER(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['hangingman'] = talib.CDLHANGINGMAN(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['shootigstar'] = talib.CDLSHOOTINGSTAR(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['engulfing'] = talib.CDLENGULFING(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['piercing'] = talib.CDLPIERCING(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['darkcloudcover'] = talib.CDLDARKCLOUDCOVER(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['harami'] = talib.CDLHARAMI(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['gapthreemethod'] = talib.CDLXSIDEGAP3METHODS(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['morningstar'] = talib.CDLMORNINGSTAR(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns['eveningstar'] = talib.CDLEVENINGSTAR(df.open.values, df.high.values, df.low.values, df.close.values)
    patterns.index = df.index
    patterns = patterns / 100
    return patterns

In [30]:
def merge_all(data):
    indicators = getIndicators(data)
    indicators = indicators.dropna()

    #patterns = getPatterns(data)

    #sym_data = pd.merge(indicators, patterns, left_index=True, right_index=True)
    final_data =  pd.merge(indicators, data, left_index=True, right_index=True)
    return final_data

In [10]:
def transformations(data):
    
    data['high'] = (data['high'] - data['close']) /  data['close']
    data['low'] = (data['close'] - data['low']) /  data['close']
    data['open'] = (data['close'] - data['open']) /  data['close']
    data['volume'] = standardize(data.volume)

    data.loc[:,['close','macd6','macd12','ema10','ema20','ema30','ema40','ema50']] = returns(data.loc[:,['close','macd6','macd12','ema10','ema20','ema30','ema40','ema50']])
    data.loc[:,['macdhist6','macdhist12','arrondown7','arronup7','arrondown13', 'arronup13', 'rsi14','rsi9','rsi5']] = standardize(data.loc[:,['macdhist6','macdhist12','arrondown7','arronup7','arrondown13', 'arronup13', 'rsi14','rsi9','rsi5']])    
    
    data.loc[data.close < 0,'volume'] = data.loc[data.close < 0,'volume'] * -1
    data['signal'] = data['close'].apply(lambda x:1 if x >= 0.03 else 0)
    data.signal = np.append(data.signal[1:].values,0)
    
    data.dropna(inplace=True)
    return data

In [25]:
nifty_all = ['ADANIPORTS.NS', 'AMBUJACEM.NS', 'ASIANPAINT.NS', 'AUROPHARMA.NS', 'AXISBANK.NS', \
             'BAJFINANCE.NS', 'BPCL.NS', 'BHARTIARTL.NS', 'INFRATEL.NS', 'BOSCHLTD.NS', 'CIPLA.NS', 'COALINDIA.NS', \
             'DRREDDY.NS', 'EICHERMOT.NS', 'GAIL.NS', 'HCLTECH.NS', 'HDFCBANK.NS', 'HEROMOTOCO.NS', 'HINDALCO.NS',\
             'HINDPETRO.NS', 'HINDUNILVR.NS', 'HDFCBANK.NS', 'ITC.NS', 'ICICIBANK.NS', 'IBREALEST.NS', 'IOC.NS',\
              'INFY.NS', 'KOTAKBANK.NS', 'LT.NS', 'LUPIN.NS', 'M&M.NS', 'MARUTI.NS', 'NTPC.NS', #'INDUSINDBK.NS',\ 
             'ONGC.NS', 'POWERGRID.NS', 'RELIANCE.NS', 'SBIN.NS', 'SUNPHARMA.NS', 'TCS.NS', 'TATAMOTORS.NS', 'TATASTEEL.NS', \
             'TECHM.NS', 'UPL.NS', 'ULTRACEMCO.NS', 'VEDL.NS', 'WIPRO.NS', 'YESBANK.NS', 'ZEEL.NS']

In [44]:
stocks = pd.DataFrame()

In [45]:
for stock in nifty_all:
    print(stock)
    data = get_datadf(stock)
    merge_data = merge_all(data)
    #stocks = stocks.append(transformations(merge_data))
    stocks = stocks.append(merge_data)

ADANIPORTS.NS
AMBUJACEM.NS
ASIANPAINT.NS
AUROPHARMA.NS
AXISBANK.NS
BAJFINANCE.NS
BPCL.NS
BHARTIARTL.NS
INFRATEL.NS
BOSCHLTD.NS
CIPLA.NS
COALINDIA.NS
DRREDDY.NS
EICHERMOT.NS
GAIL.NS
HCLTECH.NS
HDFCBANK.NS
HEROMOTOCO.NS
HINDALCO.NS
HINDPETRO.NS
HINDUNILVR.NS
HDFCBANK.NS
ITC.NS
ICICIBANK.NS
IBREALEST.NS
IOC.NS
INFY.NS
KOTAKBANK.NS
LT.NS
LUPIN.NS
M&M.NS
MARUTI.NS
NTPC.NS
ONGC.NS
POWERGRID.NS
RELIANCE.NS
SBIN.NS
SUNPHARMA.NS
TCS.NS
TATAMOTORS.NS
TATASTEEL.NS
TECHM.NS
UPL.NS
ULTRACEMCO.NS
VEDL.NS
WIPRO.NS
YESBANK.NS
ZEEL.NS


In [29]:
pd.options.display.max_columns=100

In [46]:
stocks.shape

(113480, 23)

In [41]:
stocks.head()

Unnamed: 0,rsi14,rsi9,rsi5,macd12,macdhist12,macd6,macdhist6,ema10,ema20,ema30,...,arrondown13,arronup13,bbindic,close,high,low,open,volume,sym,signal
2008-02-06 03:45:00,-1.257544,-1.110235,-0.847772,0.0,-0.492306,0.0,1.024637,0.0,0.0,0.0,...,-0.805341,-1.411509,0.281697,0.0,0.021548,0.034056,0.005573,-0.581843,ADANIPORTS.NS,0
2008-02-07 03:45:00,-1.331247,-1.203711,-1.001014,0.000361,-0.400563,0.004917,0.591781,-0.01108,-0.011366,-0.009723,...,-1.02014,-1.411509,0.273414,-0.013932,0.042386,0.005338,-0.036107,0.548305,ADANIPORTS.NS,0
2008-02-08 03:45:00,-1.413141,-1.308333,-1.170119,0.003394,-0.34603,0.013401,0.298939,-0.011812,-0.011701,-0.010033,...,-1.234939,-0.989456,0.260618,-0.015322,0.058606,0.024106,-0.033097,0.425397,ADANIPORTS.NS,0
2008-02-11 03:45:00,-1.852071,-1.82245,-1.796758,0.077587,-0.765388,0.269091,-0.886418,-0.025765,-0.018566,-0.014594,...,1.557452,-1.200482,0.10209,-0.092915,0.103768,0.022778,-0.103768,0.529513,ADANIPORTS.NS,0
2008-02-12 03:45:00,-2.008179,-1.986643,-1.94233,0.077898,-1.14052,0.186608,-1.470301,-0.028372,-0.0204,-0.015985,...,1.557452,-1.411509,0.052893,-0.042042,0.078747,0.037722,-0.064142,0.394055,ADANIPORTS.NS,0


In [None]:
stocks.head(20)

In [47]:
pd.DataFrame.to_csv(stocks, 'output.csv')

In [90]:
columns = np.append(np.array(columns), 'signal')

In [91]:
stocks[columns]

Unnamed: 0,low,high,close,volume,ema50,open,ema40,ema10,ema30,ema20,macdhist12,rsi14,macdhist6,macd12,bbindic,rsi5,macd6,rsi9,arronup13,arrondown13,arrondown7,arronup7,signal
2007-11-27 03:45:00,0.197080,0.079249,0.000000,7.886502,0.000000e+00,0.197080,0.000000,0.000000e+00,0.000000,0.000000,-3.904582,0.544222,-2.115770,0.000000,0.964139,0.853000,0.000000,1.103356,1.112598,-1.223850,-1.194483,1.288699,0
2007-11-28 03:45:00,0.011695,0.117514,-0.077164,-6.180263,0.000000e+00,-0.096045,0.000000,0.000000e+00,0.000000,0.000000,-3.904582,0.544222,-2.115770,0.000000,0.964139,0.853000,0.000000,1.103356,1.112598,-1.223850,-1.194483,1.288699,0
2007-11-29 03:45:00,0.050395,0.030440,0.002260,7.357059,0.000000e+00,-0.020293,0.000000,0.000000e+00,0.000000,0.000000,-3.904582,0.544222,-2.115770,0.000000,0.964139,0.853000,0.000000,1.103356,1.112598,-1.223850,-1.194483,1.288699,1
2007-11-30 03:45:00,0.041981,0.030140,0.047351,6.530336,0.000000e+00,0.041981,0.000000,0.000000e+00,0.000000,0.000000,-3.904582,0.544222,-2.115770,0.000000,0.964139,0.853000,0.000000,1.103356,1.112598,-1.223850,-1.194483,1.288699,1
2007-12-03 03:45:00,0.059184,0.014592,0.054898,3.805932,0.000000e+00,0.041071,0.000000,0.000000e+00,0.000000,0.000000,-3.904582,0.544222,-2.115770,0.000000,0.964139,0.853000,0.000000,1.103356,1.112598,-1.223850,-1.194483,1.288699,1
2007-12-04 03:45:00,0.069590,0.005720,0.070408,6.432476,0.000000e+00,0.059342,0.000000,0.000000e+00,0.000000,0.000000,-3.904582,0.544222,-2.115770,0.000000,0.964139,0.853000,0.000000,1.103356,1.112598,-1.223850,-1.194483,1.288699,1
2007-12-05 03:45:00,0.031365,0.013838,0.033365,3.557482,0.000000e+00,0.031365,0.000000,0.000000e+00,0.000000,0.000000,-3.904582,0.544222,-2.115770,0.000000,0.964139,1.092880,0.000000,1.103356,1.112598,-1.223850,-1.194483,1.288699,0
2007-12-06 03:45:00,0.035868,0.016054,0.005627,1.888735,0.000000e+00,0.035868,0.000000,0.000000e+00,0.000000,0.000000,-3.904582,0.544222,-2.115770,0.000000,0.964139,1.135581,0.000000,1.103356,1.112598,-1.223850,-1.194483,1.288699,0
2007-12-07 03:45:00,0.020000,0.030636,0.009082,2.713354,0.000000e+00,0.000000,0.000000,0.000000e+00,0.000000,0.000000,-3.904582,0.544222,-2.115770,0.000000,0.964139,1.214156,0.000000,1.103356,1.112598,-1.223850,-0.813109,1.288699,0
2007-12-10 03:45:00,0.011178,0.033953,-0.024045,-0.733337,0.000000e+00,-0.026175,0.000000,0.000000e+00,0.000000,0.000000,-3.904582,0.544222,-2.115770,0.000000,0.964139,0.562740,0.000000,1.103356,1.112598,-1.223850,-1.194483,0.913710,0


In [18]:
from sklearn.model_selection import train_test_split

In [32]:
y = stocks.signal
x = stocks.drop('signal', axis=1)

In [33]:
trainx, testx, trainy, testy = train_test_split(x,y, test_size=0.2)

# Feature Selection

In [34]:
from sklearn.ensemble import RandomForestClassifier

In [35]:
clf = RandomForestClassifier(n_estimators=800, verbose=2, n_jobs=4)

In [36]:
clf.fit(x, y)

building tree 1 of 800building tree 2 of 800

building tree 3 of 800
 building tree 4 of 800
building tree 5 of 800
building tree 6 of 800
building tree 7 of 800
building tree 8 of 800
building tree 9 of 800
building tree 10 of 800
building tree 11 of 800
building tree 12 of 800
building tree 13 of 800
building tree 14 of 800
building tree 15 of 800
building tree 16 of 800
building tree 17 of 800
building tree 18 of 800
building tree 19 of 800
building tree 20 of 800
building tree 21 of 800
building tree 22 of 800
building tree 23 of 800
building tree 24 of 800
building tree 25 of 800
building tree 26 of 800
building tree 27 of 800
building tree 28 of 800
building tree 29 of 800
building tree 30 of 800
building tree 31 of 800
building tree 32 of 800
building tree 33 of 800
building tree 34 of 800
building tree 35 of 800
building tree 36 of 800


[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:   21.8s


building tree 37 of 800
building tree 38 of 800
building tree 39 of 800
building tree 40 of 800
building tree 41 of 800
building tree 42 of 800
building tree 43 of 800
building tree 44 of 800
building tree 45 of 800
building tree 46 of 800
building tree 47 of 800
building tree 48 of 800
building tree 49 of 800
building tree 50 of 800
building tree 51 of 800
building tree 52 of 800
building tree 53 of 800
building tree 54 of 800
building tree 55 of 800
building tree 56 of 800
building tree 57 of 800
building tree 58 of 800
building tree 59 of 800
building tree 60 of 800
building tree 61 of 800
building tree 62 of 800
building tree 63 of 800
building tree 64 of 800
building tree 65 of 800
building tree 66 of 800
building tree 67 of 800
building tree 68 of 800
building tree 69 of 800
building tree 70 of 800
building tree 71 of 800
building tree 72 of 800
building tree 73 of 800
building tree 74 of 800
building tree 75 of 800
building tree 76 of 800
building tree 77 of 800
building tree 78

[Parallel(n_jobs=4)]: Done 154 tasks      | elapsed:  1.7min


building tree 158 of 800
building tree 159 of 800
building tree 160 of 800
building tree 161 of 800
building tree 162 of 800
building tree 163 of 800
building tree 164 of 800
building tree 165 of 800
building tree 166 of 800
building tree 167 of 800
building tree 168 of 800
building tree 169 of 800
building tree 170 of 800
building tree 171 of 800
building tree 172 of 800
building tree 173 of 800
building tree 174 of 800
building tree 175 of 800
building tree 176 of 800
building tree 177 of 800
building tree 178 of 800
building tree 179 of 800
building tree 180 of 800
building tree 181 of 800
building tree 182 of 800
building tree 183 of 800
building tree 184 of 800
building tree 185 of 800
building tree 186 of 800
building tree 187 of 800
building tree 188 of 800
building tree 189 of 800
building tree 190 of 800
building tree 191 of 800
building tree 192 of 800
building tree 193 of 800
building tree 194 of 800
building tree 195 of 800
building tree 196 of 800
building tree 197 of 800


[Parallel(n_jobs=4)]: Done 357 tasks      | elapsed:  3.8min


building tree 362 of 800
building tree 363 of 800
building tree 364 of 800
building tree 365 of 800
building tree 366 of 800
building tree 367 of 800
building tree 368 of 800
building tree 369 of 800
building tree 370 of 800
building tree 371 of 800
building tree 372 of 800
building tree 373 of 800
building tree 374 of 800
building tree 375 of 800
building tree 376 of 800
building tree 377 of 800
building tree 378 of 800
building tree 379 of 800
building tree 380 of 800
building tree 381 of 800
building tree 382 of 800
building tree 383 of 800
building tree 384 of 800
building tree 385 of 800
building tree 386 of 800
building tree 387 of 800
building tree 388 of 800
building tree 389 of 800
building tree 390 of 800
building tree 391 of 800
building tree 392 of 800
building tree 393 of 800
building tree 394 of 800
building tree 395 of 800
building tree 396 of 800
building tree 397 of 800
building tree 398 of 800
building tree 399 of 800
building tree 400 of 800
building tree 401 of 800


[Parallel(n_jobs=4)]: Done 640 tasks      | elapsed:  6.8min


building tree 644 of 800
building tree 645 of 800
building tree 646 of 800
building tree 647 of 800
building tree 648 of 800
building tree 649 of 800
building tree 650 of 800
building tree 651 of 800
building tree 652 of 800
building tree 653 of 800
building tree 654 of 800
building tree 655 of 800
building tree 656 of 800
building tree 657 of 800
building tree 658 of 800
building tree 659 of 800
building tree 660 of 800
building tree 661 of 800
building tree 662 of 800
building tree 663 of 800
building tree 664 of 800
building tree 665 of 800
building tree 666 of 800
building tree 667 of 800
building tree 668 of 800
building tree 669 of 800
building tree 670 of 800
building tree 671 of 800
building tree 672 of 800
building tree 673 of 800
building tree 674 of 800
building tree 675 of 800
building tree 676 of 800
building tree 677 of 800
building tree 678 of 800
building tree 679 of 800
building tree 680 of 800
building tree 681 of 800
building tree 682 of 800
building tree 683 of 800


[Parallel(n_jobs=4)]: Done 800 out of 800 | elapsed:  8.5min finished


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=800, n_jobs=4, oob_score=False, random_state=None,
            verbose=2, warm_start=False)

In [37]:
important_feature = clf.feature_importances_

In [38]:
indices = np.argsort(clf.feature_importances_[::-1])

In [39]:
features_rf = pd.DataFrame(important_feature, index=x.columns, columns=['features'])

In [40]:
features_rf.sort_values(by='features', ascending=False)

Unnamed: 0,features
low,0.053406
high,0.053275
close,0.049062
volume,0.048256
ema50,0.047824
open,0.045782
ema40,0.045447
ema10,0.045432
ema30,0.045198
ema20,0.044917


In [89]:
columns = features_rf.sort_values(by='features', ascending=False)[:22].index

# Feature Scaling

In [42]:
from sklearn.preprocessing import StandardScaler

In [43]:
ss = StandardScaler()

In [44]:
trainx_scale = ss.fit_transform(trainx.loc[:,columns])
testx_scale = ss.fit_transform(testx.loc[:,columns])

# Logistic Regression

In [77]:
from sklearn.linear_model import LogisticRegression

In [118]:
clf_lr = LogisticRegression(penalty='l2',C=1000.0)

In [119]:
clf_lr.fit(trainx_scale, trainy)

LogisticRegression(C=1000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

In [120]:
test_predy = clf_lr.predict(testx_scale)

In [104]:
from sklearn.metrics import confusion_matrix

In [121]:
confusion_matrix(testy, test_predy)

array([[17202,    14],
       [ 1154,    11]], dtype=int64)

In [45]:
import xgboost as xgb



In [46]:
gbm = xgb.XGBClassifier(max_depth=7, n_estimators=1500, learning_rate=0.6).fit(trainx_scale , trainy)

In [47]:
test_predy = gbm.predict(testx_scale)

In [55]:
test_predy_prob = gbm.predict_proba(testx_scale)

In [79]:
testy_df = pd.DataFrame({'actual':testy.values, 'prob_1':np.transpose(test_predy_prob)[0], 'prob_2':np.transpose(test_predy_prob)[1]})

In [83]:
pd.options.display.max_rows = 1000

In [85]:
testy_df.sort_values(by='prob_1', ascending=False)

Unnamed: 0,actual,prob_1,prob_2
16631,0,1.000000,2.103014e-08
1173,0,1.000000,8.537934e-09
7948,0,1.000000,2.026681e-08
19697,0,1.000000,1.574034e-08
21737,0,1.000000,7.395001e-09
16403,0,1.000000,1.645560e-08
12145,0,1.000000,2.966195e-08
1448,0,1.000000,9.135090e-09
6703,0,1.000000,6.377564e-09
9834,0,1.000000,1.192477e-08


In [66]:
testy_df

Unnamed: 0,signal
2009-09-01 03:45:00,0
2010-04-21 03:45:00,0
2017-04-10 03:45:00,0
2009-01-20 03:45:00,0
2016-06-06 03:45:00,0
2015-11-19 03:45:00,0
2012-01-30 03:45:00,0
2015-03-10 03:45:00,0
2008-06-23 03:45:00,0
2013-04-08 03:45:00,0


In [61]:
pd.DataFrame({'prob':test_predy_prob, 'actual':testy})

Exception: Data must be 1-dimensional

In [51]:
from sklearn.metrics import confusion_matrix

In [52]:
def accuracy(y_true, y_pred):
    from sklearn.metrics import precision_recall_fscore_support
    print(precision_recall_fscore_support(testy, test_predy, average='macro'))
    print(confusion_matrix(testy, test_predy))
accuracy(testy, test_predy)

(0.57351823434483162, 0.52053222824773138, 0.52170745186636736, None)
[[21234   407]
 [ 1900   121]]


In [50]:
pd.Series(testy).value_counts()

0    21641
1     2021
Name: signal, dtype: int64