## Can recent performance can predict stock movements?

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime

%matplotlib inline

In [3]:
#import ^GSPC.csv file (from Yahoo Finance)
prices = pd.read_csv('../data/BTC-14_17.csv')
prices.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800
1,2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200
2,2014-09-19,424.102997,427.834991,384.532013,394.79599,394.79599,37919700
3,2014-09-20,394.673004,423.29599,389.882996,408.903992,408.903992,36863600
4,2014-09-21,408.084991,412.425995,393.181,398.821014,398.821014,26580100


In [4]:
#drop rows with null values
prices = prices.dropna()

#Add gain and gain_% columns
#prices['gain'] = prices['Close'] - prices['Open'](this doesn't work because price changes overnight)
prices['gain'] = prices['Close']-prices['Close'].shift()
prices['gain_%'] = prices['gain'] / prices['Close'].shift(1)

#Drop unnecessary columns
prices = prices.drop(columns=['Open','High','Low','Adj Close','Volume'])

#Rename Columns
prices = prices.rename(columns = {'Date':'date', 'Close':'close'})

prices.head()

Unnamed: 0,date,close,gain,gain_%
0,2014-09-17,457.334015,,
1,2014-09-18,424.440002,-32.894013,-0.071926
2,2014-09-19,394.79599,-29.644012,-0.069843
3,2014-09-20,408.903992,14.108002,0.035735
4,2014-09-21,398.821014,-10.082978,-0.024659


In [5]:
#Add pos_neg column to determine gain vs. loss

prices['pos_neg'] = ''
for ind, row in prices.iterrows():
    if row['gain_%'] > 0:
            prices.loc[ind, 'pos_neg'] = 'pos'
    else:
        prices.loc[ind, 'pos_neg'] = 'neg'
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg
0,2014-09-17,457.334015,,,neg
1,2014-09-18,424.440002,-32.894013,-0.071926,neg
2,2014-09-19,394.79599,-29.644012,-0.069843,neg
3,2014-09-20,408.903992,14.108002,0.035735,pos
4,2014-09-21,398.821014,-10.082978,-0.024659,neg


In [6]:
#Find consecutive days market has moved in one direction
#y.groupby((y != y.shift()).cumsum()).cumcount() + 1
#https://stackoverflow.com/questions/27626542/counting-consecutive-positive-value-in-python-array

prices['streak'] = prices['pos_neg'].groupby((prices['pos_neg'] != prices['pos_neg'].shift()).cumsum()).cumcount() + 1
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak
0,2014-09-17,457.334015,,,neg,1
1,2014-09-18,424.440002,-32.894013,-0.071926,neg,2
2,2014-09-19,394.79599,-29.644012,-0.069843,neg,3
3,2014-09-20,408.903992,14.108002,0.035735,pos,1
4,2014-09-21,398.821014,-10.082978,-0.024659,neg,1


In [7]:
#Assign to groups when pos_neg changes
#https://stackoverflow.com/questions/60334671/pandas-dataframe-how-to-find-consecutive-rows-that-meet-some-conditions

prices['g'] = prices['pos_neg'].ne(prices['pos_neg'].shift()).cumsum()
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g
0,2014-09-17,457.334015,,,neg,1,1
1,2014-09-18,424.440002,-32.894013,-0.071926,neg,2,1
2,2014-09-19,394.79599,-29.644012,-0.069843,neg,3,1
3,2014-09-20,408.903992,14.108002,0.035735,pos,1,2
4,2014-09-21,398.821014,-10.082978,-0.024659,neg,1,3


In [8]:
#Use cumsum to find cummulative sums
#https://stackoverflow.com/questions/32890124/pandas-dataframe-running-sum-with-reset/32891081#32891081

prices['cum']= prices.groupby(['g'])['gain_%'].cumsum()
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum
0,2014-09-17,457.334015,,,neg,1,1,
1,2014-09-18,424.440002,-32.894013,-0.071926,neg,2,1,-0.071926
2,2014-09-19,394.79599,-29.644012,-0.069843,neg,3,1,-0.141768
3,2014-09-20,408.903992,14.108002,0.035735,pos,1,2,0.035735
4,2014-09-21,398.821014,-10.082978,-0.024659,neg,1,3,-0.024659


In [9]:
#assign to bins based on % Gain and streak

# use pd.cut instead?
# https://stackoverflow.com/questions/46472809/python-binning-based-on-2-columns-in-pandas

prices['bin'] = ''
for ind, row in prices.iterrows():
    if row['cum'] >= 0 and row['cum'] < 0.0008 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'A1'
    elif row['cum'] >= 0.0008 and row['cum'] < 0.0016 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'B1'
    elif row['cum'] >= 0.0016 and row['cum'] < 0.0025 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'C1'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'D1'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'E1'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'F1'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'G1'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'H1'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'I1'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0200 and row['cum'] < 0.0225 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'I1'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0275 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'K1'
    elif row['cum'] >= 0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'K1'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'M1'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'N1'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'O1'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'P1'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'Q1'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 1:        
#            prices.loc[ind, 'bin'] = 'R1'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'S1'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'T1'
#    elif row['cum'] >= 0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'U1'
    elif row['cum'] > 0 and row['cum'] < 0.0025 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'A2'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'B2'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'C2'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'E2'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'F2'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'G2'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'H2'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'I2'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'J2'
    elif row['cum'] >= 0.0250 and row['cum'] < 0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'K2'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'L2'
    elif row['cum'] >= 0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'M2'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'N2'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'O2'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'P2'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'Q2'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'R2'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'S2'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'T2'
#    elif row['cum'] >= 0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'U2'
    elif row['cum'] > 0 and row['cum'] < 0.0050 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'A3'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'B3'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0100 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'C3'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0150 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'E3'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'F3'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0200 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'G3'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'H3'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'I3'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'J3'
    elif row['cum'] >= 0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'K3'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'L3'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'M3'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'N3'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'O3'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'P3'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'Q3'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'R3'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'S3'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'T3'
#    elif row['cum'] >= 0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'U3'
    elif row['cum'] > 0 and row['cum'] < 0.0125 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'A4'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'B4'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'C4'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'D4'
#    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'E4'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0200 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'F4'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'G4'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'H4'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'I4'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'J4'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'K4'
    elif row['cum'] >= 0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'L4'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'M4'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'N4'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'O4'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'P4'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'Q4'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'R4'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'S4'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'T4'
#    elif row['cum'] >= 0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'U4'
    elif row['cum'] > 0 and row['cum'] < 0.0100 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'A5'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'B5'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'C2'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'D5'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0200 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'E5'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'F5'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'G5'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'H5'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0300 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'I5'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'J5'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'K5'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'L5'
    elif row['cum'] >= 0.0300 and row['cum'] < 0.0400 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'M5'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'N5'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'O5'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'P5'
    elif row['cum'] >= 0.0400 and row['cum'] < 0.0500 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'Q5'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'R5'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'S5'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'T5'
    elif row['cum'] >= 0.0500 and row['streak'] >= 5:
            prices.loc[ind, 'bin'] = 'U5'
    elif row['cum'] >= -0.0008 and row['cum'] < 0.0000 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-A1'
    elif row['cum'] >= -0.0016 and row['cum'] < 0.0008 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0016 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-D1'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-E1'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-F1'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-G1'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-H1'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-I1'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0175 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0225 and row['cum'] < -0.0200 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-I1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0225 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-K1'
    elif row['cum'] < -0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-K1'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-M1'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-N1'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-O1'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-P1'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-Q1'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-R1'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-S1'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-T1'
#    elif row['cum'] < -0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-U1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0000 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-A2'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-B2'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-C2'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-E2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-F2'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-G2'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-H2'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-I2'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-J2'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0250 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-K2'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-L2'
    elif row['cum'] < -0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-M2'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-N2'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-O2'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-P2'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-Q2'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-R2'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-S2'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-T2'
#    elif row['cum'] < -0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-U2'
    elif row['cum'] >= -0.0050 and row['cum'] < 0.0000 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-A3'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-B3'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0050 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-C3'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0100 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-E3'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-F3'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0150 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-G3'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-H3'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-I3'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-J3'
    elif row['cum'] < -0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-K3'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-L3'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-M3'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-N3'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-O3'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-P3'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-Q3'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-R3'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-S3'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-T3'
#    elif row['cum'] < -0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-U3'
    elif row['cum'] >= -0.0125 and row['cum'] < 0.0000 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-A4'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-B4'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-C4'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-D4'
#    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-E4'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0125 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-F4'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-G4'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-H4'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0200 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-I4'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-J4'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-K4'
    elif row['cum'] < -0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-L4'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-M4'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-N4'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-O4'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-P4'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-Q4'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-R4'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-S4'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-T4'
#    elif row['cum'] < -0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-U4'
    elif row['cum'] >= -0.0100 and row['cum'] < 0.0000 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-A5'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-B5'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-C2'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-D5'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0100 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-E5'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-F5'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-G5'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-H5'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0200 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-I5'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-J5'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-K5'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-L5'
    elif row['cum'] >= -0.0400 and row['cum'] < -0.0300 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-M5'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-N5'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-O5'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-P5'
    elif row['cum'] >= -0.0500 and row['cum'] < -0.0400 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-Q5'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-R5'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-S5'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-T5'
    elif row['cum'] < -0.0500 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-U5'
    elif row['cum'] < 0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = 'V6'
    elif row['cum'] >= 0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = 'W6'
    elif row['cum'] > -0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = '-V6'
    elif row['cum'] <= -0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = '-W6'
    else:
        prices.loc[ind, 'bin'] ='z'
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2014-09-17,457.334015,,,neg,1,1,,z
1,2014-09-18,424.440002,-32.894013,-0.071926,neg,2,1,-0.071926,-M2
2,2014-09-19,394.79599,-29.644012,-0.069843,neg,3,1,-0.141768,-K3
3,2014-09-20,408.903992,14.108002,0.035735,pos,1,2,0.035735,K1
4,2014-09-21,398.821014,-10.082978,-0.024659,neg,1,3,-0.024659,-J1


In [10]:
#Make sure all rows are assigned to a bin
z = prices.loc[prices['bin'] == 'z']
z

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2014-09-17,457.334015,,,neg,1,1,,z


In [11]:
#new column to shift cum values up one row
#df['gdp'] = df['gdp'].shift(-1)
prices['next_day'] = prices['gain_%'].shift(-1)
prices['p_n'] = prices['pos_neg'].shift(-1)

#drop 'g' column
prices = prices.drop(columns=['g'])
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,cum,bin,next_day,p_n
0,2014-09-17,457.334015,,,neg,1,,z,-0.071926,neg
1,2014-09-18,424.440002,-32.894013,-0.071926,neg,2,-0.071926,-M2,-0.069843,neg
2,2014-09-19,394.79599,-29.644012,-0.069843,neg,3,-0.141768,-K3,0.035735,pos
3,2014-09-20,408.903992,14.108002,0.035735,pos,1,0.035735,K1,-0.024659,neg
4,2014-09-21,398.821014,-10.082978,-0.024659,neg,1,-0.024659,-J1,0.008352,pos


## Create new df's

In [12]:
#get average total return for next day for each bin, then convert to df
#df.groupby('Column1')['Column2'].mean()

df_1 = prices.groupby('bin')['next_day'].mean().to_frame().reset_index()
df_1 = df_1.rename(columns = {'next_day':'next_day_avg'})

df_1.head()

Unnamed: 0,bin,next_day_avg
0,-A1,0.002914
1,-A4,0.005133
2,-B1,0.005484
3,-B2,-0.002532
4,-C2,0.000434


In [13]:
#df_1.loc[df_1['bin'] == 'F4']

#df_1.head()

In [22]:
#Get count for pos and neg returns by bin and p_n, then get counts and %

g = prices.groupby('bin')['p_n']
df_2 = pd.concat([g.value_counts(), 
                g.value_counts(normalize=True).mul(100)],axis=1, keys=('counts','percentage')).reset_index()

df_2.head(60)

Unnamed: 0,bin,p_n,counts,percentage
0,-A1,pos,6,54.545455
1,-A1,neg,5,45.454545
2,-A4,pos,1,100.0
3,-B1,pos,17,62.962963
4,-B1,neg,10,37.037037
5,-B2,neg,2,50.0
6,-B2,pos,2,50.0
7,-C2,pos,4,57.142857
8,-C2,neg,3,42.857143
9,-C3,neg,1,50.0


In [None]:
#Get average next day return by bin and p_n (same as df_1 except averages grouped by positive or negative)

df_3 = prices.groupby(['bin', 'p_n'])['next_day'].mean().reset_index()
df_3.head()

In [None]:
#merge df_1 and df_2
df_4 = pd.merge(df_2, df_3, on = ['bin', 'p_n'], how = 'inner')
df_4.head()

In [None]:
#merge df_4 and df_3
perf = pd.merge(df_4, df_1, on = 'bin', how = 'left')
perf.head()

In [None]:
#z1 = perf[perf.bin.str.endswith('5')]
#z1 = z1.loc[z1['p_n'] == 'pos']
#z1.head()

In [None]:
#plt.bar('bin', 'percentage', data = z1)
#plt.hlines(y=50, xmin = -1, xmax = 10, color = 'orange')
#plt.xlim(-1, 10)
#plt.xticks(rotation = 70)
#plt.title('Next Day Return with 5-or-More-Day Streak')
#plt.xlabel('Previous Day(s) Activity')
#plt.ylabel('%');

## Import test data and run through same first steps as above

In [25]:
#Import new ^GSPC.csv file (from Yahoo Finance)
test_data = pd.read_csv('../data/BTC-17_21.csv')
test_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2018-01-01,14112.200195,14112.200195,13154.700195,13657.200195,13657.200195,10291200000.0
1,2018-01-02,13625.0,15444.599609,13163.599609,14982.099609,14982.099609,16846600000.0
2,2018-01-03,14978.200195,15572.799805,14844.5,15201.0,15201.0,16871900000.0
3,2018-01-04,15270.700195,15739.700195,14522.200195,15599.200195,15599.200195,21783200000.0
4,2018-01-05,15477.200195,17705.199219,15202.799805,17429.5,17429.5,23840900000.0


In [26]:
#drop rows with null values
test_data = test_data.dropna()

#Add gain and gain_%
test_data['gain'] = test_data['Close']-test_data['Close'].shift()
test_data['gain_%'] = test_data['gain'] / test_data['Close'].shift(1)

#Drop unnecessary columns
test_data = test_data.drop(columns=['Open','High','Low','Adj Close','Volume'])

#Rename Columns
test_data = test_data.rename(columns = {'Date':'date', 'Close':'close'})

test_data.head()

Unnamed: 0,date,close,gain,gain_%
0,2018-01-01,13657.200195,,
1,2018-01-02,14982.099609,1324.899414,0.097011
2,2018-01-03,15201.0,218.900391,0.014611
3,2018-01-04,15599.200195,398.200195,0.026196
4,2018-01-05,17429.5,1830.299805,0.117333


In [27]:
#Add pos_neg column to determine gain vs. loss

test_data['pos_neg'] = ''
for ind, row in test_data.iterrows():
    if row['gain_%'] > 0:
            test_data.loc[ind, 'pos_neg'] = 'pos'
    else:
        test_data.loc[ind, 'pos_neg'] = 'neg'
test_data

Unnamed: 0,date,close,gain,gain_%,pos_neg
0,2018-01-01,13657.200195,,,neg
1,2018-01-02,14982.099609,1324.899414,0.097011,pos
2,2018-01-03,15201.000000,218.900391,0.014611,pos
3,2018-01-04,15599.200195,398.200195,0.026196,pos
4,2018-01-05,17429.500000,1830.299805,0.117333,pos
...,...,...,...,...,...
1125,2021-01-30,34269.523438,-46.863281,-0.001366,neg
1126,2021-01-31,33114.359375,-1155.164063,-0.033708,neg
1127,2021-02-01,33537.175781,422.816406,0.012768,pos
1128,2021-02-02,35510.289063,1973.113282,0.058834,pos


In [28]:
#Find consecutive days market has moved in one direction
#y.groupby((y != y.shift()).cumsum()).cumcount() + 1
#https://stackoverflow.com/questions/27626542/counting-consecutive-positive-value-in-python-array

test_data['streak'] = test_data['pos_neg'].groupby((test_data['pos_neg'] != test_data['pos_neg'].shift()).cumsum()).cumcount() + 1
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak
0,2018-01-01,13657.200195,,,neg,1
1,2018-01-02,14982.099609,1324.899414,0.097011,pos,1
2,2018-01-03,15201.0,218.900391,0.014611,pos,2
3,2018-01-04,15599.200195,398.200195,0.026196,pos,3
4,2018-01-05,17429.5,1830.299805,0.117333,pos,4


In [29]:
#Assign to groups when pos_neg changes
#https://stackoverflow.com/questions/60334671/pandas-dataframe-how-to-find-consecutive-rows-that-meet-some-conditions

test_data['g'] = test_data['pos_neg'].ne(test_data['pos_neg'].shift()).cumsum()
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g
0,2018-01-01,13657.200195,,,neg,1,1
1,2018-01-02,14982.099609,1324.899414,0.097011,pos,1,2
2,2018-01-03,15201.0,218.900391,0.014611,pos,2,2
3,2018-01-04,15599.200195,398.200195,0.026196,pos,3,2
4,2018-01-05,17429.5,1830.299805,0.117333,pos,4,2


In [30]:
#Use cumsum to find cummulative sums
#https://stackoverflow.com/questions/32890124/pandas-dataframe-running-sum-with-reset/32891081#32891081

test_data['cum']= test_data.groupby(['g'])['gain_%'].cumsum()
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum
0,2018-01-01,13657.200195,,,neg,1,1,
1,2018-01-02,14982.099609,1324.899414,0.097011,pos,1,2,0.097011
2,2018-01-03,15201.0,218.900391,0.014611,pos,2,2,0.111622
3,2018-01-04,15599.200195,398.200195,0.026196,pos,3,2,0.137818
4,2018-01-05,17429.5,1830.299805,0.117333,pos,4,2,0.25515


In [31]:
#https://stackoverflow.com/questions/39109045/numpy-where-with-multiple-conditions

#col         = 'consumption_energy'
#conditions  = [ df2[col] >= 400, (df2[col] < 400) & (df2[col]> 200), df2[col] <= 200 ]
#choices     = [ "high", 'medium', 'low' ]
    
#df2["energy_class"] = np.select(conditions, choices, default=np.nan)


In [32]:
#assign to bins based on % Gain and streak

# use pd.cut instead?
# https://stackoverflow.com/questions/46472809/python-binning-based-on-2-columns-in-pandas

test_data['bin'] = ''
for ind, row in test_data.iterrows():
    if row['cum'] >= 0 and row['cum'] < 0.0008 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'A1'
    elif row['cum'] >= 0.0008 and row['cum'] < 0.0016 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'B1'
    elif row['cum'] >= 0.0016 and row['cum'] < 0.0025 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'C1'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'D1'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'E1'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'F1'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'G1'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'H1'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'I1'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0275 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0200 and row['cum'] < 0.0225 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'I1'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0275 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'K1'
    elif row['cum'] >= 0.0275 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'K1'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'M1'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'N1'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'O1'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'P1'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'Q1'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 1:        
#            test_data.loc[ind, 'bin'] = 'R1'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'S1'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'T1'
#    elif row['cum'] >= 0.0500 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'U1'
    elif row['cum'] > 0 and row['cum'] < 0.0025 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'A2'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'B2'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'C2'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'E2'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'F2'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'G2'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'H2'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'I2'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'J2'
    elif row['cum'] >= 0.0250 and row['cum'] < 0.0300 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'K2'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'L2'
    elif row['cum'] >= 0.0300 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'M2'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'N2'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'O2'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'P2'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'Q2'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'R2'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'S2'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'T2'
#    elif row['cum'] >= 0.0500 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'U2'
    elif row['cum'] > 0 and row['cum'] < 0.0050 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'A3'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'B3'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0100 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'C3'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0150 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'E3'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'F3'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0200 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'G3'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'H3'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'I3'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'J3'
    elif row['cum'] >= 0.0250 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'K3'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'L3'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'M3'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'N3'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'O3'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'P3'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'Q3'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'R3'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'S3'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'T3'
#    elif row['cum'] >= 0.0500 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'U3'
    elif row['cum'] > 0 and row['cum'] < 0.0125 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'A4'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'B4'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'C4'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'D4'
#    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'E4'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0200 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'F4'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'G4'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'H4'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0275 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'I4'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'J4'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'K4'
    elif row['cum'] >= 0.0275 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'L4'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'M4'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'N4'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'O4'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'P4'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'Q4'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'R4'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'S4'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'T4'
#    elif row['cum'] >= 0.0500 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'U4'
    elif row['cum'] > 0 and row['cum'] < 0.0100 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'A5'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'B5'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'C2'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'D5'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0200 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'E5'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'F5'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'G5'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'H5'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0300 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'I5'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'J5'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'K5'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'L5'
    elif row['cum'] >= 0.0300 and row['cum'] < 0.0400 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'M5'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'N5'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'O5'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'P5'
    elif row['cum'] >= 0.0400 and row['cum'] < 0.0500 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'Q5'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 5:
#            test_dataloc[ind, 'bin'] = 'R5'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'S5'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'T5'
    elif row['cum'] >= 0.0500 and row['streak'] >= 5:
            test_data.loc[ind, 'bin'] = 'U5'
    elif row['cum'] >= -0.0008 and row['cum'] < 0.0000 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-A1'
    elif row['cum'] >= -0.0016 and row['cum'] < 0.0008 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0016 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-F1'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-G1'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-H1'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-I1'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0175 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0225 and row['cum'] < -0.0200 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-I1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0225 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-K1'
    elif row['cum'] < -0.0275 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-K1'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-M1'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-N1'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-O1'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-P1'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-Q1'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-R1'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-S1'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-T1'
#    elif row['cum'] < -0.0500 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-U1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0000 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-A2'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-B2'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-C2'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-E2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-F2'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-G2'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-H2'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-I2'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-J2'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0250 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-K2'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-L2'
    elif row['cum'] < -0.0300 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-M2'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-N2'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-O2'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-P2'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-Q2'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-R2'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-S2'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-T2'
#    elif row['cum'] < -0.0500 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-U2'
    elif row['cum'] >= -0.0050 and row['cum'] < 0.0000 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-A3'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-B3'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0050 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-C3'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0100 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-E3'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-F3'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0150 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-G3'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-H3'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-I3'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-J3'
    elif row['cum'] < -0.0250 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-K3'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-L3'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-M3'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-N3'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-O3'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-P3'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-Q3'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-R3'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-S3'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-T3'
#    elif row['cum'] < -0.0500 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-U3'
    elif row['cum'] >= -0.0125 and row['cum'] < 0.0000 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-A4'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-B4'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-C4'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-D4'
#    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-E4'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0125 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-F4'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-G4'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-H4'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0200 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-I4'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-J4'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-K4'
    elif row['cum'] < -0.0275 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-L4'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-M4'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-N4'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-O4'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-P4'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-Q4'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-R4'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-S4'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-T4'
#    elif row['cum'] < -0.0500 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-U4'
    elif row['cum'] >= -0.0100 and row['cum'] < 0.0000 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-A5'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-B5'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-C2'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-D5'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0100 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-E5'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-F5'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-G5'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-H5'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0200 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-I5'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-J5'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-K5'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-L5'
    elif row['cum'] >= -0.0400 and row['cum'] < -0.0300 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-M5'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-N5'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-O5'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 5:
#            test_dataloc[ind, 'bin'] = '-P5'
    elif row['cum'] >= -0.0500 and row['cum'] < -0.0400 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-Q5'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-R5'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-S5'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-T5'
    elif row['cum'] < -0.0500 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-U5'
    elif row['cum'] < 0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = 'V6'
    elif row['cum'] >= 0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = 'W6'
    elif row['cum'] > -0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = '-V6'
    elif row['cum'] <= -0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = '-W6'
    else:
        test_data.loc[ind, 'bin'] ='z'
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2018-01-01,13657.200195,,,neg,1,1,,z
1,2018-01-02,14982.099609,1324.899414,0.097011,pos,1,2,0.097011,K1
2,2018-01-03,15201.0,218.900391,0.014611,pos,2,2,0.111622,M2
3,2018-01-04,15599.200195,398.200195,0.026196,pos,3,2,0.137818,K3
4,2018-01-05,17429.5,1830.299805,0.117333,pos,4,2,0.25515,L4


In [33]:
#Make sure all rows are assigned to a bin
z = test_data.loc[test_data['bin'] == 'z']
z

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2018-01-01,13657.200195,,,neg,1,1,,z


In [34]:
#new column to shift cum values up one row
#df['gdp'] = df['gdp'].shift(-1)
test_data['next_day'] = test_data['gain_%'].shift(-1)
test_data['p_n'] = test_data['pos_neg'].shift(-1)

#drop 'g' column
test_data = test_data.drop(columns=['g'])
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,cum,bin,next_day,p_n
0,2018-01-01,13657.200195,,,neg,1,,z,0.097011,pos
1,2018-01-02,14982.099609,1324.899414,0.097011,pos,1,0.097011,K1,0.014611,pos
2,2018-01-03,15201.0,218.900391,0.014611,pos,2,0.111622,M2,0.026196,pos
3,2018-01-04,15599.200195,398.200195,0.026196,pos,3,0.137818,K3,0.117333,pos
4,2018-01-05,17429.5,1830.299805,0.117333,pos,4,0.25515,L4,0.005594,pos


## Choose time period and test theory versus market

In [35]:
#Change date column to datetime
test_data['date'] =pd.to_datetime(test_data.date)

#Create new df by merging prices and df_3. Sort by date and reset index.
predict =  pd.merge(test_data, df_1, on = 'bin', how = 'inner').sort_values(by=['date'], ascending=True).reset_index()

#Drop columns.
predict = predict.drop(columns=['index','gain'])
predict.head()

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg
0,2018-01-01,13657.200195,,neg,1,,z,0.097011,pos,-0.071926
1,2018-01-02,14982.099609,0.097011,pos,1,0.097011,K1,0.014611,pos,0.008545
2,2018-01-03,15201.0,0.014611,pos,2,0.111622,M2,0.026196,pos,0.009206
3,2018-01-04,15599.200195,0.026196,pos,3,0.137818,K3,0.117333,pos,-0.00139
4,2018-01-05,17429.5,0.117333,pos,4,0.25515,L4,0.005594,pos,0.010799


In [36]:
#Set Time Period

#predict = predict[(predict['date'] > '2017-01-01') & (predict['date'] < '2017-12-31')]

In [37]:
#Use lambda function to add buy/sell column in order to separate next_day by positive/negative
predict['buy_sell'] = predict['next_day_avg'].apply(lambda x: float(1) if x > 0 else float(-1))
predict.head()

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell
0,2018-01-01,13657.200195,,neg,1,,z,0.097011,pos,-0.071926,-1.0
1,2018-01-02,14982.099609,0.097011,pos,1,0.097011,K1,0.014611,pos,0.008545,1.0
2,2018-01-03,15201.0,0.014611,pos,2,0.111622,M2,0.026196,pos,0.009206,1.0
3,2018-01-04,15599.200195,0.026196,pos,3,0.137818,K3,0.117333,pos,-0.00139,-1.0
4,2018-01-05,17429.5,0.117333,pos,4,0.25515,L4,0.005594,pos,0.010799,1.0


In [38]:
#Create values column, then delete later
predict['values'] = predict['next_day'] * predict['buy_sell']+1

#Add profit column for cummulative return on $1000 
#df['PROFIT']=df['PROFIT'].fillna(df.RATIO.shift().add(1).iloc[2:].cumprod()*20000)
#https://stackoverflow.com/questions/55518348/cumulative-multiplication-in-pandas-python
#predict['profit']=(predict['values'].shift().add(1).cumprod()*1000).round(2)

predict['profit']=(predict['values'].cumprod() * 1000).round(2)

#predict = predict.drop(columns=['values'])
predict.head()

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell,values,profit
0,2018-01-01,13657.200195,,neg,1,,z,0.097011,pos,-0.071926,-1.0,0.902989,902.99
1,2018-01-02,14982.099609,0.097011,pos,1,0.097011,K1,0.014611,pos,0.008545,1.0,1.014611,916.18
2,2018-01-03,15201.0,0.014611,pos,2,0.111622,M2,0.026196,pos,0.009206,1.0,1.026196,940.18
3,2018-01-04,15599.200195,0.026196,pos,3,0.137818,K3,0.117333,pos,-0.00139,-1.0,0.882667,829.87
4,2018-01-05,17429.5,0.117333,pos,4,0.25515,L4,0.005594,pos,0.010799,1.0,1.005594,834.51


In [39]:
#Add market column for cummulative return on $1000 left in market

#predict['market']=(predict['gain_%'].shift().add(1).cumprod()*1000).round(2)
predict['gain_%']=predict['gain_%']+1
predict['market']=(predict['gain_%'].cumprod() * 1000).round(2)
predict

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell,values,profit,market
0,2018-01-01,13657.200195,,neg,1,,z,0.097011,pos,-0.071926,-1.0,0.902989,902.99,
1,2018-01-02,14982.099609,1.097011,pos,1,0.097011,K1,0.014611,pos,0.008545,1.0,1.014611,916.18,1097.01
2,2018-01-03,15201.000000,1.014611,pos,2,0.111622,M2,0.026196,pos,0.009206,1.0,1.026196,940.18,1113.04
3,2018-01-04,15599.200195,1.026196,pos,3,0.137818,K3,0.117333,pos,-0.001390,-1.0,0.882667,829.87,1142.20
4,2018-01-05,17429.500000,1.117333,pos,4,0.255150,L4,0.005594,pos,0.010799,1.0,1.005594,834.51,1276.21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1117,2021-01-30,34269.523438,0.998634,neg,1,-0.001366,-B1,-0.033708,neg,0.005484,1.0,0.966292,179.94,2556.63
1118,2021-01-31,33114.359375,0.966292,neg,2,-0.035074,-M2,0.012768,pos,-0.005195,-1.0,0.987232,177.64,2470.45
1119,2021-02-01,33537.175781,1.012768,pos,1,0.012768,H1,0.058834,pos,0.003309,1.0,1.058834,188.10,2502.00
1120,2021-02-02,35510.289063,1.058834,pos,2,0.071602,M2,0.055246,pos,0.009206,1.0,1.055246,198.49,2649.20


In [40]:
#df.iloc[0] / df.iloc[-1]
#df.iloc[:,1] # second column of data frame
print(((predict['profit'].iloc[-2]-predict['profit'].iloc[1]) / predict['profit'].iloc[1]) * 100)
print(((predict['close'].iloc[-1]-predict['close'].iloc[1]) / predict['close'].iloc[1]) * 100)
print(((predict['market'].iloc[-1]-predict['market'].iloc[1]) / predict['market'].iloc[1]) * 100)

-78.3350433320963
150.11240628442948
154.83359313041817
