## Can recent performance can predict stock movements?

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime

%matplotlib inline

In [2]:
#import ^GSPC.csv file (from Yahoo Finance)
prices = pd.read_csv('../data/GC=06_16.csv')
prices.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2006-01-03,518.599976,528.5,518.599976,530.700012,530.700012,7.0
1,2006-01-04,533.599976,533.599976,533.5,533.900024,533.900024,8.0
2,2006-01-05,529.0,529.0,526.0,526.299988,526.299988,10.0
3,2006-01-06,539.700012,539.700012,539.700012,539.700012,539.700012,10.0
4,2006-01-08,,,,,,


In [3]:
#drop rows with null values
prices = prices.dropna()

#Add gain and gain_% columns
#prices['gain'] = prices['Close'] - prices['Open'](this doesn't work because price changes overnight)
prices['gain'] = prices['Close']-prices['Close'].shift()
prices['gain_%'] = prices['gain'] / prices['Close'].shift(1)

#Drop unnecessary columns
prices = prices.drop(columns=['Open','High','Low','Adj Close','Volume'])

#Rename Columns
prices = prices.rename(columns = {'Date':'date', 'Close':'close'})

prices.head()

Unnamed: 0,date,close,gain,gain_%
0,2006-01-03,530.700012,,
1,2006-01-04,533.900024,3.200012,0.00603
2,2006-01-05,526.299988,-7.600036,-0.014235
3,2006-01-06,539.700012,13.400024,0.025461
5,2006-01-09,549.099976,9.399964,0.017417


In [4]:
#Add pos_neg column to determine gain vs. loss

prices['pos_neg'] = ''
for ind, row in prices.iterrows():
    if row['gain_%'] > 0:
            prices.loc[ind, 'pos_neg'] = 'pos'
    else:
        prices.loc[ind, 'pos_neg'] = 'neg'
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg
0,2006-01-03,530.700012,,,neg
1,2006-01-04,533.900024,3.200012,0.00603,pos
2,2006-01-05,526.299988,-7.600036,-0.014235,neg
3,2006-01-06,539.700012,13.400024,0.025461,pos
5,2006-01-09,549.099976,9.399964,0.017417,pos


In [5]:
#Find consecutive days market has moved in one direction
#y.groupby((y != y.shift()).cumsum()).cumcount() + 1
#https://stackoverflow.com/questions/27626542/counting-consecutive-positive-value-in-python-array

prices['streak'] = prices['pos_neg'].groupby((prices['pos_neg'] != prices['pos_neg'].shift()).cumsum()).cumcount() + 1
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak
0,2006-01-03,530.700012,,,neg,1
1,2006-01-04,533.900024,3.200012,0.00603,pos,1
2,2006-01-05,526.299988,-7.600036,-0.014235,neg,1
3,2006-01-06,539.700012,13.400024,0.025461,pos,1
5,2006-01-09,549.099976,9.399964,0.017417,pos,2


In [6]:
#Assign to groups when pos_neg changes
#https://stackoverflow.com/questions/60334671/pandas-dataframe-how-to-find-consecutive-rows-that-meet-some-conditions

prices['g'] = prices['pos_neg'].ne(prices['pos_neg'].shift()).cumsum()
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g
0,2006-01-03,530.700012,,,neg,1,1
1,2006-01-04,533.900024,3.200012,0.00603,pos,1,2
2,2006-01-05,526.299988,-7.600036,-0.014235,neg,1,3
3,2006-01-06,539.700012,13.400024,0.025461,pos,1,4
5,2006-01-09,549.099976,9.399964,0.017417,pos,2,4


In [7]:
#Use cumsum to find cummulative sums
#https://stackoverflow.com/questions/32890124/pandas-dataframe-running-sum-with-reset/32891081#32891081

prices['cum']= prices.groupby(['g'])['gain_%'].cumsum()
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum
0,2006-01-03,530.700012,,,neg,1,1,
1,2006-01-04,533.900024,3.200012,0.00603,pos,1,2,0.00603
2,2006-01-05,526.299988,-7.600036,-0.014235,neg,1,3,-0.014235
3,2006-01-06,539.700012,13.400024,0.025461,pos,1,4,0.025461
5,2006-01-09,549.099976,9.399964,0.017417,pos,2,4,0.042878


In [8]:
#assign to bins based on % Gain and streak

# use pd.cut instead?
# https://stackoverflow.com/questions/46472809/python-binning-based-on-2-columns-in-pandas

prices['bin'] = ''
for ind, row in prices.iterrows():
    if row['cum'] >= 0 and row['cum'] < 0.0008 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'A1'
    elif row['cum'] >= 0.0008 and row['cum'] < 0.0016 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'B1'
    elif row['cum'] >= 0.0016 and row['cum'] < 0.0025 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'C1'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'D1'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'E1'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'F1'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'G1'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'H1'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'I1'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0200 and row['cum'] < 0.0225 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'I1'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0275 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'K1'
    elif row['cum'] >= 0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'K1'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'M1'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'N1'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'O1'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'P1'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'Q1'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 1:        
#            prices.loc[ind, 'bin'] = 'R1'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'S1'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'T1'
#    elif row['cum'] >= 0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'U1'
    elif row['cum'] > 0 and row['cum'] < 0.0025 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'A2'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'B2'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'C2'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'E2'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'F2'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'G2'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'H2'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'I2'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'J2'
    elif row['cum'] >= 0.0250 and row['cum'] < 0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'K2'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'L2'
    elif row['cum'] >= 0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'M2'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'N2'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'O2'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'P2'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'Q2'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'R2'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'S2'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'T2'
#    elif row['cum'] >= 0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'U2'
    elif row['cum'] > 0 and row['cum'] < 0.0050 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'A3'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'B3'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0100 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'C3'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0150 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'E3'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'F3'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0200 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'G3'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'H3'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'I3'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'J3'
    elif row['cum'] >= 0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'K3'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'L3'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'M3'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'N3'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'O3'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'P3'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'Q3'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'R3'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'S3'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'T3'
#    elif row['cum'] >= 0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'U3'
    elif row['cum'] > 0 and row['cum'] < 0.0125 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'A4'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'B4'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'C4'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'D4'
#    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'E4'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0200 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'F4'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'G4'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'H4'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'I4'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'J4'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'K4'
    elif row['cum'] >= 0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'L4'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'M4'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'N4'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'O4'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'P4'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'Q4'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'R4'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'S4'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'T4'
#    elif row['cum'] >= 0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'U4'
    elif row['cum'] > 0 and row['cum'] < 0.0100 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'A5'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'B5'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'C2'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'D5'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0200 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'E5'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'F5'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'G5'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'H5'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0300 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'I5'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'J5'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'K5'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'L5'
    elif row['cum'] >= 0.0300 and row['cum'] < 0.0400 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'M5'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'N5'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'O5'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'P5'
    elif row['cum'] >= 0.0400 and row['cum'] < 0.0500 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'Q5'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'R5'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'S5'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'T5'
    elif row['cum'] >= 0.0500 and row['streak'] >= 5:
            prices.loc[ind, 'bin'] = 'U5'
    elif row['cum'] >= -0.0008 and row['cum'] < 0.0000 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-A1'
    elif row['cum'] >= -0.0016 and row['cum'] < 0.0008 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0016 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-D1'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-E1'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-F1'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-G1'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-H1'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-I1'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0175 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0225 and row['cum'] < -0.0200 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-I1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0225 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-K1'
    elif row['cum'] < -0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-K1'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-M1'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-N1'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-O1'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-P1'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-Q1'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-R1'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-S1'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-T1'
#    elif row['cum'] < -0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-U1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0000 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-A2'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-B2'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-C2'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-E2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-F2'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-G2'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-H2'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-I2'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-J2'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0250 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-K2'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-L2'
    elif row['cum'] < -0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-M2'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-N2'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-O2'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-P2'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-Q2'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-R2'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-S2'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-T2'
#    elif row['cum'] < -0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-U2'
    elif row['cum'] >= -0.0050 and row['cum'] < 0.0000 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-A3'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-B3'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0050 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-C3'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0100 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-E3'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-F3'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0150 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-G3'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-H3'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-I3'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-J3'
    elif row['cum'] < -0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-K3'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-L3'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-M3'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-N3'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-O3'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-P3'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-Q3'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-R3'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-S3'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-T3'
#    elif row['cum'] < -0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-U3'
    elif row['cum'] >= -0.0125 and row['cum'] < 0.0000 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-A4'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-B4'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-C4'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-D4'
#    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-E4'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0125 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-F4'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-G4'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-H4'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0200 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-I4'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-J4'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-K4'
    elif row['cum'] < -0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-L4'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-M4'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-N4'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-O4'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-P4'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-Q4'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-R4'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-S4'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-T4'
#    elif row['cum'] < -0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-U4'
    elif row['cum'] >= -0.0100 and row['cum'] < 0.0000 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-A5'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-B5'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-C2'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-D5'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0100 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-E5'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-F5'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-G5'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-H5'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0200 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-I5'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-J5'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-K5'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-L5'
    elif row['cum'] >= -0.0400 and row['cum'] < -0.0300 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-M5'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-N5'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-O5'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-P5'
    elif row['cum'] >= -0.0500 and row['cum'] < -0.0400 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-Q5'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-R5'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-S5'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-T5'
    elif row['cum'] < -0.0500 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-U5'
    elif row['cum'] < 0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = 'V6'
    elif row['cum'] >= 0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = 'W6'
    elif row['cum'] > -0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = '-V6'
    elif row['cum'] <= -0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = '-W6'
    else:
        prices.loc[ind, 'bin'] ='z'
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2006-01-03,530.700012,,,neg,1,1,,z
1,2006-01-04,533.900024,3.200012,0.00603,pos,1,2,0.00603,E1
2,2006-01-05,526.299988,-7.600036,-0.014235,neg,1,3,-0.014235,-H1
3,2006-01-06,539.700012,13.400024,0.025461,pos,1,4,0.025461,J1
5,2006-01-09,549.099976,9.399964,0.017417,pos,2,4,0.042878,M2


In [9]:
#Make sure all rows are assigned to a bin
z = prices.loc[prices['bin'] == 'z']
z

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2006-01-03,530.700012,,,neg,1,1,,z


In [10]:
#new column to shift cum values up one row
#df['gdp'] = df['gdp'].shift(-1)
prices['next_day'] = prices['gain_%'].shift(-1)
prices['p_n'] = prices['pos_neg'].shift(-1)

#drop 'g' column
prices = prices.drop(columns=['g'])
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,cum,bin,next_day,p_n
0,2006-01-03,530.700012,,,neg,1,,z,0.00603,pos
1,2006-01-04,533.900024,3.200012,0.00603,pos,1,0.00603,E1,-0.014235,neg
2,2006-01-05,526.299988,-7.600036,-0.014235,neg,1,-0.014235,-H1,0.025461,pos
3,2006-01-06,539.700012,13.400024,0.025461,pos,1,0.025461,J1,0.017417,pos
5,2006-01-09,549.099976,9.399964,0.017417,pos,2,0.042878,M2,-0.008742,neg


## Create new df's

In [11]:
#get average total return for next day for each bin, then convert to df
#df.groupby('Column1')['Column2'].mean()

df_1 = prices.groupby('bin')['next_day'].mean().to_frame().reset_index()
df_1 = df_1.rename(columns = {'next_day':'next_day_avg'})

df_1.head()

Unnamed: 0,bin,next_day_avg
0,-A1,-0.000895
1,-A2,0.001681
2,-A3,-0.003836
3,-A4,-0.005289
4,-B1,0.001326


In [None]:
#df_1.loc[df_1['bin'] == 'F4']

#df_1.head()

In [None]:
#Get count for pos and neg returns by bin and p_n, then get counts and %

g = prices.groupby('bin')['p_n']
df_2 = pd.concat([g.value_counts(), 
                g.value_counts(normalize=True).mul(100)],axis=1, keys=('counts','percentage')).reset_index()

df_2.head()

In [None]:
#Get average next day return by bin and p_n (same as df_1 except averages grouped by positive or negative)

df_3 = prices.groupby(['bin', 'p_n'])['next_day'].mean().reset_index()
df_3.head()

In [None]:
#merge df_1 and df_2
df_4 = pd.merge(df_2, df_3, on = ['bin', 'p_n'], how = 'inner')
df_4.head()

In [None]:
#merge df_4 and df_3
perf = pd.merge(df_4, df_1, on = 'bin', how = 'left')
perf.head()

In [None]:
#z1 = perf[perf.bin.str.endswith('5')]
#z1 = z1.loc[z1['p_n'] == 'pos']
#z1.head()

In [None]:
#plt.bar('bin', 'percentage', data = z1)
#plt.hlines(y=50, xmin = -1, xmax = 10, color = 'orange')
#plt.xlim(-1, 10)
#plt.xticks(rotation = 70)
#plt.title('Next Day Return with 5-or-More-Day Streak')
#plt.xlabel('Previous Day(s) Activity')
#plt.ylabel('%');

## Import test data and run through same first steps as above

In [12]:
#Import new ^GSPC.csv file (from Yahoo Finance)
test_data = pd.read_csv('../data/GC=16_20.csv')
test_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2016-01-04,1063.400024,1082.5,1063.199951,1075.099976,1075.099976,143.0
1,2016-01-05,1075.599976,1081.5,1075.300049,1078.400024,1078.400024,82.0
2,2016-01-06,1081.599976,1093.699951,1081.599976,1091.900024,1091.900024,52.0
3,2016-01-07,1091.599976,1109.400024,1091.599976,1107.699951,1107.699951,122.0
4,2016-01-08,1111.099976,1111.099976,1093.0,1097.800049,1097.800049,98.0


In [13]:
#drop rows with null values
test_data = test_data.dropna()

#Add gain and gain_%
test_data['gain'] = test_data['Close']-test_data['Close'].shift()
test_data['gain_%'] = test_data['gain'] / test_data['Close'].shift(1)

#Drop unnecessary columns
test_data = test_data.drop(columns=['Open','High','Low','Adj Close','Volume'])

#Rename Columns
test_data = test_data.rename(columns = {'Date':'date', 'Close':'close'})

test_data.head()

Unnamed: 0,date,close,gain,gain_%
0,2016-01-04,1075.099976,,
1,2016-01-05,1078.400024,3.300048,0.00307
2,2016-01-06,1091.900024,13.5,0.012519
3,2016-01-07,1107.699951,15.799927,0.01447
4,2016-01-08,1097.800049,-9.899902,-0.008937


In [14]:
#Add pos_neg column to determine gain vs. loss

test_data['pos_neg'] = ''
for ind, row in test_data.iterrows():
    if row['gain_%'] > 0:
            test_data.loc[ind, 'pos_neg'] = 'pos'
    else:
        test_data.loc[ind, 'pos_neg'] = 'neg'
test_data

Unnamed: 0,date,close,gain,gain_%,pos_neg
0,2016-01-04,1075.099976,,,neg
1,2016-01-05,1078.400024,3.300048,0.003070,pos
2,2016-01-06,1091.900024,13.500000,0.012519,pos
3,2016-01-07,1107.699951,15.799927,0.014470,pos
4,2016-01-08,1097.800049,-9.899902,-0.008937,neg
...,...,...,...,...,...
1512,2020-12-23,1874.699951,8.099975,0.004339,pos
1515,2020-12-28,1877.199951,2.500000,0.001334,pos
1516,2020-12-29,1879.699951,2.500000,0.001332,pos
1517,2020-12-30,1891.000000,11.300049,0.006012,pos


In [15]:
#Find consecutive days market has moved in one direction
#y.groupby((y != y.shift()).cumsum()).cumcount() + 1
#https://stackoverflow.com/questions/27626542/counting-consecutive-positive-value-in-python-array

test_data['streak'] = test_data['pos_neg'].groupby((test_data['pos_neg'] != test_data['pos_neg'].shift()).cumsum()).cumcount() + 1
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak
0,2016-01-04,1075.099976,,,neg,1
1,2016-01-05,1078.400024,3.300048,0.00307,pos,1
2,2016-01-06,1091.900024,13.5,0.012519,pos,2
3,2016-01-07,1107.699951,15.799927,0.01447,pos,3
4,2016-01-08,1097.800049,-9.899902,-0.008937,neg,1


In [16]:
#Assign to groups when pos_neg changes
#https://stackoverflow.com/questions/60334671/pandas-dataframe-how-to-find-consecutive-rows-that-meet-some-conditions

test_data['g'] = test_data['pos_neg'].ne(test_data['pos_neg'].shift()).cumsum()
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g
0,2016-01-04,1075.099976,,,neg,1,1
1,2016-01-05,1078.400024,3.300048,0.00307,pos,1,2
2,2016-01-06,1091.900024,13.5,0.012519,pos,2,2
3,2016-01-07,1107.699951,15.799927,0.01447,pos,3,2
4,2016-01-08,1097.800049,-9.899902,-0.008937,neg,1,3


In [17]:
#Use cumsum to find cummulative sums
#https://stackoverflow.com/questions/32890124/pandas-dataframe-running-sum-with-reset/32891081#32891081

test_data['cum']= test_data.groupby(['g'])['gain_%'].cumsum()
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum
0,2016-01-04,1075.099976,,,neg,1,1,
1,2016-01-05,1078.400024,3.300048,0.00307,pos,1,2,0.00307
2,2016-01-06,1091.900024,13.5,0.012519,pos,2,2,0.015588
3,2016-01-07,1107.699951,15.799927,0.01447,pos,3,2,0.030058
4,2016-01-08,1097.800049,-9.899902,-0.008937,neg,1,3,-0.008937


In [18]:
#https://stackoverflow.com/questions/39109045/numpy-where-with-multiple-conditions

#col         = 'consumption_energy'
#conditions  = [ df2[col] >= 400, (df2[col] < 400) & (df2[col]> 200), df2[col] <= 200 ]
#choices     = [ "high", 'medium', 'low' ]
    
#df2["energy_class"] = np.select(conditions, choices, default=np.nan)


In [19]:
#assign to bins based on % Gain and streak

# use pd.cut instead?
# https://stackoverflow.com/questions/46472809/python-binning-based-on-2-columns-in-pandas

test_data['bin'] = ''
for ind, row in test_data.iterrows():
    if row['cum'] >= 0 and row['cum'] < 0.0008 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'A1'
    elif row['cum'] >= 0.0008 and row['cum'] < 0.0016 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'B1'
    elif row['cum'] >= 0.0016 and row['cum'] < 0.0025 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'C1'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'D1'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'E1'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'F1'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'G1'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'H1'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'I1'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0275 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0200 and row['cum'] < 0.0225 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'I1'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0275 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'K1'
    elif row['cum'] >= 0.0275 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'K1'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'M1'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'N1'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'O1'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'P1'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'Q1'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 1:        
#            test_data.loc[ind, 'bin'] = 'R1'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'S1'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'T1'
#    elif row['cum'] >= 0.0500 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'U1'
    elif row['cum'] > 0 and row['cum'] < 0.0025 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'A2'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'B2'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'C2'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'E2'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'F2'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'G2'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'H2'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'I2'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'J2'
    elif row['cum'] >= 0.0250 and row['cum'] < 0.0300 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'K2'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'L2'
    elif row['cum'] >= 0.0300 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'M2'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'N2'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'O2'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'P2'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'Q2'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'R2'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'S2'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'T2'
#    elif row['cum'] >= 0.0500 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'U2'
    elif row['cum'] > 0 and row['cum'] < 0.0050 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'A3'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'B3'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0100 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'C3'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0150 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'E3'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'F3'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0200 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'G3'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'H3'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'I3'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'J3'
    elif row['cum'] >= 0.0250 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'K3'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'L3'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'M3'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'N3'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'O3'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'P3'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'Q3'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'R3'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'S3'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'T3'
#    elif row['cum'] >= 0.0500 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'U3'
    elif row['cum'] > 0 and row['cum'] < 0.0125 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'A4'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'B4'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'C4'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'D4'
#    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'E4'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0200 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'F4'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'G4'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'H4'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0275 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'I4'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'J4'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'K4'
    elif row['cum'] >= 0.0275 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'L4'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'M4'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'N4'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'O4'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'P4'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'Q4'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'R4'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'S4'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'T4'
#    elif row['cum'] >= 0.0500 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'U4'
    elif row['cum'] > 0 and row['cum'] < 0.0100 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'A5'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'B5'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'C2'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'D5'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0200 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'E5'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'F5'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'G5'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'H5'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0300 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'I5'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'J5'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'K5'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'L5'
    elif row['cum'] >= 0.0300 and row['cum'] < 0.0400 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'M5'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'N5'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'O5'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'P5'
    elif row['cum'] >= 0.0400 and row['cum'] < 0.0500 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'Q5'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 5:
#            test_dataloc[ind, 'bin'] = 'R5'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'S5'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'T5'
    elif row['cum'] >= 0.0500 and row['streak'] >= 5:
            test_data.loc[ind, 'bin'] = 'U5'
    elif row['cum'] >= -0.0008 and row['cum'] < 0.0000 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-A1'
    elif row['cum'] >= -0.0016 and row['cum'] < 0.0008 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0016 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-F1'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-G1'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-H1'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-I1'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0175 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0225 and row['cum'] < -0.0200 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-I1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0225 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-K1'
    elif row['cum'] < -0.0275 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-K1'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-M1'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-N1'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-O1'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-P1'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-Q1'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-R1'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-S1'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-T1'
#    elif row['cum'] < -0.0500 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-U1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0000 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-A2'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-B2'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-C2'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-E2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-F2'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-G2'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-H2'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-I2'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-J2'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0250 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-K2'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-L2'
    elif row['cum'] < -0.0300 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-M2'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-N2'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-O2'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-P2'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-Q2'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-R2'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-S2'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-T2'
#    elif row['cum'] < -0.0500 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-U2'
    elif row['cum'] >= -0.0050 and row['cum'] < 0.0000 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-A3'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-B3'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0050 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-C3'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0100 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-E3'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-F3'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0150 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-G3'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-H3'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-I3'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-J3'
    elif row['cum'] < -0.0250 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-K3'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-L3'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-M3'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-N3'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-O3'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-P3'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-Q3'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-R3'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-S3'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-T3'
#    elif row['cum'] < -0.0500 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-U3'
    elif row['cum'] >= -0.0125 and row['cum'] < 0.0000 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-A4'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-B4'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-C4'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-D4'
#    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-E4'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0125 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-F4'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-G4'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-H4'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0200 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-I4'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-J4'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-K4'
    elif row['cum'] < -0.0275 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-L4'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-M4'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-N4'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-O4'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-P4'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-Q4'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-R4'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-S4'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-T4'
#    elif row['cum'] < -0.0500 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-U4'
    elif row['cum'] >= -0.0100 and row['cum'] < 0.0000 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-A5'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-B5'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-C2'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-D5'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0100 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-E5'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-F5'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-G5'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-H5'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0200 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-I5'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-J5'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-K5'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-L5'
    elif row['cum'] >= -0.0400 and row['cum'] < -0.0300 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-M5'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-N5'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-O5'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 5:
#            test_dataloc[ind, 'bin'] = '-P5'
    elif row['cum'] >= -0.0500 and row['cum'] < -0.0400 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-Q5'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-R5'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-S5'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-T5'
    elif row['cum'] < -0.0500 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-U5'
    elif row['cum'] < 0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = 'V6'
    elif row['cum'] >= 0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = 'W6'
    elif row['cum'] > -0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = '-V6'
    elif row['cum'] <= -0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = '-W6'
    else:
        test_data.loc[ind, 'bin'] ='z'
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2016-01-04,1075.099976,,,neg,1,1,,z
1,2016-01-05,1078.400024,3.300048,0.00307,pos,1,2,0.00307,D1
2,2016-01-06,1091.900024,13.5,0.012519,pos,2,2,0.015588,G2
3,2016-01-07,1107.699951,15.799927,0.01447,pos,3,2,0.030058,K3
4,2016-01-08,1097.800049,-9.899902,-0.008937,neg,1,3,-0.008937,-F1


In [20]:
#Make sure all rows are assigned to a bin
z = test_data.loc[test_data['bin'] == 'z']
z

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2016-01-04,1075.099976,,,neg,1,1,,z


In [21]:
#new column to shift cum values up one row
#df['gdp'] = df['gdp'].shift(-1)
test_data['next_day'] = test_data['gain_%'].shift(-1)
test_data['p_n'] = test_data['pos_neg'].shift(-1)

#drop 'g' column
test_data = test_data.drop(columns=['g'])
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,cum,bin,next_day,p_n
0,2016-01-04,1075.099976,,,neg,1,,z,0.00307,pos
1,2016-01-05,1078.400024,3.300048,0.00307,pos,1,0.00307,D1,0.012519,pos
2,2016-01-06,1091.900024,13.5,0.012519,pos,2,0.015588,G2,0.01447,pos
3,2016-01-07,1107.699951,15.799927,0.01447,pos,3,0.030058,K3,-0.008937,neg
4,2016-01-08,1097.800049,-9.899902,-0.008937,neg,1,-0.008937,-F1,-0.001184,neg


## Choose time period and test theory versus market

In [22]:
#Change date column to datetime
test_data['date'] =pd.to_datetime(test_data.date)

#Create new df by merging prices and df_3. Sort by date and reset index.
predict =  pd.merge(test_data, df_1, on = 'bin', how = 'inner').sort_values(by=['date'], ascending=True).reset_index()

#Drop columns.
predict = predict.drop(columns=['index','gain'])
predict.head()

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg
0,2016-01-04,1075.099976,,neg,1,,z,0.00307,pos,0.00603
1,2016-01-05,1078.400024,0.00307,pos,1,0.00307,D1,0.012519,pos,-0.001331
2,2016-01-06,1091.900024,0.012519,pos,2,0.015588,G2,0.01447,pos,-0.001618
3,2016-01-07,1107.699951,0.01447,pos,3,0.030058,K3,-0.008937,neg,7e-05
4,2016-01-08,1097.800049,-0.008937,neg,1,-0.008937,-F1,-0.001184,neg,0.003656


In [23]:
#Set Time Period

#predict = predict[(predict['date'] > '2017-01-01') & (predict['date'] < '2017-12-31')]

In [24]:
#Use lambda function to add buy/sell column in order to separate next_day by positive/negative
predict['buy_sell'] = predict['next_day_avg'].apply(lambda x: float(1) if x > 0 else float(-1))
predict.head()

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell
0,2016-01-04,1075.099976,,neg,1,,z,0.00307,pos,0.00603,1.0
1,2016-01-05,1078.400024,0.00307,pos,1,0.00307,D1,0.012519,pos,-0.001331,-1.0
2,2016-01-06,1091.900024,0.012519,pos,2,0.015588,G2,0.01447,pos,-0.001618,-1.0
3,2016-01-07,1107.699951,0.01447,pos,3,0.030058,K3,-0.008937,neg,7e-05,1.0
4,2016-01-08,1097.800049,-0.008937,neg,1,-0.008937,-F1,-0.001184,neg,0.003656,1.0


In [25]:
#Create values column, then delete later
predict['values'] = predict['next_day'] * predict['buy_sell']+1

#Add profit column for cummulative return on $1000 
#df['PROFIT']=df['PROFIT'].fillna(df.RATIO.shift().add(1).iloc[2:].cumprod()*20000)
#https://stackoverflow.com/questions/55518348/cumulative-multiplication-in-pandas-python
#predict['profit']=(predict['values'].shift().add(1).cumprod()*1000).round(2)

predict['profit']=(predict['values'].cumprod() * 1000).round(2)

#predict = predict.drop(columns=['values'])
predict.head()

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell,values,profit
0,2016-01-04,1075.099976,,neg,1,,z,0.00307,pos,0.00603,1.0,1.00307,1003.07
1,2016-01-05,1078.400024,0.00307,pos,1,0.00307,D1,0.012519,pos,-0.001331,-1.0,0.987481,990.51
2,2016-01-06,1091.900024,0.012519,pos,2,0.015588,G2,0.01447,pos,-0.001618,-1.0,0.98553,976.18
3,2016-01-07,1107.699951,0.01447,pos,3,0.030058,K3,-0.008937,neg,7e-05,1.0,0.991063,967.46
4,2016-01-08,1097.800049,-0.008937,neg,1,-0.008937,-F1,-0.001184,neg,0.003656,1.0,0.998816,966.31


In [26]:
#Add market column for cummulative return on $1000 left in market

#predict['market']=(predict['gain_%'].shift().add(1).cumprod()*1000).round(2)
predict['gain_%']=predict['gain_%']+1
predict['market']=(predict['gain_%'].cumprod() * 1000).round(2)
predict

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell,values,profit,market
0,2016-01-04,1075.099976,,neg,1,,z,0.003070,pos,0.006030,1.0,1.003070,1003.07,
1,2016-01-05,1078.400024,1.003070,pos,1,0.003070,D1,0.012519,pos,-0.001331,-1.0,0.987481,990.51,1003.07
2,2016-01-06,1091.900024,1.012519,pos,2,0.015588,G2,0.014470,pos,-0.001618,-1.0,0.985530,976.18,1015.63
3,2016-01-07,1107.699951,1.014470,pos,3,0.030058,K3,-0.008937,neg,0.000070,1.0,0.991063,967.46,1030.32
4,2016-01-08,1097.800049,0.991063,neg,1,-0.008937,-F1,-0.001184,neg,0.003656,1.0,0.998816,966.31,1021.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1240,2020-12-23,1874.699951,1.004339,pos,1,0.004339,D1,0.001334,pos,-0.001331,-1.0,0.998666,1302.67,1743.88
1241,2020-12-28,1877.199951,1.001334,pos,2,0.005673,C2,0.001332,pos,-0.003504,-1.0,0.998668,1300.93,1746.20
1242,2020-12-29,1879.699951,1.001332,pos,3,0.007005,C3,0.006012,pos,0.003251,1.0,1.006012,1308.76,1748.53
1243,2020-12-30,1891.000000,1.006012,pos,4,0.013016,F4,0.001111,pos,-0.000646,-1.0,0.998889,1307.30,1759.04


In [27]:
#df.iloc[0] / df.iloc[-1]
#df.iloc[:,1] # second column of data frame
print(((predict['profit'].iloc[-2]-predict['profit'].iloc[1]) / predict['profit'].iloc[1]) * 100)
print(((predict['close'].iloc[-1]-predict['close'].iloc[1]) / predict['close'].iloc[1]) * 100)
print(((predict['market'].iloc[-1]-predict['market'].iloc[1]) / predict['market'].iloc[1]) * 100)

31.982514058414345
75.54710069257196
75.5600307057334
