## Can recent performance can predict stock movements?

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime

%matplotlib inline

In [2]:
#import ^GSPC.csv file (from Yahoo Finance)
prices = pd.read_csv('../data/GSPC_07-16.csv')
prices.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2007-01-03,1418.030029,1429.420044,1407.859985,1416.599976,1416.599976,3429160000
1,2007-01-04,1416.599976,1421.839966,1408.430054,1418.339966,1418.339966,3004460000
2,2007-01-05,1418.339966,1418.339966,1405.75,1409.709961,1409.709961,2919400000
3,2007-01-08,1409.26001,1414.97998,1403.969971,1412.839966,1412.839966,2763340000
4,2007-01-09,1412.839966,1415.609985,1405.420044,1412.109985,1412.109985,3038380000


In [3]:
#Add gain and gain_% columns
#prices['gain'] = prices['Close'] - prices['Open'](this doesn't work because price changes overnight)
prices['gain'] = prices['Close']-prices['Close'].shift()
prices['gain_%'] = prices['gain'] / prices['Close'].shift(1)

#Drop unnecessary columns
prices = prices.drop(columns=['Open','High','Low','Adj Close','Volume'])

#Rename Columns
prices = prices.rename(columns = {'Date':'date', 'Close':'close'})

prices.head()

Unnamed: 0,date,close,gain,gain_%
0,2007-01-03,1416.599976,,
1,2007-01-04,1418.339966,1.73999,0.001228
2,2007-01-05,1409.709961,-8.630005,-0.006085
3,2007-01-08,1412.839966,3.130005,0.00222
4,2007-01-09,1412.109985,-0.729981,-0.000517


In [4]:
#Add pos_neg column to determine gain vs. loss

prices['pos_neg'] = ''
for ind, row in prices.iterrows():
    if row['gain_%'] > 0:
            prices.loc[ind, 'pos_neg'] = 'pos'
    else:
        prices.loc[ind, 'pos_neg'] = 'neg'
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg
0,2007-01-03,1416.599976,,,neg
1,2007-01-04,1418.339966,1.73999,0.001228,pos
2,2007-01-05,1409.709961,-8.630005,-0.006085,neg
3,2007-01-08,1412.839966,3.130005,0.00222,pos
4,2007-01-09,1412.109985,-0.729981,-0.000517,neg


In [5]:
#Find consecutive days market has moved in one direction
#y.groupby((y != y.shift()).cumsum()).cumcount() + 1
#https://stackoverflow.com/questions/27626542/counting-consecutive-positive-value-in-python-array

prices['streak'] = prices['pos_neg'].groupby((prices['pos_neg'] != prices['pos_neg'].shift()).cumsum()).cumcount() + 1
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak
0,2007-01-03,1416.599976,,,neg,1
1,2007-01-04,1418.339966,1.73999,0.001228,pos,1
2,2007-01-05,1409.709961,-8.630005,-0.006085,neg,1
3,2007-01-08,1412.839966,3.130005,0.00222,pos,1
4,2007-01-09,1412.109985,-0.729981,-0.000517,neg,1


In [6]:
#Assign to groups when pos_neg changes
#https://stackoverflow.com/questions/60334671/pandas-dataframe-how-to-find-consecutive-rows-that-meet-some-conditions

prices['g'] = prices['pos_neg'].ne(prices['pos_neg'].shift()).cumsum()
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g
0,2007-01-03,1416.599976,,,neg,1,1
1,2007-01-04,1418.339966,1.73999,0.001228,pos,1,2
2,2007-01-05,1409.709961,-8.630005,-0.006085,neg,1,3
3,2007-01-08,1412.839966,3.130005,0.00222,pos,1,4
4,2007-01-09,1412.109985,-0.729981,-0.000517,neg,1,5


In [7]:
#Use cumsum to find cummulative sums
#https://stackoverflow.com/questions/32890124/pandas-dataframe-running-sum-with-reset/32891081#32891081

prices['cum']= prices.groupby(['g'])['gain_%'].cumsum()
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum
0,2007-01-03,1416.599976,,,neg,1,1,
1,2007-01-04,1418.339966,1.73999,0.001228,pos,1,2,0.001228
2,2007-01-05,1409.709961,-8.630005,-0.006085,neg,1,3,-0.006085
3,2007-01-08,1412.839966,3.130005,0.00222,pos,1,4,0.00222
4,2007-01-09,1412.109985,-0.729981,-0.000517,neg,1,5,-0.000517


In [8]:
#assign to bins based on % Gain and streak

# use pd.cut instead?
# https://stackoverflow.com/questions/46472809/python-binning-based-on-2-columns-in-pandas

prices['bin'] = ''
for ind, row in prices.iterrows():
    if row['cum'] >= 0 and row['cum'] < 0.0025 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'A1'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'B1'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'C1'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'D1'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'E1'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'F1'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'G1'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'H1'
#    elif row['cum'] >= 0.0200 and row['cum'] < 0.0225 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'I1'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0275 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'K1'
    elif row['cum'] >= 0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'L1'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'M1'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'N1'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'O1'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'P1'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'Q1'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 1:        
#            prices.loc[ind, 'bin'] = 'R1'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'S1'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'T1'
#    elif row['cum'] >= 0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'U1'
    elif row['cum'] > 0 and row['cum'] < 0.0025 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'A2'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'B2'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'C2'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'E2'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'F2'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'G2'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'H2'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'I2'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'J2'
    elif row['cum'] >= 0.0250 and row['cum'] < 0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'K2'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'L2'
    elif row['cum'] >= 0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'M2'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'N2'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'O2'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'P2'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'Q2'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'R2'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'S2'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'T2'
#    elif row['cum'] >= 0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'U2'
    elif row['cum'] > 0 and row['cum'] < 0.0050 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'A3'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'B3'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0100 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'C3'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0150 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'E3'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'F3'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0200 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'G3'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'H3'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'I3'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'J3'
    elif row['cum'] >= 0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'K3'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'L3'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'M3'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'N3'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'O3'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'P3'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'Q3'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'R3'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'S3'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'T3'
#    elif row['cum'] >= 0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'U3'
    elif row['cum'] > 0 and row['cum'] < 0.0125 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'A4'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'B4'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'C4'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'D4'
#    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'E4'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0200 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'F4'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'G4'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'H4'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'I4'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'J4'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'K4'
    elif row['cum'] >= 0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'L4'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'M4'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'N4'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'O4'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'P4'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'Q4'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'R4'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'S4'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'T4'
#    elif row['cum'] >= 0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'U4'
    elif row['cum'] > 0 and row['cum'] < 0.0100 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'A5'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'B5'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'C2'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'D5'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0200 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'E5'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'F5'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'G5'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'H5'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0300 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'I5'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'J5'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'K5'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'L5'
    elif row['cum'] >= 0.0300 and row['cum'] < 0.0400 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'M5'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'N5'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'O5'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'P5'
    elif row['cum'] >= 0.0400 and row['cum'] < 0.0500 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'Q5'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'R5'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'S5'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'T5'
    elif row['cum'] >= 0.0500 and row['streak'] >= 5:
            prices.loc[ind, 'bin'] = 'U5'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0000 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-A1'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-C1'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-D1'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-E1'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-F1'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-G1'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0175 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-H1'
#    elif row['cum'] >= -0.0225 and row['cum'] < -0.0200 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-I1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0225 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-K1'
    elif row['cum'] < -0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-L1'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-M1'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-N1'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-O1'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-P1'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-Q1'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-R1'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-S1'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-T1'
#    elif row['cum'] < -0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-U1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0000 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-A2'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-B2'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-C2'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-E2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-F2'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-G2'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-H2'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-I2'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-J2'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0250 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-K2'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-L2'
    elif row['cum'] < -0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-M2'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-N2'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-O2'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-P2'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-Q2'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-R2'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-S2'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-T2'
#    elif row['cum'] < -0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-U2'
    elif row['cum'] >= -0.0050 and row['cum'] < 0.0000 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-A3'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-B3'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0050 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-C3'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0100 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-E3'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-F3'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0150 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-G3'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-H3'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-I3'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-J3'
    elif row['cum'] < -0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-K3'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-L3'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-M3'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-N3'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-O3'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-P3'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-Q3'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-R3'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-S3'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-T3'
#    elif row['cum'] < -0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-U3'
    elif row['cum'] >= -0.0125 and row['cum'] < 0.0000 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-A4'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-B4'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-C4'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-D4'
#    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-E4'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0125 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-F4'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-G4'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-H4'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0200 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-I4'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-J4'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-K4'
    elif row['cum'] < -0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-L4'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-M4'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-N4'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-O4'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-P4'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-Q4'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-R4'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-S4'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-T4'
#    elif row['cum'] < -0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-U4'
    elif row['cum'] >= -0.0100 and row['cum'] < 0.0000 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-A5'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-B5'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-C2'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-D5'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0100 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-E5'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-F5'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-G5'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-H5'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0200 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-I5'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-J5'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-K5'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-L5'
    elif row['cum'] >= -0.0400 and row['cum'] < -0.0300 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-M5'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-N5'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-O5'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-P5'
    elif row['cum'] >= -0.0500 and row['cum'] < -0.0400 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-Q5'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-R5'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-S5'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-T5'
    elif row['cum'] < -0.0500 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-U5'
    elif row['cum'] < 0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = 'V6'
    elif row['cum'] >= 0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = 'W6'
    elif row['cum'] > -0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = '-V6'
    elif row['cum'] <= -0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = '-W6'
    else:
        prices.loc[ind, 'bin'] ='z'
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2007-01-03,1416.599976,,,neg,1,1,,z
1,2007-01-04,1418.339966,1.73999,0.001228,pos,1,2,0.001228,A1
2,2007-01-05,1409.709961,-8.630005,-0.006085,neg,1,3,-0.006085,-C1
3,2007-01-08,1412.839966,3.130005,0.00222,pos,1,4,0.00222,A1
4,2007-01-09,1412.109985,-0.729981,-0.000517,neg,1,5,-0.000517,-A1


In [None]:
#Make sure all rows are assigned to a bin
z = prices.loc[prices['bin'] == 'z']
z

In [9]:
#new column to shift cum values up one row
#df['gdp'] = df['gdp'].shift(-1)
prices['next_day'] = prices['gain_%'].shift(-1)
prices['p_n'] = prices['pos_neg'].shift(-1)

#drop 'g' column
prices = prices.drop(columns=['g'])
prices.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,cum,bin,next_day,p_n
0,2007-01-03,1416.599976,,,neg,1,,z,0.001228,pos
1,2007-01-04,1418.339966,1.73999,0.001228,pos,1,0.001228,A1,-0.006085,neg
2,2007-01-05,1409.709961,-8.630005,-0.006085,neg,1,-0.006085,-C1,0.00222,pos
3,2007-01-08,1412.839966,3.130005,0.00222,pos,1,0.00222,A1,-0.000517,neg
4,2007-01-09,1412.109985,-0.729981,-0.000517,neg,1,-0.000517,-A1,0.00194,pos


## Create new df's

In [10]:
#get average total return for next day for each bin, then convert to df
#df.groupby('Column1')['Column2'].mean()

df_1 = prices.groupby('bin')['next_day'].mean().to_frame().reset_index()
df_1 = df_1.rename(columns = {'next_day':'next_day_avg'})

df_1.head()

Unnamed: 0,bin,next_day_avg
0,-A1,0.000409
1,-A2,0.001567
2,-A3,0.000801
3,-A4,-0.007572
4,-B1,0.001518


In [None]:
#df_1.loc[df_1['bin'] == 'F4']

#df_1.head()

In [30]:
#Get count for pos and neg returns by bin and p_n, then get counts and %

g = prices.groupby('bin')['p_n']
df_2 = pd.concat([g.value_counts(), 
                g.value_counts(normalize=True).mul(100)],axis=1, keys=('counts','percentage')).reset_index()

df_2.head(50)

Unnamed: 0,bin,p_n,counts,percentage
0,-A1,pos,123,56.944444
1,-A1,neg,93,43.055556
2,-A2,pos,15,65.217391
3,-A2,neg,8,34.782609
4,-A3,pos,3,75.0
5,-A3,neg,1,25.0
6,-A4,neg,3,75.0
7,-A4,pos,1,25.0
8,-B1,pos,74,58.730159
9,-B1,neg,52,41.269841


In [None]:
#Get average next day return by bin and p_n (same as df_1 except averages grouped by positive or negative)

df_3 = prices.groupby(['bin', 'p_n'])['next_day'].mean().reset_index()
df_3.head()

In [None]:
#merge df_1 and df_2
df_4 = pd.merge(df_2, df_3, on = ['bin', 'p_n'], how = 'inner')
df_4.head()

In [None]:
#merge df_4 and df_3
perf = pd.merge(df_4, df_1, on = 'bin', how = 'left')
perf.head()

In [None]:
#z1 = perf[perf.bin.str.endswith('5')]
#z1 = z1.loc[z1['p_n'] == 'pos']
#z1.head()

In [None]:
#plt.bar('bin', 'percentage', data = z1)
#plt.hlines(y=50, xmin = -1, xmax = 10, color = 'orange')
#plt.xlim(-1, 10)
#plt.xticks(rotation = 70)
#plt.title('Next Day Return with 5-or-More-Day Streak')
#plt.xlabel('Previous Day(s) Activity')
#plt.ylabel('%');

## Import test data and run through same first steps as above

In [11]:
#Import new ^GSPC.csv file (from Yahoo Finance)
test_data = pd.read_csv('../data/GSPC_17-20.csv')
test_data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-01-03,2251.570068,2263.879883,2245.129883,2257.830078,2257.830078,3770530000
1,2017-01-04,2261.600098,2272.820068,2261.600098,2270.75,2270.75,3764890000
2,2017-01-05,2268.179932,2271.5,2260.449951,2269.0,2269.0,3761820000
3,2017-01-06,2271.139893,2282.100098,2264.060059,2276.97998,2276.97998,3339890000
4,2017-01-09,2273.590088,2275.48999,2268.899902,2268.899902,2268.899902,3217610000


In [12]:
#Add gain and gain_%
test_data['gain'] = test_data['Close']-test_data['Close'].shift()
test_data['gain_%'] = test_data['gain'] / test_data['Close'].shift(1)

#Drop unnecessary columns
test_data = test_data.drop(columns=['Open','High','Low','Adj Close','Volume'])

#Rename Columns
test_data = test_data.rename(columns = {'Date':'date', 'Close':'close'})

test_data.head()

Unnamed: 0,date,close,gain,gain_%
0,2017-01-03,2257.830078,,
1,2017-01-04,2270.75,12.919922,0.005722
2,2017-01-05,2269.0,-1.75,-0.000771
3,2017-01-06,2276.97998,7.97998,0.003517
4,2017-01-09,2268.899902,-8.080078,-0.003549


In [13]:
#Add pos_neg column to determine gain vs. loss

test_data['pos_neg'] = ''
for ind, row in test_data.iterrows():
    if row['gain_%'] > 0:
            test_data.loc[ind, 'pos_neg'] = 'pos'
    else:
        test_data.loc[ind, 'pos_neg'] = 'neg'
test_data

Unnamed: 0,date,close,gain,gain_%,pos_neg
0,2017-01-03,2257.830078,,,neg
1,2017-01-04,2270.750000,12.919922,0.005722,pos
2,2017-01-05,2269.000000,-1.750000,-0.000771,neg
3,2017-01-06,2276.979980,7.979980,0.003517,pos
4,2017-01-09,2268.899902,-8.080078,-0.003549,neg
...,...,...,...,...,...
1001,2020-12-23,3690.010010,2.750000,0.000746,pos
1002,2020-12-24,3703.060059,13.050049,0.003537,pos
1003,2020-12-28,3735.360107,32.300048,0.008723,pos
1004,2020-12-29,3727.040039,-8.320068,-0.002227,neg


In [14]:
test_data['gain'].sum()

1474.209961

In [15]:
#Find consecutive days market has moved in one direction
#y.groupby((y != y.shift()).cumsum()).cumcount() + 1
#https://stackoverflow.com/questions/27626542/counting-consecutive-positive-value-in-python-array

test_data['streak'] = test_data['pos_neg'].groupby((test_data['pos_neg'] != test_data['pos_neg'].shift()).cumsum()).cumcount() + 1
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak
0,2017-01-03,2257.830078,,,neg,1
1,2017-01-04,2270.75,12.919922,0.005722,pos,1
2,2017-01-05,2269.0,-1.75,-0.000771,neg,1
3,2017-01-06,2276.97998,7.97998,0.003517,pos,1
4,2017-01-09,2268.899902,-8.080078,-0.003549,neg,1


In [16]:
#Assign to groups when pos_neg changes
#https://stackoverflow.com/questions/60334671/pandas-dataframe-how-to-find-consecutive-rows-that-meet-some-conditions

test_data['g'] = test_data['pos_neg'].ne(test_data['pos_neg'].shift()).cumsum()
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g
0,2017-01-03,2257.830078,,,neg,1,1
1,2017-01-04,2270.75,12.919922,0.005722,pos,1,2
2,2017-01-05,2269.0,-1.75,-0.000771,neg,1,3
3,2017-01-06,2276.97998,7.97998,0.003517,pos,1,4
4,2017-01-09,2268.899902,-8.080078,-0.003549,neg,1,5


In [17]:
#Use cumsum to find cummulative sums
#https://stackoverflow.com/questions/32890124/pandas-dataframe-running-sum-with-reset/32891081#32891081

test_data['cum']= test_data.groupby(['g'])['gain_%'].cumsum()
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum
0,2017-01-03,2257.830078,,,neg,1,1,
1,2017-01-04,2270.75,12.919922,0.005722,pos,1,2,0.005722
2,2017-01-05,2269.0,-1.75,-0.000771,neg,1,3,-0.000771
3,2017-01-06,2276.97998,7.97998,0.003517,pos,1,4,0.003517
4,2017-01-09,2268.899902,-8.080078,-0.003549,neg,1,5,-0.003549


In [18]:
#https://stackoverflow.com/questions/39109045/numpy-where-with-multiple-conditions

#col         = 'consumption_energy'
#conditions  = [ df2[col] >= 400, (df2[col] < 400) & (df2[col]> 200), df2[col] <= 200 ]
#choices     = [ "high", 'medium', 'low' ]
    
#df2["energy_class"] = np.select(conditions, choices, default=np.nan)


In [19]:
#assign to bins based on % Gain and streak

# use pd.cut instead?
# https://stackoverflow.com/questions/46472809/python-binning-based-on-2-columns-in-pandas

test_data['bin'] = ''
for ind, row in test_data.iterrows():
    if row['cum'] >= 0 and row['cum'] < 0.0025 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'A1'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'B1'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'C1'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'D1'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'E1'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'F1'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'G1'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0275 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'H1'
#    elif row['cum'] >= 0.0200 and row['cum'] < 0.0225 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'I1'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0275 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'K1'
    elif row['cum'] >= 0.0275 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = 'L1'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'M1'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'N1'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'O1'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'P1'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'Q1'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 1:        
#            test_data.loc[ind, 'bin'] = 'R1'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'S1'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'T1'
#    elif row['cum'] >= 0.0500 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = 'U1'
    elif row['cum'] > 0 and row['cum'] < 0.0025 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'A2'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'B2'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'C2'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'E2'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'F2'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'G2'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'H2'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'I2'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'J2'
    elif row['cum'] >= 0.0250 and row['cum'] < 0.0300 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'K2'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'L2'
    elif row['cum'] >= 0.0300 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = 'M2'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'N2'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'O2'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'P2'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'Q2'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'R2'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'S2'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'T2'
#    elif row['cum'] >= 0.0500 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = 'U2'
    elif row['cum'] > 0 and row['cum'] < 0.0050 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'A3'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'B3'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0100 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'C3'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0150 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'E3'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'F3'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0200 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'G3'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'H3'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'I3'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'J3'
    elif row['cum'] >= 0.0250 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = 'K3'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'L3'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'M3'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'N3'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'O3'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'P3'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'Q3'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'R3'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'S3'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'T3'
#    elif row['cum'] >= 0.0500 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = 'U3'
    elif row['cum'] > 0 and row['cum'] < 0.0125 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'A4'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'B4'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'C4'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'D4'
#    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'E4'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0200 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'F4'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'G4'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'H4'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0275 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'I4'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'J4'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'K4'
    elif row['cum'] >= 0.0275 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = 'L4'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'M4'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'N4'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'O4'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'P4'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'Q4'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'R4'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'S4'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'T4'
#    elif row['cum'] >= 0.0500 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = 'U4'
    elif row['cum'] > 0 and row['cum'] < 0.0100 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'A5'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'B5'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'C2'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'D5'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0200 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'E5'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'F5'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'G5'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'H5'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0300 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'I5'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'J5'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'K5'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'L5'
    elif row['cum'] >= 0.0300 and row['cum'] < 0.0400 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'M5'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'N5'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'O5'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'P5'
    elif row['cum'] >= 0.0400 and row['cum'] < 0.0500 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = 'Q5'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 5:
#            test_dataloc[ind, 'bin'] = 'R5'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'S5'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = 'T5'
    elif row['cum'] >= 0.0500 and row['streak'] >= 5:
            test_data.loc[ind, 'bin'] = 'U5'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0000 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-A1'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-C1'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-D1'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-E1'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-F1'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-G1'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0175 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-H1'
#    elif row['cum'] >= -0.0225 and row['cum'] < -0.0200 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-I1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0225 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-K1'
    elif row['cum'] < -0.0275 and row['streak'] == 1:
            test_data.loc[ind, 'bin'] = '-L1'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-M1'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-N1'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-O1'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-P1'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-Q1'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-R1'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-S1'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-T1'
#    elif row['cum'] < -0.0500 and row['streak'] == 1:
#            test_data.loc[ind, 'bin'] = '-U1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0000 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-A2'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-B2'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-C2'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-E2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-F2'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-G2'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-H2'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-I2'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-J2'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0250 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-K2'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-L2'
    elif row['cum'] < -0.0300 and row['streak'] == 2:
            test_data.loc[ind, 'bin'] = '-M2'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-N2'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-O2'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-P2'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-Q2'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-R2'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-S2'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-T2'
#    elif row['cum'] < -0.0500 and row['streak'] == 2:
#            test_data.loc[ind, 'bin'] = '-U2'
    elif row['cum'] >= -0.0050 and row['cum'] < 0.0000 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-A3'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-B3'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0050 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-C3'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0100 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-E3'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-F3'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0150 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-G3'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-H3'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-I3'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-J3'
    elif row['cum'] < -0.0250 and row['streak'] == 3:
            test_data.loc[ind, 'bin'] = '-K3'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-L3'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-M3'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-N3'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-O3'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-P3'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-Q3'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-R3'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-S3'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-T3'
#    elif row['cum'] < -0.0500 and row['streak'] == 3:
#            test_data.loc[ind, 'bin'] = '-U3'
    elif row['cum'] >= -0.0125 and row['cum'] < 0.0000 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-A4'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-B4'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-C4'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-D4'
#    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-E4'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0125 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-F4'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-G4'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-H4'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0200 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-I4'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-J4'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-K4'
    elif row['cum'] < -0.0275 and row['streak'] == 4:
            test_data.loc[ind, 'bin'] = '-L4'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-M4'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-N4'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-O4'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-P4'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-Q4'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-R4'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-S4'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-T4'
#    elif row['cum'] < -0.0500 and row['streak'] == 4:
#            test_data.loc[ind, 'bin'] = '-U4'
    elif row['cum'] >= -0.0100 and row['cum'] < 0.0000 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-A5'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-B5'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-C2'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-D5'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0100 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-E5'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-F5'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-G5'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-H5'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0200 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-I5'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-J5'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-K5'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-L5'
    elif row['cum'] >= -0.0400 and row['cum'] < -0.0300 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-M5'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-N5'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-O5'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 5:
#            test_dataloc[ind, 'bin'] = '-P5'
    elif row['cum'] >= -0.0500 and row['cum'] < -0.0400 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-Q5'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-R5'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-S5'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 5:
#            test_data.loc[ind, 'bin'] = '-T5'
    elif row['cum'] < -0.0500 and row['streak'] == 5:
            test_data.loc[ind, 'bin'] = '-U5'
    elif row['cum'] < 0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = 'V6'
    elif row['cum'] >= 0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = 'W6'
    elif row['cum'] > -0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = '-V6'
    elif row['cum'] <= -0.0500 and row['streak'] > 5:
            test_data.loc[ind, 'bin'] = '-W6'
    else:
        test_data.loc[ind, 'bin'] ='z'
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2017-01-03,2257.830078,,,neg,1,1,,z
1,2017-01-04,2270.75,12.919922,0.005722,pos,1,2,0.005722,C1
2,2017-01-05,2269.0,-1.75,-0.000771,neg,1,3,-0.000771,-A1
3,2017-01-06,2276.97998,7.97998,0.003517,pos,1,4,0.003517,B1
4,2017-01-09,2268.899902,-8.080078,-0.003549,neg,1,5,-0.003549,-B1


In [20]:
#Make sure all rows are assigned to a bin
z = test_data.loc[test_data['bin'] == 'z']
z

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,g,cum,bin
0,2017-01-03,2257.830078,,,neg,1,1,,z


In [21]:
#new column to shift cum values up one row
#df['gdp'] = df['gdp'].shift(-1)
test_data['next_day'] = test_data['gain_%'].shift(-1)
test_data['p_n'] = test_data['pos_neg'].shift(-1)

#drop 'g' column
test_data = test_data.drop(columns=['g'])
test_data.head()

Unnamed: 0,date,close,gain,gain_%,pos_neg,streak,cum,bin,next_day,p_n
0,2017-01-03,2257.830078,,,neg,1,,z,0.005722,pos
1,2017-01-04,2270.75,12.919922,0.005722,pos,1,0.005722,C1,-0.000771,neg
2,2017-01-05,2269.0,-1.75,-0.000771,neg,1,-0.000771,-A1,0.003517,pos
3,2017-01-06,2276.97998,7.97998,0.003517,pos,1,0.003517,B1,-0.003549,neg
4,2017-01-09,2268.899902,-8.080078,-0.003549,neg,1,-0.003549,-B1,0.0,neg


## Choose time period and test theory versus market

In [49]:
#Change date column to datetime
test_data['date'] =pd.to_datetime(test_data.date)

#Create new df by merging prices and df_3. Sort by date and reset index.
predict =  pd.merge(test_data, df_1, on = 'bin', how = 'inner').sort_values(by=['date'], ascending=True).reset_index()

#Drop columns.
predict = predict.drop(columns=['index','gain'])
predict.head()

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg
0,2017-01-03,2257.830078,,neg,1,,z,0.005722,pos,0.001228
1,2017-01-04,2270.75,0.005722,pos,1,0.005722,C1,-0.000771,neg,0.000743
2,2017-01-05,2269.0,-0.000771,neg,1,-0.000771,-A1,0.003517,pos,0.000409
3,2017-01-06,2276.97998,0.003517,pos,1,0.003517,B1,-0.003549,neg,-0.001303
4,2017-01-09,2268.899902,-0.003549,neg,1,-0.003549,-B1,0.0,neg,0.001518


In [50]:
#Set Time Period

predict = predict[(predict['date'] > '2017-01-01') & (predict['date'] < '2017-12-31')]

In [51]:
#Use lambda function to add buy/sell column in order to separate next_day by positive/negative
predict['buy_sell'] = predict['next_day_avg'].apply(lambda x: float(1) if x > 0 else float(-1))
predict.head()

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell
0,2017-01-03,2257.830078,,neg,1,,z,0.005722,pos,0.001228,1.0
1,2017-01-04,2270.75,0.005722,pos,1,0.005722,C1,-0.000771,neg,0.000743,1.0
2,2017-01-05,2269.0,-0.000771,neg,1,-0.000771,-A1,0.003517,pos,0.000409,1.0
3,2017-01-06,2276.97998,0.003517,pos,1,0.003517,B1,-0.003549,neg,-0.001303,-1.0
4,2017-01-09,2268.899902,-0.003549,neg,1,-0.003549,-B1,0.0,neg,0.001518,1.0


In [52]:
#Create values column, then delete later
predict['values'] = predict['next_day'] * predict['buy_sell']+1

#Add profit column for cummulative return on $1000 
#df['PROFIT']=df['PROFIT'].fillna(df.RATIO.shift().add(1).iloc[2:].cumprod()*20000)
#https://stackoverflow.com/questions/55518348/cumulative-multiplication-in-pandas-python
#predict['profit']=(predict['values'].shift().add(1).cumprod()*1000).round(2)

predict['profit']=(predict['values'].cumprod() * 1000).round(2)

#predict = predict.drop(columns=['values'])
predict.head()

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell,values,profit
0,2017-01-03,2257.830078,,neg,1,,z,0.005722,pos,0.001228,1.0,1.005722,1005.72
1,2017-01-04,2270.75,0.005722,pos,1,0.005722,C1,-0.000771,neg,0.000743,1.0,0.999229,1004.95
2,2017-01-05,2269.0,-0.000771,neg,1,-0.000771,-A1,0.003517,pos,0.000409,1.0,1.003517,1008.48
3,2017-01-06,2276.97998,0.003517,pos,1,0.003517,B1,-0.003549,neg,-0.001303,-1.0,1.003549,1012.06
4,2017-01-09,2268.899902,-0.003549,neg,1,-0.003549,-B1,0.0,neg,0.001518,1.0,1.0,1012.06


In [53]:
#Add market column for cummulative return on $1000 left in market

#predict['market']=(predict['gain_%'].shift().add(1).cumprod()*1000).round(2)
predict['gain_%']=predict['gain_%']+1
predict['market']=(predict['gain_%'].cumprod() * 1000).round(2)
predict

Unnamed: 0,date,close,gain_%,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell,values,profit,market
0,2017-01-03,2257.830078,,neg,1,,z,0.005722,pos,0.001228,1.0,1.005722,1005.72,
1,2017-01-04,2270.750000,1.005722,pos,1,0.005722,C1,-0.000771,neg,0.000743,1.0,0.999229,1004.95,1005.72
2,2017-01-05,2269.000000,0.999229,neg,1,-0.000771,-A1,0.003517,pos,0.000409,1.0,1.003517,1008.48,1004.95
3,2017-01-06,2276.979980,1.003517,pos,1,0.003517,B1,-0.003549,neg,-0.001303,-1.0,1.003549,1012.06,1008.48
4,2017-01-09,2268.899902,0.996451,neg,1,-0.003549,-B1,0.000000,neg,0.001518,1.0,1.000000,1012.06,1004.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,2017-12-22,2683.340088,0.999542,neg,1,-0.000458,-A1,-0.001058,neg,0.000409,1.0,0.998942,1101.37,1186.56
245,2017-12-26,2680.500000,0.998942,neg,2,-0.001517,-A2,0.000791,pos,0.001567,1.0,1.000791,1102.24,1185.31
246,2017-12-27,2682.620117,1.000791,pos,1,0.000791,A1,0.001834,pos,0.000773,1.0,1.001834,1104.27,1186.24
247,2017-12-28,2687.540039,1.001834,pos,2,0.002625,B2,-0.005183,neg,-0.000704,-1.0,1.005183,1109.99,1188.42


In [54]:
#df.iloc[0] / df.iloc[-1]
#df.iloc[:,1] # second column of data frame
print(((predict['profit'].iloc[-2]-predict['profit'].iloc[1]) / predict['profit'].iloc[1]) * 100)
print(((predict['close'].iloc[-1]-predict['close'].iloc[1]) / predict['close'].iloc[1]) * 100)
print(((predict['market'].iloc[-1]-predict['market'].iloc[1]) / predict['market'].iloc[1]) * 100)

10.452261306532659
17.741279621270504
17.55359344549178


## Steps not currently needed

In [None]:
#find highest cummulative sums
#https://stackoverflow.com/questions/47924400/python-pandas-assign-last-value-of-dataframe-group-to-all-entries-of-that-group

prices['b_new'] = prices.groupby('g')['des'].transform('last')
prices.head()

In [None]:
#only keep highest cummulatives sums
prices['new_col'] = ''
for ind, row in prices.iterrows():
    if row['des'] == row['b_new']:
            prices.loc[ind, 'new_col'] = row['des']
    else:
        prices.loc[ind, 'new_col'] = ''
prices.head()

In [None]:
data = {'x':  ['.0012','.0032','-.0041','.0063','-.0051'],
        'y': ['8.1','8.2','8.9','8.3','8.7']
        }
df = pd.DataFrame (data, columns = ['x','y'])
df['x'] = df['x'].astype(float)
df['y'] = df['y'].astype(float)
df

In [None]:
#df['market']=df['market'].fillna(df.x.shift().add(1).iloc[1:].cumprod()*20000)
initial_stock_price = 1000
df['x']=df['x']+1
df['market']=df['x'].cumprod() * initial_stock_price
df

In [None]:
df['net']=df['market']-df['market'].shift()
df['gain_%'] = df['net'] / df['market'].shift(1)
df

In [None]:
#could be helpful?

period = 5
prices['less_than_zero'] = (prices['% Gain']
                        .rolling(window=period, min_periods=period)
                        .agg(lambda x: (x < 0).sum()))

prices['greater_than_zero'] = (prices['% Gain']
                          .rolling(window=period,min_periods=period)
                          .agg(lambda x: (x > 0).sum()))
prices.head(25)