## Can recent performance can predict stock movements?

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime

%matplotlib inline

In [2]:
#import ^GSPC.csv file (from Yahoo Finance)
prices = pd.read_csv('../data/GSPC_10.csv')
prices.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2516 entries, 0 to 2515
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    2516 non-null   object 
 1   Open    2516 non-null   float64
 2   High    2516 non-null   float64
 3   Low     2516 non-null   float64
 4   Close   2516 non-null   float64
 5   Gain    2516 non-null   float64
 6   % Gain  2516 non-null   float64
dtypes: float64(6), object(1)
memory usage: 137.7+ KB


In [3]:
#drop columns
prices = prices.drop(columns=['High','Low'])
prices.head()

Unnamed: 0,Date,Open,Close,Gain,% Gain
0,1/21/2011,1283.63,1283.35,-0.28,-0.0002
1,1/24/2011,1283.29,1290.84,7.55,0.0059
2,1/25/2011,1288.17,1291.18,3.01,0.0023
3,1/26/2011,1291.97,1296.63,4.66,0.0036
4,1/27/2011,1297.51,1299.54,2.03,0.0016


In [4]:
#rename columns
prices = prices.rename(columns = {'Date':'date', 'Open':'open', 'Close':'close', 'Gain': 'gain', '% Gain': 'x_gain'})
prices.head()

Unnamed: 0,date,open,close,gain,x_gain
0,1/21/2011,1283.63,1283.35,-0.28,-0.0002
1,1/24/2011,1283.29,1290.84,7.55,0.0059
2,1/25/2011,1288.17,1291.18,3.01,0.0023
3,1/26/2011,1291.97,1296.63,4.66,0.0036
4,1/27/2011,1297.51,1299.54,2.03,0.0016


In [5]:
prices['pos_neg'] = ''
for ind, row in prices.iterrows():
    if row['x_gain'] > 0:
            prices.loc[ind, 'pos_neg'] = 'pos'
    else:
        prices.loc[ind, 'pos_neg'] = 'neg'
prices.head()

Unnamed: 0,date,open,close,gain,x_gain,pos_neg
0,1/21/2011,1283.63,1283.35,-0.28,-0.0002,neg
1,1/24/2011,1283.29,1290.84,7.55,0.0059,pos
2,1/25/2011,1288.17,1291.18,3.01,0.0023,pos
3,1/26/2011,1291.97,1296.63,4.66,0.0036,pos
4,1/27/2011,1297.51,1299.54,2.03,0.0016,pos


In [6]:
#Find consecutive days market has moved in one direction
#y.groupby((y != y.shift()).cumsum()).cumcount() + 1
#https://stackoverflow.com/questions/27626542/counting-consecutive-positive-value-in-python-array

prices['streak'] = prices['pos_neg'].groupby((prices['pos_neg'] != prices['pos_neg'].shift()).cumsum()).cumcount() + 1
prices.head()

Unnamed: 0,date,open,close,gain,x_gain,pos_neg,streak
0,1/21/2011,1283.63,1283.35,-0.28,-0.0002,neg,1
1,1/24/2011,1283.29,1290.84,7.55,0.0059,pos,1
2,1/25/2011,1288.17,1291.18,3.01,0.0023,pos,2
3,1/26/2011,1291.97,1296.63,4.66,0.0036,pos,3
4,1/27/2011,1297.51,1299.54,2.03,0.0016,pos,4


In [7]:
#Assign to groups when pos_neg changes
#https://stackoverflow.com/questions/60334671/pandas-dataframe-how-to-find-consecutive-rows-that-meet-some-conditions
prices['g'] = prices['pos_neg'].ne(prices['pos_neg'].shift()).cumsum()
prices.head()

Unnamed: 0,date,open,close,gain,x_gain,pos_neg,streak,g
0,1/21/2011,1283.63,1283.35,-0.28,-0.0002,neg,1,1
1,1/24/2011,1283.29,1290.84,7.55,0.0059,pos,1,2
2,1/25/2011,1288.17,1291.18,3.01,0.0023,pos,2,2
3,1/26/2011,1291.97,1296.63,4.66,0.0036,pos,3,2
4,1/27/2011,1297.51,1299.54,2.03,0.0016,pos,4,2


In [8]:
#Use cumsum to find cummulative sums
#https://stackoverflow.com/questions/32890124/pandas-dataframe-running-sum-with-reset/32891081#32891081
prices['cum']= prices.groupby(['g'])['x_gain'].cumsum()
prices.head()

Unnamed: 0,date,open,close,gain,x_gain,pos_neg,streak,g,cum
0,1/21/2011,1283.63,1283.35,-0.28,-0.0002,neg,1,1,-0.0002
1,1/24/2011,1283.29,1290.84,7.55,0.0059,pos,1,2,0.0059
2,1/25/2011,1288.17,1291.18,3.01,0.0023,pos,2,2,0.0082
3,1/26/2011,1291.97,1296.63,4.66,0.0036,pos,3,2,0.0118
4,1/27/2011,1297.51,1299.54,2.03,0.0016,pos,4,2,0.0134


In [9]:
#assign to bins based on % Gain and streak

# use pd.cut instead?
# https://stackoverflow.com/questions/46472809/python-binning-based-on-2-columns-in-pandas

prices['bin'] = ''
for ind, row in prices.iterrows():
    if row['cum'] >= 0 and row['cum'] < 0.0025 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'A1'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'B1'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'C1'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'D1'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'E1'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'F1'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'G1'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'H1'
#    elif row['cum'] >= 0.0200 and row['cum'] < 0.0225 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'I1'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0275 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'J1'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'K1'
    elif row['cum'] >= 0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = 'L1'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'M1'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'N1'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'O1'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'P1'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'Q1'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 1:        
#            prices.loc[ind, 'bin'] = 'R1'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'S1'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'T1'
#    elif row['cum'] >= 0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = 'U1'
    elif row['cum'] > 0 and row['cum'] < 0.0025 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'A2'
    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'B2'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'C2'
    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'E2'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'F2'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'G2'
    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'H2'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'I2'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'J2'
    elif row['cum'] >= 0.0250 and row['cum'] < 0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'K2'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'L2'
    elif row['cum'] >= 0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = 'M2'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'N2'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'O2'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'P2'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'Q2'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'R2'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'S2'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'T2'
#    elif row['cum'] >= 0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = 'U2'
    elif row['cum'] > 0 and row['cum'] < 0.0050 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'A3'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'B3'
    elif row['cum'] >= 0.0050 and row['cum'] < 0.0100 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'C3'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'D2'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0150 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'E3'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'F3'
    elif row['cum'] >= 0.0150 and row['cum'] < 0.0200 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'G3'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'H3'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'I3'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'J3'
    elif row['cum'] >= 0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = 'K3'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'L3'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'M3'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'N3'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'O3'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'P3'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'Q3'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'R3'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'S3'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'T3'
#    elif row['cum'] >= 0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = 'U3'
    elif row['cum'] > 0 and row['cum'] < 0.0125 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'A4'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'B4'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'C4'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'D4'
#    elif row['cum'] >= 0.0100 and row['cum'] < 0.0125 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'E4'
    elif row['cum'] >= 0.0125 and row['cum'] < 0.0200 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'F4'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'G4'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'H4'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'I4'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'J4'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'K4'
    elif row['cum'] >= 0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = 'L4'
#    elif row['cum'] >= 0.0300 and row['cum'] < 0.0325 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'M4'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'N4'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'O4'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'P4'
#    elif row['cum'] >= 0.0400 and row['cum'] < 0.0425 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'Q4'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'R4'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'S4'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'T4'
#    elif row['cum'] >= 0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = 'U4'
    elif row['cum'] > 0 and row['cum'] < 0.0100 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'A5'
#    elif row['cum'] >= 0.0025 and row['cum'] < 0.0050 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'B5'
#    elif row['cum'] >= 0.0050 and row['cum'] < 0.0075 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'C2'
#    elif row['cum'] >= 0.0075 and row['cum'] < 0.0100 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'D5'
    elif row['cum'] >= 0.0100 and row['cum'] < 0.0200 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'E5'
#    elif row['cum'] >= 0.0125 and row['cum'] < 0.0150 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'F5'
#    elif row['cum'] >= 0.0150 and row['cum'] < 0.0175 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'G5'
#    elif row['cum'] >= 0.0175 and row['cum'] < 0.0200 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'H5'
    elif row['cum'] >= 0.0200 and row['cum'] < 0.0300 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'I5'
#    elif row['cum'] >= 0.0225 and row['cum'] < 0.0250 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'J5'
#    elif row['cum'] >= 0.0250 and row['cum'] < 0.0275 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'K5'
#    elif row['cum'] >= 0.0275 and row['cum'] < 0.0300 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'L5'
    elif row['cum'] >= 0.0300 and row['cum'] < 0.0400 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'M5'
#    elif row['cum'] >= 0.0325 and row['cum'] < 0.0350 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'N5'
#    elif row['cum'] >= 0.0350 and row['cum'] < 0.0375 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'O5'
#    elif row['cum'] >= 0.0375 and row['cum'] < 0.0400 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'P5'
    elif row['cum'] >= 0.0400 and row['cum'] < 0.0500 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = 'Q5'
#    elif row['cum'] >= 0.0425 and row['cum'] < 0.0450 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'R5'
#    elif row['cum'] >= 0.0450 and row['cum'] < 0.0475 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'S5'
#    elif row['cum'] >= 0.0475 and row['cum'] < 0.0500 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = 'T5'
    elif row['cum'] >= 0.0500 and row['streak'] >= 5:
            prices.loc[ind, 'bin'] = 'U5'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0000 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-A1'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-B1'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-C1'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-D1'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-E1'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-F1'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-G1'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0175 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-H1'
#    elif row['cum'] >= -0.0225 and row['cum'] < -0.0200 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-I1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0225 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-J1'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-K1'
    elif row['cum'] < -0.0275 and row['streak'] == 1:
            prices.loc[ind, 'bin'] = '-L1'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-M1'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-N1'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-O1'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-P1'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-Q1'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-R1'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-S1'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-T1'
#    elif row['cum'] < -0.0500 and row['streak'] == 1:
#            prices.loc[ind, 'bin'] = '-U1'
    elif row['cum'] >= -0.0025 and row['cum'] < 0.0000 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-A2'
    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-B2'
    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-C2'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-E2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-F2'
    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-G2'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-H2'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-I2'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-J2'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0250 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-K2'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-L2'
    elif row['cum'] < -0.0300 and row['streak'] == 2:
            prices.loc[ind, 'bin'] = '-M2'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-N2'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-O2'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-P2'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-Q2'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-R2'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-S2'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-T2'
#    elif row['cum'] < -0.0500 and row['streak'] == 2:
#            prices.loc[ind, 'bin'] = '-U2'
    elif row['cum'] >= -0.0050 and row['cum'] < 0.0000 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-A3'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-B3'
    elif row['cum'] >= -0.0100 and row['cum'] < -0.0050 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-C3'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-D2'
    elif row['cum'] >= -0.0150 and row['cum'] < -0.0100 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-E3'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-F3'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0150 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-G3'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-H3'
    elif row['cum'] >= -0.0250 and row['cum'] < -0.0200 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-I3'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-J3'
    elif row['cum'] < -0.0250 and row['streak'] == 3:
            prices.loc[ind, 'bin'] = '-K3'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-L3'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-M3'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-N3'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-O3'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-P3'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-Q3'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-R3'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-S3'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-T3'
#    elif row['cum'] < -0.0500 and row['streak'] == 3:
#            prices.loc[ind, 'bin'] = '-U3'
    elif row['cum'] >= -0.0125 and row['cum'] < 0.0000 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-A4'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-B4'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-C4'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-D4'
#    elif row['cum'] >= -0.0125 and row['cum'] < -0.0100 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-E4'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0125 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-F4'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-G4'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-H4'
    elif row['cum'] >= -0.0275 and row['cum'] < -0.0200 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-I4'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-J4'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-K4'
    elif row['cum'] < -0.0275 and row['streak'] == 4:
            prices.loc[ind, 'bin'] = '-L4'
#    elif row['cum'] >= -0.0325 and row['cum'] < -0.0300 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-M4'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-N4'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-O4'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-P4'
#    elif row['cum'] >= -0.0425 and row['cum'] < -0.0400 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-Q4'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-R4'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-S4'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-T4'
#    elif row['cum'] < -0.0500 and row['streak'] == 4:
#            prices.loc[ind, 'bin'] = '-U4'
    elif row['cum'] >= -0.0100 and row['cum'] < 0.0000 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-A5'
#    elif row['cum'] >= -0.0050 and row['cum'] < -0.0025 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-B5'
#    elif row['cum'] >= -0.0075 and row['cum'] < -0.0050 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-C2'
#    elif row['cum'] >= -0.0100 and row['cum'] < -0.0075 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-D5'
    elif row['cum'] >= -0.0200 and row['cum'] < -0.0100 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-E5'
#    elif row['cum'] >= -0.0150 and row['cum'] < -0.0125 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-F5'
#    elif row['cum'] >= -0.0175 and row['cum'] < -0.0150 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-G5'
#    elif row['cum'] >= -0.0200 and row['cum'] < -0.0175 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-H5'
    elif row['cum'] >= -0.0300 and row['cum'] < -0.0200 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-I5'
#    elif row['cum'] >= -0.0250 and row['cum'] < -0.0225 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-J5'
#    elif row['cum'] >= -0.0275 and row['cum'] < -0.0250 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-K5'
#    elif row['cum'] >= -0.0300 and row['cum'] < -0.0275 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-L5'
    elif row['cum'] >= -0.0400 and row['cum'] < -0.0300 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-M5'
#    elif row['cum'] >= -0.0350 and row['cum'] < -0.0325 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-N5'
#    elif row['cum'] >= -0.0375 and row['cum'] < -0.0350 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-O5'
#    elif row['cum'] >= -0.0400 and row['cum'] < -0.0375 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-P5'
    elif row['cum'] >= -0.0500 and row['cum'] < -0.0400 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-Q5'
#    elif row['cum'] >= -0.0450 and row['cum'] < -0.0425 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-R5'
#    elif row['cum'] >= -0.0475 and row['cum'] < -0.0450 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-S5'
#    elif row['cum'] >= -0.0500 and row['cum'] < -0.0475 and row['streak'] == 5:
#            prices.loc[ind, 'bin'] = '-T5'
    elif row['cum'] < -0.0500 and row['streak'] == 5:
            prices.loc[ind, 'bin'] = '-U5'
    elif row['cum'] < 0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = 'V6'
    elif row['cum'] >= 0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = 'W6'
    elif row['cum'] > -0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = '-V6'
    elif row['cum'] <= -0.0500 and row['streak'] > 5:
            prices.loc[ind, 'bin'] = '-W6'
    else:
        prices.loc[ind, 'bin'] ='z'
prices.head()

Unnamed: 0,date,open,close,gain,x_gain,pos_neg,streak,g,cum,bin
0,1/21/2011,1283.63,1283.35,-0.28,-0.0002,neg,1,1,-0.0002,-A1
1,1/24/2011,1283.29,1290.84,7.55,0.0059,pos,1,2,0.0059,C1
2,1/25/2011,1288.17,1291.18,3.01,0.0023,pos,2,2,0.0082,D2
3,1/26/2011,1291.97,1296.63,4.66,0.0036,pos,3,2,0.0118,E3
4,1/27/2011,1297.51,1299.54,2.03,0.0016,pos,4,2,0.0134,F4


In [None]:
#Make sure all rows are assigned to a bin
z = prices.loc[prices['bin'] == 'z']
z

In [10]:
#new column to shift cum values up one row
#df['gdp'] = df['gdp'].shift(-1)
prices['next_day'] = prices['cum'].shift(-1)
prices['p_n'] = prices['pos_neg'].shift(-1)

#drop 'g'
prices = prices.drop(columns=['g'])
prices.head()

Unnamed: 0,date,open,close,gain,x_gain,pos_neg,streak,cum,bin,next_day,p_n
0,1/21/2011,1283.63,1283.35,-0.28,-0.0002,neg,1,-0.0002,-A1,0.0059,pos
1,1/24/2011,1283.29,1290.84,7.55,0.0059,pos,1,0.0059,C1,0.0082,pos
2,1/25/2011,1288.17,1291.18,3.01,0.0023,pos,2,0.0082,D2,0.0118,pos
3,1/26/2011,1291.97,1296.63,4.66,0.0036,pos,3,0.0118,E3,0.0134,pos
4,1/27/2011,1297.51,1299.54,2.03,0.0016,pos,4,0.0134,F4,-0.0179,neg


In [11]:
#get average total return for next day, then convert to df
#df.groupby('Column1')['Column2'].mean()

df_3 = prices.groupby('bin')['next_day'].mean().to_frame().reset_index()
df_3 = df_3.rename(columns = {'next_day':'next_day_avg'})
df_3.head()

Unnamed: 0,bin,next_day_avg
0,-A1,-0.0001
1,-A2,8.3e-05
2,-A3,-0.001631
3,-A4,-0.002575
4,-A5,0.00425


In [12]:
df_3.loc[df_3['bin'] == 'F4']

Unnamed: 0,bin,next_day_avg
54,F4,0.005778


In [13]:
#sort by bin and p_n, then get counts and %

g = prices.groupby('bin')['p_n']
df_1 = pd.concat([g.value_counts(), 
                g.value_counts(normalize=True).mul(100)],axis=1, keys=('counts','percentage')).reset_index()

df_1.head()

Unnamed: 0,bin,p_n,counts,percentage
0,-A1,pos,135,53.149606
1,-A1,neg,119,46.850394
2,-A2,pos,27,58.695652
3,-A2,neg,19,41.304348
4,-A3,pos,9,56.25


In [14]:
#get average next day return each bin

df_2 = prices.groupby(['bin', 'p_n'])['next_day'].mean().reset_index()
df_2.head()

Unnamed: 0,bin,p_n,next_day
0,-A1,neg,-0.005568
1,-A1,pos,0.00472
2,-A2,neg,-0.005026
3,-A2,pos,0.003678
4,-A3,neg,-0.006729


In [15]:
#merge df_1 and df_2
df_4 = pd.merge(df_1, df_2, on = ['bin', 'p_n'], how = 'inner')
df_4.head()

Unnamed: 0,bin,p_n,counts,percentage,next_day
0,-A1,pos,135,53.149606,0.00472
1,-A1,neg,119,46.850394,-0.005568
2,-A2,pos,27,58.695652,0.003678
3,-A2,neg,19,41.304348,-0.005026
4,-A3,pos,9,56.25,0.002333


In [16]:
#merge df_4 and df_3
perf = pd.merge(df_4, df_3, on = 'bin', how = 'left')
perf.head()

Unnamed: 0,bin,p_n,counts,percentage,next_day,next_day_avg
0,-A1,pos,135,53.149606,0.00472,-0.0001
1,-A1,neg,119,46.850394,-0.005568,-0.0001
2,-A2,pos,27,58.695652,0.003678,8.3e-05
3,-A2,neg,19,41.304348,-0.005026,8.3e-05
4,-A3,pos,9,56.25,0.002333,-0.001631


In [None]:
z1 = perf[perf.bin.str.endswith('5')]
z1 = z1.loc[z1['p_n'] == 'pos']
z1.head()

In [None]:
plt.bar('bin', 'percentage', data = z1)
plt.hlines(y=50, xmin = -1, xmax = 10, color = 'orange')
plt.xlim(-1, 10)
plt.xticks(rotation = 70)
plt.title('Next Day Return with 5-or-More-Day Streak')
plt.xlabel('Previous Day(s) Activity')
plt.ylabel('%');

## Test year 2020 based on previous year

In [17]:
#Create new df by merging prices and df_3. Sort by date and reset index. Drop columns.
prices['date'] =pd.to_datetime(prices.date)
predict =  pd.merge(prices, df_3, on = 'bin', how = 'inner').sort_values(by=['date'], ascending=True).reset_index()
predict = predict.drop(columns=['index','open','gain'])
predict.head()

Unnamed: 0,date,close,x_gain,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg
0,2011-01-21,1283.35,-0.0002,neg,1,-0.0002,-A1,0.0059,pos,-0.0001
1,2011-01-24,1290.84,0.0059,pos,1,0.0059,C1,0.0082,pos,0.001283
2,2011-01-25,1291.18,0.0023,pos,2,0.0082,D2,0.0118,pos,0.006102
3,2011-01-26,1296.63,0.0036,pos,3,0.0118,E3,0.0134,pos,0.003649
4,2011-01-27,1299.54,0.0016,pos,4,0.0134,F4,-0.0179,neg,0.005778


In [18]:
#Use lambda function to add buy/sell column
predict['buy_sell'] = predict['next_day_avg'].apply(lambda x: float(1) if x > 0 else float(-1))
predict.head()

Unnamed: 0,date,close,x_gain,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell
0,2011-01-21,1283.35,-0.0002,neg,1,-0.0002,-A1,0.0059,pos,-0.0001,-1.0
1,2011-01-24,1290.84,0.0059,pos,1,0.0059,C1,0.0082,pos,0.001283,1.0
2,2011-01-25,1291.18,0.0023,pos,2,0.0082,D2,0.0118,pos,0.006102,1.0
3,2011-01-26,1296.63,0.0036,pos,3,0.0118,E3,0.0134,pos,0.003649,1.0
4,2011-01-27,1299.54,0.0016,pos,4,0.0134,F4,-0.0179,neg,0.005778,1.0


In [19]:
#df['PROFIT']=df['PROFIT'].fillna(df.RATIO.shift().add(1).iloc[2:].cumprod()*20000)
predict['values'] = predict['next_day'] * predict['buy_sell']
predict['PROFIT']=predict['values'].shift().add(1).iloc[2:].cumprod()*1000
predict = predict.drop(columns=['values'])
predict.head()

Unnamed: 0,date,close,x_gain,pos_neg,streak,cum,bin,next_day,p_n,next_day_avg,buy_sell,PROFIT
0,2011-01-21,1283.35,-0.0002,neg,1,-0.0002,-A1,0.0059,pos,-0.0001,-1.0,
1,2011-01-24,1290.84,0.0059,pos,1,0.0059,C1,0.0082,pos,0.001283,1.0,
2,2011-01-25,1291.18,0.0023,pos,2,0.0082,D2,0.0118,pos,0.006102,1.0,1008.2
3,2011-01-26,1296.63,0.0036,pos,3,0.0118,E3,0.0134,pos,0.003649,1.0,1020.09676
4,2011-01-27,1299.54,0.0016,pos,4,0.0134,F4,-0.0179,neg,0.005778,1.0,1033.766057


In [None]:


predict['running_total'] = 1000.0
predict['values'] = predict['next_day'] * predict['buy_sell']

for ind, row in predict.iterrows():
    predict['running_total'] += (predict['running_total'] * predict['values'])
#    if row['next_day'] > 0.0 and row['buy_sell'] > 0':
#            predict['running_total'] = (predict['next_day'] + predict['running_total'])
#    elif row['next_day'] > 0.0 and row['buy_sell'] < 0:
#        predict['running_total'] = (predict['next_day'] * predict['running_total'])
#    elif row['next_day'] < 0.0 and row['buy_sell'] == 'buy':
#            predict['running_total'] = (predict['next_day'] * predict['running_total'])
#    else:
#        predict['running_total'] = (predict['next_day'] * predict['running_total'])

#predict = predict.drop(columns=['values'])
predict.head(10)

## Steps not needed currently

In [None]:
#find highest cummulative sums
#https://stackoverflow.com/questions/47924400/python-pandas-assign-last-value-of-dataframe-group-to-all-entries-of-that-group

prices['b_new'] = prices.groupby('g')['des'].transform('last')
prices.head()

In [None]:
#only keep highest cummulatives sums
prices['new_col'] = ''
for ind, row in prices.iterrows():
    if row['des'] == row['b_new']:
            prices.loc[ind, 'new_col'] = row['des']
    else:
        prices.loc[ind, 'new_col'] = ''
prices.head()

In [None]:
#drop unnecessary columns
prices = prices.drop(columns=['pos_neg','des','b_new','g'])
prices.head(25)

In [None]:
#could be helpful?

period = 5
prices['less_than_zero'] = (prices['% Gain']
                        .rolling(window=period, min_periods=period)
                        .agg(lambda x: (x < 0).sum()))

prices['greater_than_zero'] = (prices['% Gain']
                          .rolling(window=period,min_periods=period)
                          .agg(lambda x: (x > 0).sum()))
prices.head(25)