## Can recent performance predict stock movements?

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime

%matplotlib inline

In [2]:
# Load the daily price table from the CSV under ../data
# (described by the original note as the ^GSPC export from Yahoo Finance).
prices = pd.read_csv(filepath_or_buffer='../data/test.csv')
prices.head()

Unnamed: 0,Date,Open,High,Low,Close,Gain,% Gain
0,1/22/2018,2809.16,2833.03,2808.12,2832.97,23.81,0.0085
1,1/23/2018,2835.05,2842.24,2830.59,2839.13,4.08,0.0014
2,1/24/2018,2845.42,2852.97,2824.81,2837.54,-7.88,-0.0028
3,1/25/2018,2846.24,2848.56,2830.94,2839.25,-6.99,-0.0025
4,1/26/2018,2847.48,2872.87,2846.18,2872.87,25.39,0.0089


In [3]:
# Remove the intraday 'High' and 'Low' columns.
prices = prices.drop(['High', 'Low'], axis=1)
prices.head()

Unnamed: 0,Date,Open,Close,Gain,% Gain
0,1/22/2018,2809.16,2832.97,23.81,0.0085
1,1/23/2018,2835.05,2839.13,4.08,0.0014
2,1/24/2018,2845.42,2837.54,-7.88,-0.0028
3,1/25/2018,2846.24,2839.25,-6.99,-0.0025
4,1/26/2018,2847.48,2872.87,25.39,0.0089


In [4]:
# Label each day by the sign of its percentage gain: 'pos' when '% Gain' is
# strictly greater than zero, 'neg' otherwise. Flat days (0) and missing values
# fail the `> 0` comparison and therefore fall under 'neg'.
# A single vectorised comparison replaces the row-by-row loop.
prices['pos_neg'] = np.where(prices['% Gain'] > 0, 'pos', 'neg')
prices.head()

Unnamed: 0,Date,Open,Close,Gain,% Gain,pos_neg
0,1/22/2018,2809.16,2832.97,23.81,0.0085,pos
1,1/23/2018,2835.05,2839.13,4.08,0.0014,pos
2,1/24/2018,2845.42,2837.54,-7.88,-0.0028,neg
3,1/25/2018,2846.24,2839.25,-6.99,-0.0025,neg
4,1/26/2018,2847.48,2872.87,25.39,0.0089,pos


In [5]:
# Give every run of consecutive rows with the same 'pos_neg' label its own id:
# a row that differs from the previous row starts a new run, and the running
# count of such starts is the run id (the first row always starts run 1).
# Technique from:
# https://stackoverflow.com/questions/60334671/pandas-dataframe-how-to-find-consecutive-rows-that-meet-some-conditions
prices['g'] = (prices['pos_neg'] != prices['pos_neg'].shift()).cumsum()
prices.head()

Unnamed: 0,Date,Open,Close,Gain,% Gain,pos_neg,g
0,1/22/2018,2809.16,2832.97,23.81,0.0085,pos,1
1,1/23/2018,2835.05,2839.13,4.08,0.0014,pos,1
2,1/24/2018,2845.42,2837.54,-7.88,-0.0028,neg,2
3,1/25/2018,2846.24,2839.25,-6.99,-0.0025,neg,2
4,1/26/2018,2847.48,2872.87,25.39,0.0089,pos,3


In [6]:
# Running (cumulative) sum of '% Gain' within each run 'g'; the total restarts
# whenever the run id changes.
# Technique from:
# https://stackoverflow.com/questions/32890124/pandas-dataframe-running-sum-with-reset/32891081#32891081
prices['des'] = prices['% Gain'].groupby(prices['g']).cumsum()
prices.head()

Unnamed: 0,Date,Open,Close,Gain,% Gain,pos_neg,g,des
0,1/22/2018,2809.16,2832.97,23.81,0.0085,pos,1,0.0085
1,1/23/2018,2835.05,2839.13,4.08,0.0014,pos,1,0.0099
2,1/24/2018,2845.42,2837.54,-7.88,-0.0028,neg,2,-0.0028
3,1/25/2018,2846.24,2839.25,-6.99,-0.0025,neg,2,-0.0053
4,1/26/2018,2847.48,2872.87,25.39,0.0089,pos,3,0.0089


In [7]:
# Copy each run's final cumulative sum (the last 'des' value in group 'g')
# onto every row of that run.
prices['b_new'] = prices['des'].groupby(prices['g']).transform('last')
prices.head()

Unnamed: 0,Date,Open,Close,Gain,% Gain,pos_neg,g,des,b_new
0,1/22/2018,2809.16,2832.97,23.81,0.0085,pos,1,0.0085,0.0099
1,1/23/2018,2835.05,2839.13,4.08,0.0014,pos,1,0.0099,0.0099
2,1/24/2018,2845.42,2837.54,-7.88,-0.0028,neg,2,-0.0028,-0.0053
3,1/25/2018,2846.24,2839.25,-6.99,-0.0025,neg,2,-0.0053,-0.0053
4,1/26/2018,2847.48,2872.87,25.39,0.0089,pos,3,0.0089,0.0089


In [8]:
# Keep the cumulative sum only on rows where it equals the run's final total
# ('des' == 'b_new'); every other row gets an empty string.
# 'des' is cast to object first so that numbers and '' can share one column,
# and the selection is done in one vectorised step instead of a per-row loop.
# NOTE(review): a 0 % day that ends a 'neg' run leaves two rows equal to the
# final total, so both keep their value -- confirm whether that is intended.
prices['new_col'] = (
    prices['des']
    .astype(object)
    .where(prices['des'] == prices['b_new'], '')
)
prices.head()

Unnamed: 0,Date,Open,Close,Gain,% Gain,pos_neg,g,des,b_new,new_col
0,1/22/2018,2809.16,2832.97,23.81,0.0085,pos,1,0.0085,0.0099,
1,1/23/2018,2835.05,2839.13,4.08,0.0014,pos,1,0.0099,0.0099,0.0099
2,1/24/2018,2845.42,2837.54,-7.88,-0.0028,neg,2,-0.0028,-0.0053,
3,1/25/2018,2846.24,2839.25,-6.99,-0.0025,neg,2,-0.0053,-0.0053,-0.0053
4,1/26/2018,2847.48,2872.87,25.39,0.0089,pos,3,0.0089,0.0089,0.0089


In [9]:
# Remove the intermediate columns that were only needed to build 'new_col'.
prices = prices.drop(['pos_neg', 'des', 'b_new', 'g'], axis=1)
prices.head(25)

Unnamed: 0,Date,Open,Close,Gain,% Gain,new_col
0,1/22/2018,2809.16,2832.97,23.81,0.0085,
1,1/23/2018,2835.05,2839.13,4.08,0.0014,0.0099
2,1/24/2018,2845.42,2837.54,-7.88,-0.0028,
3,1/25/2018,2846.24,2839.25,-6.99,-0.0025,-0.0053
4,1/26/2018,2847.48,2872.87,25.39,0.0089,0.0089
5,1/29/2018,2867.23,2853.53,-13.7,-0.0048,
6,1/30/2018,2832.74,2822.43,-10.31,-0.0036,
7,1/31/2018,2832.41,2823.81,-8.6,-0.003,-0.0114
8,2/1/2018,2816.45,2821.98,5.53,0.002,0.002
9,2/2/2018,2808.92,2762.13,-46.79,-0.0167,


In [None]:
# Number of rows in the trailing window.
period = 5

# Count, for each row, how many of the last `period` values of '% Gain' were
# negative / positive. Each day is first turned into a 0.0/1.0 flag and the
# flags are then summed with the built-in rolling sum, which avoids calling a
# Python lambda once per window.
# The flags are left as NaN where '% Gain' is missing, so a window containing a
# missing value (or fewer than `period` rows) still produces NaN.
prices['less_than_zero'] = (prices['% Gain']
                            .lt(0)
                            .astype(float)
                            .where(prices['% Gain'].notna())
                            .rolling(window=period, min_periods=period)
                            .sum())

prices['greater_than_zero'] = (prices['% Gain']
                               .gt(0)
                               .astype(float)
                               .where(prices['% Gain'].notna())
                               .rolling(window=period, min_periods=period)
                               .sum())
prices.head(25)