In [2]:
import pandas as pd

In [None]:
df = pd.read_csv('stock_price.csv', index_col=False)

# Stock Indicators

## Trend Indicators
(lagging) which analyze whether a market is moving up, down, or sideways over time

### SMA
Simple Moving Average calculates the average of a selected range of prices, usually closing prices, by the number of periods (here is 20 days) in that range.  
[mathematical reference](http://www.fmlabs.com/reference/default.htm?url=SimpleMA.htm)  
[pandas.DataFrame.rolling](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rolling.html)

In [134]:
def SMA(df, time_period=20):
    indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=time_period)
    sma = df['Close'].rolling(window=indexer, min_periods=time_period).mean()
    df['SMA_' + str(time_period)] = sma
    return df

In [135]:
df = SMA(df, 20)
df

Unnamed: 0,Date,Close,Open,High,Low,Vol,Change_%,SMA_20
0,"Dec 30, 2020",124.34,123.74,124.82,123.63,3380494,0.44,124.8410
1,"Dec 29, 2020",123.80,125.25,125.48,123.25,3487007,-0.82,124.7820
2,"Dec 28, 2020",124.82,125.12,126.57,124.46,3615222,0.10,124.7680
3,"Dec 24, 2020",124.69,125.00,125.10,124.21,1761122,0.64,124.7445
4,"Dec 23, 2020",123.90,123.93,125.16,123.89,2693889,0.23,124.7200
...,...,...,...,...,...,...,...,...
4995,"Feb 22, 2001",108.90,106.50,110.90,103.38,11709500,1.29,
4996,"Feb 21, 2001",107.51,109.55,111.50,107.29,9158200,-3.58,
4997,"Feb 20, 2001",111.50,114.55,115.60,110.78,6748300,-3.04,
4998,"Feb 16, 2001",115.00,114.55,115.75,113.94,7073000,-1.52,


### EMA
Exponential Moving Average like the SMA but it places a greater weight and significance on the most recent data points  
[mathematical reference](https://www.fmlabs.com/reference/default.htm?url=ExpMA.htm)  
[pandas.DataFrame.ewm](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.ewm.html)

In [136]:
def exponential_moving_average(df, time_period=20):
    EMA = df['Close'].iloc[::-1].ewm(span=time_period, min_periods=time_period, adjust=False).mean()
    df['EMA_' + str(time_period)] = EMA.iloc[::-1]
    return df

In [137]:
df = exponential_moving_average(df, 20)
df

Unnamed: 0,Date,Close,Open,High,Low,Vol,Change_%,SMA_20,EMA_20
0,"Dec 30, 2020",124.34,123.74,124.82,123.63,3380494,0.44,124.8410,123.945514
1,"Dec 29, 2020",123.80,125.25,125.48,123.25,3487007,-0.82,124.7820,123.903989
2,"Dec 28, 2020",124.82,125.12,126.57,124.46,3615222,0.10,124.7680,123.914935
3,"Dec 24, 2020",124.69,125.00,125.10,124.21,1761122,0.64,124.7445,123.819665
4,"Dec 23, 2020",123.90,123.93,125.16,123.89,2693889,0.23,124.7200,123.728051
...,...,...,...,...,...,...,...,...,...
4995,"Feb 22, 2001",108.90,106.50,110.90,103.38,11709500,1.29,,
4996,"Feb 21, 2001",107.51,109.55,111.50,107.29,9158200,-3.58,,
4997,"Feb 20, 2001",111.50,114.55,115.60,110.78,6748300,-3.04,,
4998,"Feb 16, 2001",115.00,114.55,115.75,113.94,7073000,-1.52,,


## Mean Reversion Indicators
(lagging) which measure how far a price swing will stretch before a counter impulse triggers a retracement

### Bollinger Bands
Bollinger Bands defined by a set of trendlines plotted two standard deviations (positively and negatively) away from a simple moving average (SMA) of a security's price. The upper and lower bands are typically 2 standard deviations +/- from a 20-day simple moving average  
[mathematical reference](https://www.fmlabs.com/reference/default.htm?url=Bollinger.htm)  
[implement reference](https://quant.stackexchange.com/a/31905)   

In [138]:
def bollinger_bands(df, time_period=20, n_std=2):
    indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=time_period)
    rolling_mean = df['Close'].rolling(window=indexer, min_periods=time_period).mean()
    rolling_std  = df['Close'].rolling(window=indexer, min_periods=time_period).std()
    upper_band = rolling_mean + (rolling_std*n_std)
    lower_band = rolling_mean - (rolling_std*n_std)
    df['BB_upper_' + str(time_period)] = upper_band
    df['BB_lower_' + str(time_period)] = lower_band
    return df

In [139]:
df = bollinger_bands(df, 20, 2)
df

Unnamed: 0,Date,Close,Open,High,Low,Vol,Change_%,SMA_20,EMA_20,BB_upper_20,BB_lower_20
0,"Dec 30, 2020",124.34,123.74,124.82,123.63,3380494,0.44,124.8410,123.945514,127.033580,122.648420
1,"Dec 29, 2020",123.80,125.25,125.48,123.25,3487007,-0.82,124.7820,123.903989,127.091720,122.472280
2,"Dec 28, 2020",124.82,125.12,126.57,124.46,3615222,0.10,124.7680,123.914935,127.106003,122.429997
3,"Dec 24, 2020",124.69,125.00,125.10,124.21,1761122,0.64,124.7445,123.819665,127.089739,122.399261
4,"Dec 23, 2020",123.90,123.93,125.16,123.89,2693889,0.23,124.7200,123.728051,127.077840,122.362160
...,...,...,...,...,...,...,...,...,...,...,...
4995,"Feb 22, 2001",108.90,106.50,110.90,103.38,11709500,1.29,,,,
4996,"Feb 21, 2001",107.51,109.55,111.50,107.29,9158200,-3.58,,,,
4997,"Feb 20, 2001",111.50,114.55,115.60,110.78,6748300,-3.04,,,,
4998,"Feb 16, 2001",115.00,114.55,115.75,113.94,7073000,-1.52,,,,


## Volume Indicators
(leading or lagging) which calculate trades and quantify whether bulls (rising) or bears (falling) are in control.

### OBV
On-Balance Volume uses volume flow to predict changes in stock price  
[mathematical reference](https://www.fmlabs.com/reference/default.htm?url=OBV.htm)  
[implement reference](https://randerson112358.medium.com/stock-trading-strategy-using-on-balance-volume-obv-python-77a7c719cdac)

In [140]:
def obv(df):
    OBV = [0]
    for i in range(1, len(df.Close)):
        # If the closing price is above the prior close price 
        if df.Close[i] > df.Close[i-1]:
            #then: Current OBV = Previous OBV + Current Volume
            OBV.append(OBV[-1] + df.Vol[i])
        elif df.Close[i] < df.Close[i-1]:
            OBV.append(OBV[-1] - df.Vol[i])
        else:
            OBV.append(OBV[-1])
    OBV.pop(0)
    OBV.append(None)
    df['OBV'] = OBV
    return df

In [141]:
df = obv(df)
df

Unnamed: 0,Date,Close,Open,High,Low,Vol,Change_%,SMA_20,EMA_20,BB_upper_20,BB_lower_20,OBV
0,"Dec 30, 2020",124.34,123.74,124.82,123.63,3380494,0.44,124.8410,123.945514,127.033580,122.648420,-3487007.0
1,"Dec 29, 2020",123.80,125.25,125.48,123.25,3487007,-0.82,124.7820,123.903989,127.091720,122.472280,128215.0
2,"Dec 28, 2020",124.82,125.12,126.57,124.46,3615222,0.10,124.7680,123.914935,127.106003,122.429997,-1632907.0
3,"Dec 24, 2020",124.69,125.00,125.10,124.21,1761122,0.64,124.7445,123.819665,127.089739,122.399261,-4326796.0
4,"Dec 23, 2020",123.90,123.93,125.16,123.89,2693889,0.23,124.7200,123.728051,127.077840,122.362160,-8664553.0
...,...,...,...,...,...,...,...,...,...,...,...,...
4995,"Feb 22, 2001",108.90,106.50,110.90,103.38,11709500,1.29,,,,,-514875896.0
4996,"Feb 21, 2001",107.51,109.55,111.50,107.29,9158200,-3.58,,,,,-508127596.0
4997,"Feb 20, 2001",111.50,114.55,115.60,110.78,6748300,-3.04,,,,,-501054596.0
4998,"Feb 16, 2001",115.00,114.55,115.75,113.94,7073000,-1.52,,,,,-492955896.0


## Momentum Indicators
(leading) which evaluate the speed of price change over time

### MACD
Moving Average Convergence Divergence shows the relationship between two moving averages of a security’s price. The MACD is calculated by subtracting the 26-period EMA from the 12-period EMA. The result of that calculation is the MACD line. A 9 days EMA of the MACD called the "signal line"  
[mathematical reference](https://www.fmlabs.com/reference/default.htm?url=MACD.htm)  
[implement reference](https://github.com/Crypto-toolbox/pandas-technical-indicators/blob/master/technical_indicators.py#L219)

In [142]:
def macd(df, n_fast=12, n_slow=26):
    """Calculate MACD, MACD Signal and MACD difference
    :param df: pandas.DataFrame
    :param n_fast: 
    :param n_slow: 
    :return: pandas.DataFrame
    """
    EMAfast = df['Close'].iloc[::-1].ewm(span=n_fast, min_periods=n_fast, adjust=False).mean().iloc[::-1]
    EMAslow = df['Close'].iloc[::-1].ewm(span=n_slow, min_periods=n_slow, adjust=False).mean().iloc[::-1]
    MACD = EMAfast - EMAslow
    MACDsign = MACD.iloc[::-1].ewm(span=9, min_periods=9, adjust=False).mean().iloc[::-1]
    MACDdiff = MACD - MACDsign
    df['MACD_' + str(n_fast) + '_' + str(n_slow)] = MACD
    df['MACDsign_' + str(n_fast) + '_' + str(n_slow)] = MACDsign
    df['MACDdiff_' + str(n_fast) + '_' + str(n_slow)] = MACDdiff
    return df

In [143]:
df = macd(df, 12, 26)
df

Unnamed: 0,Date,Close,Open,High,Low,Vol,Change_%,SMA_20,EMA_20,BB_upper_20,BB_lower_20,OBV,MACD_12_26,MACDsign_12_26,MACDdiff_12_26
0,"Dec 30, 2020",124.34,123.74,124.82,123.63,3380494,0.44,124.8410,123.945514,127.033580,122.648420,-3487007.0,0.865134,1.227020,-0.361885
1,"Dec 29, 2020",123.80,125.25,125.48,123.25,3487007,-0.82,124.7820,123.903989,127.091720,122.472280,128215.0,0.937666,1.317491,-0.379825
2,"Dec 28, 2020",124.82,125.12,126.57,124.46,3615222,0.10,124.7680,123.914935,127.106003,122.429997,-1632907.0,1.071586,1.412447,-0.340861
3,"Dec 24, 2020",124.69,125.00,125.10,124.21,1761122,0.64,124.7445,123.819665,127.089739,122.399261,-4326796.0,1.123076,1.497662,-0.374587
4,"Dec 23, 2020",123.90,123.93,125.16,123.89,2693889,0.23,124.7200,123.728051,127.077840,122.362160,-8664553.0,1.185696,1.591309,-0.405613
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,"Feb 22, 2001",108.90,106.50,110.90,103.38,11709500,1.29,,,,,-514875896.0,,,
4996,"Feb 21, 2001",107.51,109.55,111.50,107.29,9158200,-3.58,,,,,-508127596.0,,,
4997,"Feb 20, 2001",111.50,114.55,115.60,110.78,6748300,-3.04,,,,,-501054596.0,,,
4998,"Feb 16, 2001",115.00,114.55,115.75,113.94,7073000,-1.52,,,,,-492955896.0,,,


## Relative Strength Indicators
(leading) measure oscillations in buying and selling pressure

### RSI
Relative Strength Index measures the magnitude of recent price changes to evaluate overbought or oversold conditions in the price of a stock. RSI is most typically used on a 14-day timeframe, measured on a scale from 0 to 100, with high and low levels marked at 70 and 30  
[mathematical reference](https://www.fmlabs.com/reference/default.htm?url=RSI.htm)  
[implement reference](https://tcoil.info/compute-rsi-for-stocks-with-python-relative-strength-index)

In [144]:
def RSI(df, time_period=14):
    # diff in one field(one day)
    diff = df['Close'].diff(1)

    #this preservers dimensions off diff values
    up_chg = 0 * diff
    down_chg = 0 * diff
    
    # up change is equal to the positive difference, otherwise equal to zero
    up_chg[diff > 0] = diff[diff > 0]
    
    # down change is equal to negative deifference, otherwise equal to zero
    down_chg[diff < 0] = diff[diff < 0]
    
    # values are related to exponential decay
    # we set com=time_window-1 so we get decay alpha=1/time_window
    up_chg_avg   = up_chg.iloc[::-1].ewm(com=time_period-1, min_periods=time_period, adjust=False).mean().iloc[::-1]
    down_chg_avg = down_chg.iloc[::-1].ewm(com=time_period-1, min_periods=time_period, adjust=False).mean().iloc[::-1]
    
    rs = abs(up_chg_avg/down_chg_avg)
    rsi = 100 - 100/(1 + rs)
    
    df['RSI_' + str(time_period)] = rsi
    return df

In [145]:
df = RSI(df, 14)
df

Unnamed: 0,Date,Close,Open,High,Low,Vol,Change_%,SMA_20,EMA_20,BB_upper_20,BB_lower_20,OBV,MACD_12_26,MACDsign_12_26,MACDdiff_12_26,RSI_14
0,"Dec 30, 2020",124.34,123.74,124.82,123.63,3380494,0.44,124.8410,123.945514,127.033580,122.648420,-3487007.0,0.865134,1.227020,-0.361885,46.736086
1,"Dec 29, 2020",123.80,125.25,125.48,123.25,3487007,-0.82,124.7820,123.903989,127.091720,122.472280,128215.0,0.937666,1.317491,-0.379825,46.736086
2,"Dec 28, 2020",124.82,125.12,126.57,124.46,3615222,0.10,124.7680,123.914935,127.106003,122.429997,-1632907.0,1.071586,1.412447,-0.340861,48.493620
3,"Dec 24, 2020",124.69,125.00,125.10,124.21,1761122,0.64,124.7445,123.819665,127.089739,122.399261,-4326796.0,1.123076,1.497662,-0.374587,44.856413
4,"Dec 23, 2020",123.90,123.93,125.16,123.89,2693889,0.23,124.7200,123.728051,127.077840,122.362160,-8664553.0,1.185696,1.591309,-0.405613,45.234450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,"Feb 22, 2001",108.90,106.50,110.90,103.38,11709500,1.29,,,,,-514875896.0,,,,
4996,"Feb 21, 2001",107.51,109.55,111.50,107.29,9158200,-3.58,,,,,-508127596.0,,,,
4997,"Feb 20, 2001",111.50,114.55,115.60,110.78,6748300,-3.04,,,,,-501054596.0,,,,
4998,"Feb 16, 2001",115.00,114.55,115.75,113.94,7073000,-1.52,,,,,-492955896.0,,,,


### Our dataframe info

In [146]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Date            5000 non-null   object 
 1   Close           5000 non-null   float64
 2   Open            5000 non-null   float64
 3   High            5000 non-null   float64
 4   Low             5000 non-null   float64
 5   Vol             5000 non-null   int64  
 6   Change_%        5000 non-null   float64
 7   SMA_20          4981 non-null   float64
 8   EMA_20          4981 non-null   float64
 9   BB_upper_20     4981 non-null   float64
 10  BB_lower_20     4981 non-null   float64
 11  OBV             4999 non-null   float64
 12  MACD_12_26      4975 non-null   float64
 13  MACDsign_12_26  4967 non-null   float64
 14  MACDdiff_12_26  4967 non-null   float64
 15  RSI_14          4987 non-null   float64
dtypes: float64(14), int64(1), object(1)
memory usage: 625.1+ KB


In [147]:
df.to_csv('stock_price_with_indicators_nan.csv', index=False)

### Drop nan rows come from indicator calculations

In [148]:
df = df.dropna()
df

Unnamed: 0,Date,Close,Open,High,Low,Vol,Change_%,SMA_20,EMA_20,BB_upper_20,BB_lower_20,OBV,MACD_12_26,MACDsign_12_26,MACDdiff_12_26,RSI_14
0,"Dec 30, 2020",124.34,123.74,124.82,123.63,3380494,0.44,124.8410,123.945514,127.033580,122.648420,-3487007.0,0.865134,1.227020,-0.361885,46.736086
1,"Dec 29, 2020",123.80,125.25,125.48,123.25,3487007,-0.82,124.7820,123.903989,127.091720,122.472280,128215.0,0.937666,1.317491,-0.379825,46.736086
2,"Dec 28, 2020",124.82,125.12,126.57,124.46,3615222,0.10,124.7680,123.914935,127.106003,122.429997,-1632907.0,1.071586,1.412447,-0.340861,48.493620
3,"Dec 24, 2020",124.69,125.00,125.10,124.21,1761122,0.64,124.7445,123.819665,127.089739,122.399261,-4326796.0,1.123076,1.497662,-0.374587,44.856413
4,"Dec 23, 2020",123.90,123.93,125.16,123.89,2693889,0.23,124.7200,123.728051,127.077840,122.362160,-8664553.0,1.185696,1.591309,-0.405613,45.234450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4962,"Apr 10, 2001",99.05,97.00,99.90,96.56,10501300,3.18,94.1000,96.501084,100.902176,87.297824,-458620396.0,-1.572502,-2.788255,1.215753,52.381700
4963,"Apr 09, 2001",96.00,98.50,98.74,93.80,9487700,-1.99,94.0670,96.232777,100.773798,87.360202,-445196196.0,-2.018154,-3.092193,1.074039,50.149056
4964,"Apr 06, 2001",97.95,98.00,100.00,96.40,13424200,-0.26,94.0415,96.257280,100.721176,87.361824,-432690096.0,-2.247067,-3.360703,1.113636,54.626714
4965,"Apr 05, 2001",98.21,95.80,100.00,95.51,12506100,6.75,94.1085,96.079099,100.977445,87.239555,-446205996.0,-2.705105,-3.639112,0.934007,52.086958


In [149]:
df.to_csv('stock_price_with_indicators.csv', index=False)