# Random forest

A supervised machine learning model that aggregates the predictions of multiple decision trees, used here to predict the direction of stock price movements.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pandas_datareader import DataReader as pdr
from datetime import datetime
from dateutil.relativedelta import relativedelta

%matplotlib inline

In [2]:
# tickers to analyse
symbols = ['BILI', 'PDD', 'JPM', 'WMT', 'AAPL']

# define the timeframe: the two years ending now
end = datetime.now()
start = end - relativedelta(years=2)

# retrieve the data from Yahoo Finance; the result has two-level columns
# (attribute, symbol), e.g. ('Close', 'AAPL')
stocks = pdr(symbols, data_source='yahoo', start=start.date(), end=end.date())

In [3]:
# calculate the day-over-day change in closing price for each stock
# (iterate over the symbols that actually have Close data instead of
# assuming the first five columns are the tickers)
for symbol in stocks['Close'].columns:
    stocks['price_change', symbol] = stocks['Close'][symbol].diff()

# drop rows with missing values; diff() leaves the first row without a
# price change because it has no previous close
stocks = stocks.dropna()
stocks

Attributes,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close,Close,...,Volume,Volume,Volume,Volume,Volume,price_change,price_change,price_change,price_change,price_change
Symbols,BILI,PDD,JPM,WMT,AAPL,BILI,PDD,JPM,WMT,AAPL,...,BILI,PDD,JPM,WMT,AAPL,BILI,PDD,JPM,WMT,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2019-05-22,14.690000,20.459999,103.975410,98.809914,44.853859,14.690000,20.459999,110.820000,102.230003,45.695000,...,2483000.0,10456400.0,7918900.0,7340900.0,118994400.0,-0.080001,-1.060001,-0.910004,1.110001,-0.955002
2019-05-23,14.320000,20.530001,101.930054,98.452286,44.088215,14.320000,20.530001,108.639999,101.860001,44.915001,...,4847400.0,8147200.0,14558000.0,6454600.0,146118800.0,-0.370000,0.070002,-2.180000,-0.370003,-0.779999
2019-05-24,14.050000,20.280001,102.933960,99.235184,43.918888,14.050000,20.280001,109.709999,102.669998,44.742500,...,2452900.0,3124200.0,8537300.0,4483200.0,94858800.0,-0.270000,-0.250000,1.070000,0.809998,-0.172501
2019-05-28,14.010000,20.230000,101.817474,98.993553,43.737293,14.010000,20.230000,108.519997,102.419998,44.557499,...,3617400.0,7613600.0,11665500.0,10271700.0,111792800.0,-0.040000,-0.050001,-1.190002,-0.250000,-0.185001
2019-05-29,13.780000,19.980000,101.536003,98.703583,43.528706,13.780000,19.980000,108.220001,102.120003,44.345001,...,3338000.0,5740300.0,9966400.0,6520200.0,113924800.0,-0.230000,-0.250000,-0.299995,-0.299995,-0.212498
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-17,98.239998,118.019997,164.669998,138.889999,126.269997,98.239998,118.019997,164.669998,138.889999,126.269997,...,4564300.0,4150800.0,9886800.0,6558600.0,74244600.0,1.369995,-0.310005,0.660004,-0.630005,-1.180000
2021-05-18,101.059998,122.239998,162.350006,141.910004,124.849998,101.059998,122.239998,162.350006,141.910004,124.849998,...,3787500.0,5426300.0,11098100.0,19394600.0,63342900.0,2.820000,4.220001,-2.319992,3.020004,-1.419998
2021-05-19,102.459999,127.169998,161.110001,141.919998,124.690002,102.459999,127.169998,161.110001,141.919998,124.690002,...,2870400.0,5542700.0,13278900.0,11624900.0,92612000.0,1.400002,4.930000,-1.240005,0.009995,-0.159996
2021-05-20,102.800003,131.000000,160.830002,142.419998,127.309998,102.800003,131.000000,160.830002,142.419998,127.309998,...,5083100.0,7067300.0,10423500.0,10178700.0,76756000.0,0.340004,3.830002,-0.279999,0.500000,2.619995


## RSI indicator

Use the Relative Strength Index (RSI) indicator to judge whether a stock has been overbought or oversold. The RSI ranges from 0 to 100: a value above 70 generally indicates that the stock is overbought (potentially overvalued), and a value below 30 indicates that it is oversold (potentially undervalued).

In [4]:
# calculate the 14 day RSI, which is a general standard
n = 14

# split the daily price changes into gains and losses:
# up_df keeps the gains and sets every negative change to 0
up_df = stocks['price_change'].clip(lower=0)

# down_df keeps the losses, sets every positive change to 0 and
# converts the remaining (negative) changes to absolute values
down_df = stocks['price_change'].clip(upper=0).abs()

In [5]:
# smooth the gains and the losses with an Exponential Weighted Moving
# Average so that recent price changes carry more weight
ewma_up, ewma_down = (moves.ewm(span=n).mean() for moves in (up_df, down_df))

# relative strength is the ratio of smoothed gains to smoothed losses;
# the RSI rescales that ratio
relative_strength = ewma_up.div(ewma_down)
relative_strength_index = 100 - 100 / (1 + relative_strength)

In [6]:
# append the up-day gains, down-day losses and RSI to the original data frame
new_attrs = ['up_days', 'down_days', 'RSI']
attr_dfs = [up_df, down_df, relative_strength_index]

for attr, attr_df in zip(new_attrs, attr_dfs):
    # take the symbols from the frame being appended rather than assuming
    # the first five columns of stocks are the tickers
    for symbol in attr_df.columns:
        stocks[attr, symbol] = attr_df[symbol]

In [7]:
# display the frame, which now also holds the up_days, down_days and RSI columns
stocks

Attributes,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close,Close,...,down_days,down_days,down_days,down_days,down_days,RSI,RSI,RSI,RSI,RSI
Symbols,BILI,PDD,JPM,WMT,AAPL,BILI,PDD,JPM,WMT,AAPL,...,BILI,PDD,JPM,WMT,AAPL,BILI,PDD,JPM,WMT,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2019-05-22,14.690000,20.459999,103.975410,98.809914,44.853859,14.690000,20.459999,110.820000,102.230003,45.695000,...,0.080001,1.060001,0.910004,0.000000,0.955002,0.000000,0.000000,0.000000,100.000000,0.000000
2019-05-23,14.320000,20.530001,101.930054,98.452286,44.088215,14.320000,20.530001,108.639999,101.860001,44.915001,...,0.370000,0.000000,2.180000,0.370003,0.779999,0.000000,7.080385,0.000000,72.222084,0.000000
2019-05-24,14.050000,20.280001,102.933960,99.235184,43.918888,14.050000,20.280001,109.709999,102.669998,44.742500,...,0.270000,0.250000,0.000000,0.000000,0.172501,0.000000,5.481161,29.372621,83.675983,0.000000
2019-05-28,14.010000,20.230000,101.817474,98.993553,43.737293,14.010000,20.230000,108.519997,102.419998,44.557499,...,0.040000,0.050001,1.190002,0.250000,0.185001,0.000000,5.209613,21.332043,72.961921,0.000000
2019-05-29,13.780000,19.980000,101.536003,98.703583,43.528706,13.780000,19.980000,108.220001,102.120003,44.345001,...,0.230000,0.250000,0.299995,0.299995,0.212498,0.000000,4.051612,19.758720,61.974648,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-17,98.239998,118.019997,164.669998,138.889999,126.269997,98.239998,118.019997,164.669998,138.889999,126.269997,...,0.000000,0.310005,0.000000,0.630005,1.180000,45.757666,32.368954,74.352166,47.582070,42.641157
2021-05-18,101.059998,122.239998,162.350006,141.910004,124.849998,101.059998,122.239998,162.350006,141.910004,124.849998,...,0.000000,0.000000,2.319992,0.000000,1.419998,51.017997,42.725389,61.987375,61.541970,38.037418
2021-05-19,102.459999,127.169998,161.110001,141.919998,124.690002,102.459999,127.169998,161.110001,141.919998,124.690002,...,0.000000,0.000000,1.240005,0.000000,0.159996,53.595855,52.525048,56.221315,61.581040,37.510905
2021-05-20,102.800003,131.000000,160.830002,142.419998,127.309998,102.800003,131.000000,160.830002,142.419998,127.309998,...,0.000000,0.000000,0.279999,0.000000,0.000000,54.270265,58.838161,54.890991,63.709253,50.465990


## Stochastic Oscillator
It measures where the closing price sits relative to the low-high range over a look-back period (14 days here).

In [16]:
# look-back window (number of rows) for the stochastic oscillator
n = 14

# lowest low and highest high over the trailing n rows; rolling() returns
# new frames, so no explicit copy of the source columns is needed.
# The first n - 1 rows are NaN because the window is not yet full.
low_14 = stocks['Low'].rolling(window=n).min()
high_14 = stocks['High'].rolling(window=n).max()

# calculate the stochastic oscillator (%K): position of the close within
# the n-row low-high range, scaled by 100
k_percent = 100 * (stocks['Close'] - low_14) / (high_14 - low_14)

# show the result so the cell reproduces its recorded output
k_percent

Symbols,BILI,PDD,JPM,WMT,AAPL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-05-22,,,,,
2019-05-23,,,,,
2019-05-24,,,,,
2019-05-28,,,,,
2019-05-29,,,,,
...,...,...,...,...,...
2021-05-17,36.411459,17.770125,98.661652,44.842406,27.125470
2021-05-18,45.971294,32.311694,76.422202,69.727937,17.543841
2021-05-19,59.821135,54.458702,67.140725,69.841253,20.642986
2021-05-20,71.179894,73.301913,65.044925,75.510183,42.808751


In [17]:
# add the rolling low, rolling high and %K frames to stocks
new_attrs = ['low_14', 'high_14', 'k_percent']
attr_dfs = [low_14, high_14, k_percent]

for attr, attr_df in zip(new_attrs, attr_dfs):
    # take the symbols from the frame being appended rather than assuming
    # the first five columns of stocks are the tickers
    for symbol in attr_df.columns:
        stocks[attr, symbol] = attr_df[symbol]

## Williams %R
Similar to the Stochastic Oscillator, but ranging from -100 to 0. A value above -20 indicates a sell signal (overbought), and a value below -80 indicates a buy signal (oversold).

In [18]:
# Williams %R: distance of the close below the rolling high, as a
# negative percentage of the rolling high-low range
r_percent = -100 * (high_14 - stocks['Close']) / (high_14 - low_14)

# add the new frame to stocks, one column per symbol present in r_percent
# (instead of assuming the first five columns of stocks are the tickers)
for symbol in r_percent.columns:
    stocks['r_percent', symbol] = r_percent[symbol]

Attributes,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Close,Close,Close,Close,Close,...,k_percent,k_percent,k_percent,k_percent,k_percent,r_percent,r_percent,r_percent,r_percent,r_percent
Symbols,BILI,PDD,JPM,WMT,AAPL,BILI,PDD,JPM,WMT,AAPL,...,BILI,PDD,JPM,WMT,AAPL,BILI,PDD,JPM,WMT,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2019-05-22,14.69,20.459999,103.97541,98.809914,44.853859,14.69,20.459999,110.82,102.230003,45.695,...,,,,,,,,,,
2019-05-23,14.32,20.530001,101.930054,98.452286,44.088215,14.32,20.530001,108.639999,101.860001,44.915001,...,,,,,,,,,,
2019-05-24,14.05,20.280001,102.93396,99.235184,43.918888,14.05,20.280001,109.709999,102.669998,44.7425,...,,,,,,,,,,
2019-05-28,14.01,20.23,101.817474,98.993553,43.737293,14.01,20.23,108.519997,102.419998,44.557499,...,,,,,,,,,,
2019-05-29,13.78,19.98,101.536003,98.703583,43.528706,13.78,19.98,108.220001,102.120003,44.345001,...,,,,,,,,,,
2019-05-30,13.89,19.65,100.447632,98.77124,43.754475,13.89,19.65,107.059998,102.190002,44.575001,...,,,,,,,,,,
2019-05-31,13.5,19.42,99.415573,98.046341,42.961842,13.5,19.42,105.959999,101.440002,43.767502,...,,,,,,,,,,
2019-06-03,13.48,19.030001,99.884697,98.548935,42.527481,13.48,19.030001,106.459999,101.959999,43.325001,...,,,,,,,,,,
2019-06-04,14.11,19.280001,102.962105,99.128876,44.083302,14.11,19.280001,109.739998,102.559998,44.91,...,,,,,,,,,,
2019-06-05,13.99,19.129999,103.328033,100.926628,44.794956,13.99,19.129999,110.129997,104.419998,45.634998,...,,,,,,,,,,
