In [13]:
import pandas_datareader as pdr
import datetime as dt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier


# Train a random-forest classifier that predicts, from a handful of technical
# indicators, whether the next trading day's close will be above today's.

ticker = "CI"
data = pdr.get_data_yahoo(ticker, dt.datetime(2017, 1, 1), dt.datetime.now(), interval='d')

# --- Trend features -------------------------------------------------------
# +1 when the fast EMA is above the slow EMA, otherwise -1.
ema10 = data['Close'].ewm(span=10).mean()
ema30 = data['Close'].ewm(span=30).mean()
data['EMA10gtEMA30'] = np.where(ema10 > ema30, 1, -1)

# +1 when the close is above its 10-period EMA, otherwise -1.
data['ClGtEMA10'] = np.where(data['Close'] > ema10, 1, -1)

# --- MACD -----------------------------------------------------------------
exp1 = data['Close'].ewm(span=12).mean()
exp2 = data['Close'].ewm(span=26).mean()
macd = exp1 - exp2
macd_signal = macd.ewm(span=9).mean()
# NOTE(review): this is signal minus MACD, i.e. the negative of the usual
# MACD histogram (MACD - signal). Kept as-is because the sign is irrelevant
# to a tree-based model; confirm before reusing the column elsewhere.
data['MACD'] = macd_signal - macd

# --- RSI ------------------------------------------------------------------
# com=13 with adjust=False gives alpha = 1/14 (recursive smoothing).
delta = data['Close'].diff()
up = delta.clip(lower=0)
down = -1 * delta.clip(upper=0)
ema_up = up.ewm(com=13, adjust=False).mean()
ema_down = down.ewm(com=13, adjust=False).mean()
rs = ema_up / ema_down
data['RSI'] = 100 - (100 / (1 + rs))

# --- Stochastic oscillator (%K) and Williams %R over a 14-day window ------
high14 = data['High'].rolling(14).max()
low14 = data['Low'].rolling(14).min()
data['%K'] = (data['Close'] - low14) * 100 / (high14 - low14)
data['%R'] = -100 * (high14 - data['Close']) / (high14 - low14)

# --- Price rate of change over `days` trading days ------------------------
days = 6
ct_n = data['Close'].shift(days)
data['PROC'] = (data['Close'] - ct_n) / ct_n
print(data)

# --- Class labels ---------------------------------------------------------
# 'Return' is the NEXT day's percentage change; 'class' is 1 when it is
# positive, else 0. The final row has no next day (Return is NaN) and is
# removed by the dropna() below.
data['Return'] = data['Close'].pct_change(1).shift(-1)
data['class'] = np.where(data['Return'] > 0, 1, 0)

# Drop indicator warm-up rows and the unlabeled last row. A flat 14-day
# window makes high14 == low14, so %K/%R can be +/-inf; dropna() ignores
# infinities and the classifier rejects them, so convert them to NaN first.
data = data.replace([np.inf, -np.inf], np.nan).dropna()

# Keep only the most recent 200 rows.
data = data.iloc[-200:]

# Features and target.
predictors = ['EMA10gtEMA30', 'ClGtEMA10', 'MACD', 'RSI', '%K', '%R', 'PROC']
X = data[predictors]
y = data['class']

# Chronological split: train on the oldest 70 % and test on the newest 30 %.
# Shuffling time-ordered rows would let the model train on days that come
# after the ones it is evaluated on, and would change the score every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, shuffle=False)

# Train the model.
rfc = RandomForestClassifier(random_state=0)
rfc = rfc.fit(X_train, y_train)

# Predict on the held-out (most recent) rows.
y_pred = rfc.predict(X_test)

# Report accuracy and per-class precision/recall.
report = classification_report(y_test, y_pred)
print('Model accuracy', accuracy_score(y_test, y_pred, normalize=True))
print(report)

                  High         Low        Open       Close     Volume  \
Date                                                                    
2017-01-03  135.830002  133.660004  133.910004  135.720001  1443000.0   
2017-01-04  138.889999  133.699997  136.080002  138.720001  1207800.0   
2017-01-05  142.770004  138.330002  139.080002  141.710007  1446700.0   
2017-01-06  143.149994  140.990005  141.000000  141.809998   983700.0   
2017-01-09  142.610001  140.050003  141.309998  142.350006   986200.0   
...                ...         ...         ...         ...        ...   
2022-06-13  249.800003  241.149994  248.880005  242.229996  2242000.0   
2022-06-14  246.369995  240.110001  242.149994  245.710007  2205400.0   
2022-06-15  249.710007  243.960007  247.050003  247.100006  1643400.0   
2022-06-16  248.520004  240.179993  247.289993  248.300003  1904200.0   
2022-06-17  251.820007  241.539993  250.169998  244.520004  3822900.0   

             Adj Close  EMA10gtEMA30  ClGtEMA10   