In [4]:
import numpy as np
import pandas as pd
import yfinance as yf
import datetime as dt
import math
import plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [5]:
# start = dt.datetime.today()-dt.timedelta(10000)
# end = dt.datetime.today()
stock = yf.Ticker("NVDA")

In [6]:
# df = yf.download(ticker, start, end)
df = stock.history(period="max")
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-06-13,160.0,163.119995,156.059998,156.470001,60415300.0,0.0,0.0
2022-06-14,157.309998,160.660004,154.119995,158.360001,46968000.0,0.0,0.0
2022-06-15,160.949997,168.679993,159.259995,165.270004,56393600.0,0.0,0.0
2022-06-16,158.600006,159.419998,154.009995,156.009995,54574900.0,0.0,0.0
2022-06-17,156.479996,159.949997,153.279999,158.800003,62905700.0,0.0,0.0


In [7]:
def psar(df, iaf = 0.02, maxaf = 0.2):
    length = len(df)
    dates = list(df.index)
    high = list(df['High'])
    low = list(df['Low'])
    close = list(df['Close'])
    psar = close[0:len(close)]
    psarbull = [None] * length # Bullish signal - dot below candle
    psarbear = [None] * length # Bearish signal - dot above candle
    bull = True
    af = iaf # acceleration factor
    ep = low[0] # ep = Extreme Point
    hp = high[0] # High Point
    lp = low[0] # Low Point

    # https://www.investopedia.com/terms/p/parabolicindicator.asp - Parabolic Stop & Reverse Formula from Investopedia 
    for i in range(2,length):
        if bull:
            psar[i] = psar[i - 1] + af * (hp - psar[i - 1])
        else:
            psar[i] = psar[i - 1] + af * (lp - psar[i - 1])
        reverse = False
        if bull:
            if low[i] < psar[i]:
                bull = False
                reverse = True
                psar[i] = hp
                lp = low[i]
                af = iaf
        else:
            if high[i] > psar[i]:
                bull = True
                reverse = True
                psar[i] = lp
                hp = high[i]
                af = iaf
        if not reverse:
            if bull:
                if high[i] > hp:
                    hp = high[i]
                    af = min(af + iaf, maxaf)
                if low[i - 1] < psar[i]:
                    psar[i] = low[i - 1]
                if low[i - 2] < psar[i]:
                    psar[i] = low[i - 2]
            else:
                if low[i] < lp:
                    lp = low[i]
                    af = min(af + iaf, maxaf)
                if high[i - 1] > psar[i]:
                    psar[i] = high[i - 1]
                if high[i - 2] > psar[i]:
                    psar[i] = high[i - 2]
        if bull:
            psarbull[i] = psar[i]
        else:
            psarbear[i] = psar[i]
    return {"dates":dates, "high":high, "low":low, "close":close, "psar":psar, "psarbear":psarbear, "psarbull":psarbull}

In [8]:
if __name__ == "__main__":
    import sys
    import os
    
    startidx = 0
    endidx = len(df)
    
    result = psar(df)
    dates = result['dates'][startidx:endidx]
    close = result['close'][startidx:endidx]
    psarbear = result['psarbear'][startidx:endidx]
    psarbull = result['psarbull'][startidx:endidx]
    df['Slow MA'] = df['Close'].rolling(200).mean()
#     df['Fast MA'] = df['Adj Close'].rolling(50).mean()
    
    # plt.title('PSAR (Parabolic Stop & Reverse)')
    # plt.plot(dates, close, label = ticker)
    # plt.plot(dates, psarbull, label = 'Buy', color = 'green')
    # plt.plot(dates, psarbear, label = 'Sell', color = 'red' )
    # plt.plot(df['200 MA'], label = '200 Day MA')
    # plt.legend()
    # plt.grid()
    # plt.show()

fig = go.Figure(data=[go.Candlestick(x=df.index,
                open=df['Open'],
                high=df['High'],
                low=df['Low'],
                close=df['Close'])])

fig.add_trace(go.Scatter(x=dates, y=psarbull, name='buy',mode = 'markers',
                         marker = dict(color='green', size=4)))

fig.add_trace(go.Scatter(x=dates, y=psarbear, name='sell', mode = 'markers',
                         marker = dict(color='red', size=4)))

fig.add_trace(go.Scatter(x=df.index, y=df['Slow MA'], name='Slow MA',
                         line = dict(color='orange', width=2)))

# fig.add_trace(go.Scatter(x=df.index, y=df['Fast MA'], name='fast MA',
#                           line = dict(color='Blue', width=2)))

fig.show()

In [11]:
import talib

In [12]:
# df['50 MA'], df['200 MA'] = talib.MA(df['Adj Close'], timeperiod=50, matype=0), talib.MA(df['Adj Close'], timeperiod=200, matype=0)
# df['9 MA'], df['21 MA'] = talib.MA(df['Adj Close'], timeperiod=9, matype=0), talib.MA(df['Adj Close'], timeperiod=21, matype=0)
df['PSAR'] = real = talib.SAR(df['High'], df['Low'], acceleration=0.02, maximum=0.2)
# df['upperband'], df['middleband'], df['lowerband'] = talib.BBANDS(df['Adj Close'], timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
df['Action'] = np.where(df['Close'] > df['Slow MA'] , 1, 0) 
df['Action'] = np.where(df['Close'] < df['Slow MA'], -1, df['Action'])
df['PSAR_Action'] = np.where(df['PSAR'] < df['Low'] , 1, 0) 
df['PSAR_Action'] = np.where(df['PSAR'] > df['High'], -1, df['PSAR_Action'])
df.dropna(inplace=True)

In [13]:
len(df)

5493

In [14]:
def signal(df):
    if df['Action'] == 1 and df['PSAR_Action'] == 1:
        return 1
    elif df['Action'] == -1 and df['PSAR_Action'] == -1:
        return -1
    else:
        return 0

In [15]:
df['signal'] = df.apply(signal, axis = 1)

In [16]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Slow MA,PSAR,Action,PSAR_Action,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1999-11-04,0.530869,0.572717,0.530869,0.558369,126033600.0,0.0,0.0,0.392613,0.381270,1,1,1
1999-11-05,0.573912,0.578695,0.514130,0.540434,61636800.0,0.0,0.0,0.393432,0.396586,1,1,1
1999-11-08,0.523695,0.571521,0.506956,0.555978,47001600.0,0.0,0.0,0.394131,0.414797,1,1,1
1999-11-09,0.554782,0.557173,0.526087,0.547608,27235200.0,0.0,0.0,0.394950,0.431187,1,1,1
1999-11-10,0.549401,0.549999,0.532064,0.542825,14404800.0,0.0,0.0,0.395752,0.445937,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
2022-06-13,160.000000,163.119995,156.059998,156.470001,60415300.0,0.0,0.0,240.948888,195.599362,-1,-1,-1
2022-06-14,157.309998,160.660004,154.119995,158.360001,46968000.0,0.0,0.0,240.609658,194.017788,-1,-1,-1
2022-06-15,160.949997,168.679993,159.259995,165.270004,56393600.0,0.0,0.0,240.302380,191.623920,-1,-1,-1
2022-06-16,158.600006,159.419998,154.009995,156.009995,54574900.0,0.0,0.0,239.963744,189.373685,-1,-1,-1


In [17]:
# fig1 = go.Figure(data=[go.Candlestick(x=df.index,
#                 open=df['Open'],
#                 high=df['High'],
#                 low=df['Low'],
#                 close=df['Close'])])

# fig1.add_trace(go.Scatter(x=dates, y=df['PSAR'], name='Parabolic Stop & Reverse (PSAR Signals)', mode = 'markers',
#                          marker = dict(color='black', size=4)))

# fig1.add_trace(go.Scatter(x=df.index, y=df['200 MA'], name='Slow MA',
#                          line = dict(color='orange', width=2)))

# fig1.add_trace(go.Scatter(x=df.index, y=df['50 MA'], name='fast MA',
#                          line = dict(color='Blue', width=2)))

# fig1.show()

In [18]:
# Create our features


# Shifting Signal to predict values from the previous day
##__________________ Work on shifitng only
X_df = df.drop(columns=['signal']).shift().dropna()




#X = pd.get_dummies(X_df, columns=["Close","High","Low","Open","Slow MA","PSAR", "Action", "PSAR_Action"], drop_first = True)
#Slow MA PSAR Action PSAR_Action

# Create our target
y = df['signal'].to_frame()

In [31]:
#X.describe()
X_df.describe()

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Slow MA,PSAR,Action,PSAR_Action
count,5492.0,5492.0,5492.0,5492.0,5492.0,5492.0,5492.0,5492.0,5492.0,5492.0,5492.0
mean,27.423845,27.92768,26.871696,27.412315,60685910.0,0.000229,0.00173,23.280763,27.228534,0.419519,0.088128
std,55.323407,56.423916,54.059559,55.255562,41928150.0,0.002794,0.06912,46.601827,55.380549,0.907829,0.995651
min,0.523695,0.549999,0.506956,0.540434,4564400.0,0.0,0.0,0.392613,0.38127,-1.0,-1.0
25%,2.555059,2.630431,2.49078,2.549893,34677000.0,0.0,0.0,2.451677,2.547703,-1.0,-1.0
50%,3.999021,4.057561,3.927856,3.996727,51593400.0,0.0,0.0,3.692618,3.98659,1.0,1.0
75%,24.585003,24.841531,24.104194,24.485944,73715900.0,0.0,0.0,14.870087,23.662581,1.0,1.0
max,335.001042,346.295333,320.198497,333.591766,923085600.0,0.04,4.0,244.267109,346.295333,1.0,1.0


In [53]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Slow MA,PSAR,Action,PSAR_Action,signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1999-11-04,0.530869,0.572717,0.530869,0.558369,126033600.0,0.0,0.0,0.392613,0.38127,1,1,1
1999-11-05,0.573912,0.578695,0.51413,0.540434,61636800.0,0.0,0.0,0.393432,0.396586,1,1,1
1999-11-08,0.523695,0.571521,0.506956,0.555978,47001600.0,0.0,0.0,0.394131,0.414797,1,1,1
1999-11-09,0.554782,0.557173,0.526087,0.547608,27235200.0,0.0,0.0,0.39495,0.431187,1,1,1
1999-11-10,0.549401,0.549999,0.532064,0.542825,14404800.0,0.0,0.0,0.395752,0.445937,1,1,1


In [54]:
X_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Slow MA,PSAR,Action,PSAR_Action
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1999-11-05,0.530869,0.572717,0.530869,0.558369,126033600.0,0.0,0.0,0.392613,0.38127,1.0,1.0
1999-11-08,0.573912,0.578695,0.51413,0.540434,61636800.0,0.0,0.0,0.393432,0.396586,1.0,1.0
1999-11-09,0.523695,0.571521,0.506956,0.555978,47001600.0,0.0,0.0,0.394131,0.414797,1.0,1.0
1999-11-10,0.554782,0.557173,0.526087,0.547608,27235200.0,0.0,0.0,0.39495,0.431187,1.0,1.0
1999-11-11,0.549401,0.549999,0.532064,0.542825,14404800.0,0.0,0.0,0.395752,0.445937,1.0,1.0


In [33]:
len(X_df)

5492

In [34]:
y['signal'].value_counts()

 0    2304
 1    2291
-1     898
Name: signal, dtype: int64

In [35]:
# Split the X and y into X_train, X_test, y_train, y_test
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [36]:
# Import required libraries
from pandas.tseries.offsets import DateOffset

In [39]:
t = X_df.index.max() - X_df.index.min()
(t * 0.70).days

5782

In [40]:
training_begin = X_df.index.min()

# Display the training begin date
print(training_begin)

1999-11-05 00:00:00


In [41]:
# Select the ending period for the training data with an offset of 3 months
training_end = X_df.index.min() + DateOffset(days= (t * 0.70).days )

# Display the training end date
print(training_end)

2015-09-04 00:00:00


In [42]:
# Generate the X_train and y_train DataFrames
X_train = X_df.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

In [43]:
# Generate the X_test and y_test DataFrames
X_test = X_df.loc[training_end:]
y_test = y.loc[training_end:]

In [44]:
# Create the StandardScaler instance
scaler = StandardScaler()

In [45]:
# Fit the Standard Scaler with the training data
# When fitting scaling functions, only train on the training dataset
X_scaler = scaler.fit(X_train)

In [46]:
# Scale the training and testing data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [47]:
# Resample the training data with the BalancedRandomForestClassifier
from imblearn.ensemble import BalancedRandomForestClassifier
brf = BalancedRandomForestClassifier(n_estimators=100, random_state=1)

#PRODUCT BACKLOG: Develope and evaluate additioanl models


brf.fit(X_train, y_train)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



BalancedRandomForestClassifier(random_state=1)

In [48]:
# Calculated the balanced accuracy score
from sklearn.metrics import balanced_accuracy_score

y_pred = brf.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.7039195271929052

In [49]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[137,   5,   9],
       [ 72, 144, 445],
       [  2,  10, 885]])

In [50]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

         -1       0.65      0.91      0.95      0.76      0.93      0.86       151
          0       0.91      0.22      0.99      0.35      0.46      0.20       661
          1       0.66      0.99      0.44      0.79      0.66      0.46       897

avg / total       0.75      0.68      0.70      0.62      0.61      0.39      1709



In [51]:
pred_col = np.concatenate((y_test, pd.DataFrame(y_pred)), axis=0)

In [52]:
#df['Prediction'] = pred_col

ValueError: Length of values (3418) does not match length of index (5493)

In [None]:
len(df)

In [None]:
len(pred_col)