In [36]:
# Import
import pandas as pd
from finta import TA as ta
from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import yfinance as yf
from sklearn.preprocessing import StandardScaler


# Setting these options will allow for reviewing more of the DataFrames
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 2000)
pd.set_option('display.width', 1000)

In [37]:
df = yf.download("BTC-USD", interval="1h", start="2020-05-01", end=pd.to_datetime('today'))

df.tail()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
2022-04-06 15:00:00+00:00,44308.28125,44308.28125,43845.363281,44151.644531,44151.644531,1156792320
2022-04-06 16:00:00+00:00,44155.347656,44231.949219,43729.03125,43764.480469,43764.480469,1157632000
2022-04-06 17:00:00+00:00,43809.972656,43941.277344,43538.171875,43906.554688,43906.554688,944455680
2022-04-06 18:00:00+00:00,43905.808594,44207.109375,43452.957031,43643.773438,43643.773438,1831428096
2022-04-06 18:37:00+00:00,43676.15625,43676.15625,43676.15625,43676.15625,43676.15625,0


In [38]:
def appendData(maindf, dataarray, namesarray=None):
    if namesarray==None:
        return maindf.join(pd.DataFrame(dataarray), how='outer')
    return maindf.join(pd.DataFrame(dataarray,columns=namesarray), how='outer')

In [39]:
### Oscillators ###
## RSI
df = appendData(df,ta.RSI(df))
## Sto-%K
df = appendData(df,ta.STOCH(df))
## CCI
df = appendData(df,ta.CCI(df))
## ADX
df = appendData(df,ta.ADX(df))
## DMI (Added to aid in interpreting ADX)
df = appendData(df,ta.DMI(df, 14))
## Awesome
df = appendData(df,ta.AO(df))
## Momentum
df = appendData(df,ta.MOM(df,10))
## MACD (We rename the undescriptive "SIGNAL" here)
df = appendData(df,ta.MACD(df)).rename(columns={"SIGNAL": "MACD SIGNAL"})
## Sto-RSI
df = appendData(df,ta.STOCHRSI(df))
## Williams %R
df = appendData(df,ta.WILLIAMS(df))
## Bull-Bear Power
df = appendData(df,ta.EBBP(df))
## Ultimate (FinTA does not name this column, so we must)
df = appendData(df,ta.UO(df),["UO"])
### Moving Averages ###
sma_ema_averages = [5, 10, 20, 30, 50, 100, 200]
## SMA, EMA
for i in sma_ema_averages:
  df = appendData(df,ta.SMA(df, i))
  df = appendData(df,ta.EMA(df, i))
## VWMA
df = appendData(df, ta.VAMA(df, 20))
## Hull
df = appendData(df,ta.HMA(df, 9))
# Ichimoku -- Base (Kijun) and Conversion (Tenkan) Only
df = appendData(df,ta.ICHIMOKU(df).drop(['senkou_span_a','SENKOU','CHIKOU'], axis=1))

In [16]:
# Use the pct_change function to generate the returns from "close"
#df["actual_return"] = df["Close"].pct_change()
i = 3
i_end = 5
model = SVC()
percent_threshold = .03
investment_amount = 10000
training_length = 12
while i <= i_end:
    column_name = str(i) + "_period_return"
    df[column_name] = df["Close"].pct_change(periods=i)
    # Initialize the new `Signal` column
    signal_column_name = str(i) + "_signal"
    df[signal_column_name] = 0.0
    # Generate signal to buy stock long
    df.loc[(df[column_name] >= (percent_threshold)), signal_column_name] = 1
    # Drop all NaN values from the DataFrame
    df = df.dropna()
    y = df[signal_column_name]
    # set up X for ml 
    X = df[["14 period RSI", "14 period STOCH %K", "20 period CCI", "14 period ADX.", "DI+", "DI-", "AO", "MOM", "MACD", "MACD SIGNAL", "14 period stochastic RSI.", "14 Williams %R", "Bull.", "Bear.", "UO", "5 period SMA", "5 period EMA", "10 period SMA", "10 period EMA", "20 period SMA", "20 period EMA", "30 period SMA", "30 period EMA", "50 period SMA", "50 period EMA", "100 period SMA", "100 period EMA", "200 period SMA", "200 period EMA", "20 period VAMA", "9 period HMA.", "TENKAN", "KIJUN"]].shift().dropna().copy()
    y_count = y.value_counts()
    training_begin = X.index.min()
    # Use the following code to select the ending period for the training data: `training_end = X.index.min() + DateOffset(months=3)`
    training_end = X.index.min() + DateOffset(months=training_length)
    # Generate the X_train and y_train DataFrames using loc to select the rows from `training_begin` up to `training_end`
    # Hint: Use `loc[training_begin:training_end]` for X_train and y_train
    X_train = X.loc[training_begin:training_end]
    y_train = y.loc[training_begin:training_end]

    # Generate the X_test and y_test DataFrames using loc to select from `training_end` to the last row in the DataFrame.
    # Hint: Use `loc[training_end:]` for X_test and y_test
    X_test = X.loc[training_end:]
    y_test = y.loc[training_end:]

    # Use StandardScaler to scale the X_train and X_test data.
    scaler = StandardScaler()
    X_scaler = scaler.fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
 
    # Fit the model to the data using X_train_scaled and y_train
    model = model.fit(X_train_scaled, y_train)

    # Use the trained model to predict the trading signals for the training data.
    training_signal_predictions = model.predict(X_train_scaled)

    # Evaluate the model using a classification report
    training_report_train = classification_report(y_train, training_signal_predictions)
    print(column_name)
    print("Training Report")
    print(training_report_train)

    # Use the trained model to predict the trading signals for the testing data.
    testing_signal_predictions = model.predict(X_test_scaled)

    # Evaluate the model's ability to predict the trading signal for the testing data using a classification report
    training_report_test = classification_report(y_test, testing_signal_predictions)
    print("Testing Report")
    print(training_report_test)

    df = df.drop([column_name, signal_column_name], axis=1)
    i+=1

df.head()

3_period_return
Training Report
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99      8278
         1.0       1.00      0.05      0.09       185

    accuracy                           0.98      8463
   macro avg       0.99      0.52      0.54      8463
weighted avg       0.98      0.98      0.97      8463

Testing Report
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99      7387
         1.0       0.92      0.06      0.12       177

    accuracy                           0.98      7564
   macro avg       0.95      0.53      0.55      7564
weighted avg       0.98      0.98      0.97      7564

4_period_return
Training Report
              precision    recall  f1-score   support

         0.0       0.97      1.00      0.99      8196
         1.0       0.91      0.20      0.33       267

    accuracy                           0.97      8463
   macro avg       0.94      0.60      0.66      

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,14 period RSI,14 period STOCH %K,20 period CCI,14 period ADX.,DI+,DI-,AO,MOM,MACD,MACD SIGNAL,14 period stochastic RSI.,14 Williams %R,Bull.,Bear.,UO,5 period SMA,5 period EMA,10 period SMA,10 period EMA,20 period SMA,20 period EMA,30 period SMA,30 period EMA,50 period SMA,50 period EMA,100 period SMA,100 period EMA,200 period SMA,200 period EMA,20 period VAMA,9 period HMA.,TENKAN,KIJUN
2020-05-10 06:00:00+00:00,8644.21582,8644.72168,8607.754883,8623.298828,8623.298828,479838208,16.961004,8.422995,-105.862044,49.377536,9.448129,64.758805,-921.139223,-1096.724609,-311.527829,-214.51881,0.300804,-91.577005,-359.981754,-396.948551,26.567172,8632.405859,8694.213246,8954.757715,8900.30881,9328.175391,9182.601878,9483.132096,9332.785485,9656.307734,9451.915232,9499.72624,9446.526063,9200.590874,9349.498826,9197.92368,8479.523872,9088.888672,9200.460449
2020-05-10 07:00:00+00:00,8627.400391,8819.001953,8623.360352,8819.001953,8819.001953,902176768,30.332235,24.12371,-76.317886,49.765079,17.556694,60.133174,-883.479274,-826.564453,-300.662798,-231.747608,0.287068,-75.87629,-159.172697,-354.814299,51.046554,8658.628516,8735.809482,8872.10127,8885.525745,9286.332471,9147.973314,9447.455241,9299.638133,9634.803555,9427.089527,9498.071387,9433.910694,9200.35895,9343.483668,9133.692551,8601.843981,9088.888672,9179.400879
2020-05-10 08:00:00+00:00,8824.740234,8882.166992,8814.505859,8841.40332,8841.40332,1132785664,31.688194,26.072374,-56.623152,49.665297,19.431512,55.837945,-805.360668,-791.199219,-286.936949,-242.785476,0.273877,-73.927626,-76.468897,-144.13003,50.540786,8696.899609,8771.007428,8792.981348,8877.503486,9246.78877,9118.776171,9413.024935,9270.074574,9613.416582,9404.116483,9496.546104,9422.002871,9200.485542,9337.798611,9107.614854,8736.865129,9056.945312,9176.946289
2020-05-10 09:00:00+00:00,8835.525391,8856.499023,8817.598633,8824.708008,8824.708008,116449280,31.200794,25.090666,-52.599221,49.572643,18.043547,51.849519,-722.894764,-809.125977,-274.244962,-249.077373,0.263084,-74.909334,-83.004311,-121.904702,51.805118,8751.481055,8788.907621,8712.06875,8867.904308,9203.677051,9090.76968,9378.185286,9241.341226,9591.941387,9381.38987,9494.851094,9410.002376,9200.671982,9331.99685,9106.682206,8834.239771,8700.238281,9141.539551
2020-05-10 10:00:00+00:00,8795.744141,8825.411133,8773.115234,8818.807617,8818.807617,16891904,31.019206,24.607489,-51.070972,49.605871,16.754722,50.314929,-649.297076,71.595703,-261.646485,-251.591196,0.251959,-75.392511,-96.849956,-149.145855,52.742767,8785.443945,8798.874287,8719.22832,8858.977637,9158.845459,9064.868531,9342.597786,9214.080975,9572.377734,9359.323425,9493.231631,9398.127878,9200.76897,9326.201838,9035.705515,8878.213122,8700.238281,9141.539551


In [17]:
#CASE 2 - Neural network.

volume_df = df["Volume"]
volume_df.tail(100)

2022-04-02 16:00:00+00:00             0
2022-04-02 17:00:00+00:00             0
2022-04-02 18:00:00+00:00    1172469760
2022-04-02 19:00:00+00:00      99033088
2022-04-02 20:00:00+00:00     325091328
2022-04-02 21:00:00+00:00     111712256
2022-04-02 22:00:00+00:00             0
2022-04-02 23:00:00+00:00             0
2022-04-03 00:00:00+00:00     109555712
2022-04-03 01:00:00+00:00             0
2022-04-03 02:00:00+00:00             0
2022-04-03 03:00:00+00:00             0
2022-04-03 04:00:00+00:00     167596032
2022-04-03 05:00:00+00:00      13764608
2022-04-03 06:00:00+00:00     541728768
2022-04-03 07:00:00+00:00             0
2022-04-03 08:00:00+00:00             0
2022-04-03 09:00:00+00:00      91154432
2022-04-03 10:00:00+00:00             0
2022-04-03 11:00:00+00:00             0
2022-04-03 12:00:00+00:00             0
2022-04-03 13:00:00+00:00     139196416
2022-04-03 14:00:00+00:00             0
2022-04-03 15:00:00+00:00             0
2022-04-03 16:00:00+00:00             0


In [42]:
# Model K-nearest neighbors.
from sklearn.neighbors import KNeighborsClassifier


i = 3
i_end = 30
model = KNeighborsClassifier(n_neighbors=3)
percent_threshold = .03
investment_amount = 10000
training_length = 12
while i <= i_end:
    column_name = str(i) + "_period_return"
    df[column_name] = df["Close"].pct_change(periods=i)
    # Initialize the new `Signal` column
    signal_column_name = str(i) + "_signal"
    df[signal_column_name] = 0.0
    # Generate signal to buy stock long
    df.loc[(df[column_name] >= (percent_threshold)), signal_column_name] = 1
    # Drop all NaN values from the DataFrame
    df = df.dropna()
    y = df[signal_column_name]
    # set up X for ml 
    X = df[["14 period RSI", "14 period STOCH %K", "20 period CCI", "14 period ADX.", "DI+", "DI-", "AO", "MOM", "MACD", "MACD SIGNAL", "14 period stochastic RSI.", "14 Williams %R", "Bull.", "Bear.", "UO", "5 period SMA", "5 period EMA", "10 period SMA", "10 period EMA", "20 period SMA", "20 period EMA", "30 period SMA", "30 period EMA", "50 period SMA", "50 period EMA", "100 period SMA", "100 period EMA", "200 period SMA", "200 period EMA", "20 period VAMA", "9 period HMA.", "TENKAN", "KIJUN"]].shift().dropna().copy()
    y_count = y.value_counts()
    training_begin = X.index.min()
    # Use the following code to select the ending period for the training data: `training_end = X.index.min() + DateOffset(months=3)`
    training_end = X.index.min() + DateOffset(months=training_length)
    # Generate the X_train and y_train DataFrames using loc to select the rows from `training_begin` up to `training_end`
    # Hint: Use `loc[training_begin:training_end]` for X_train and y_train
    X_train = X.loc[training_begin:training_end]
    y_train = y.loc[training_begin:training_end]

    # Generate the X_test and y_test DataFrames using loc to select from `training_end` to the last row in the DataFrame.
    # Hint: Use `loc[training_end:]` for X_test and y_test
    X_test = X.loc[training_end:]
    y_test = y.loc[training_end:]

    # Use StandardScaler to scale the X_train and X_test data.
    scaler = StandardScaler()
    X_scaler = scaler.fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
 
    # Fit the model to the data using X_train_scaled and y_train
    model = model.fit(X_train_scaled, y_train)

    # Use the trained model to predict the trading signals for the training data.
    training_signal_predictions = model.predict(X_train_scaled)

    # Evaluate the model using a classification report
    training_report_train = classification_report(y_train, training_signal_predictions)
    print(column_name)
    print("Training Report")
    print(training_report_train)

    # Use the trained model to predict the trading signals for the testing data.
    testing_signal_predictions = model.predict(X_test_scaled)

    # Evaluate the model's ability to predict the trading signal for the testing data using a classification report
    training_report_test = classification_report(y_test, testing_signal_predictions)
    print("Testing Report")
    print(training_report_test)

    df = df.drop([column_name, signal_column_name], axis=1)
    i+=1

display(df.head())
#display(list(df.columns))
#display(y_count)

3_period_return
Training Report
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99      8279
         1.0       0.87      0.33      0.48       184

    accuracy                           0.98      8463
   macro avg       0.93      0.67      0.74      8463
weighted avg       0.98      0.98      0.98      8463

Testing Report
              precision    recall  f1-score   support

         0.0       0.98      0.99      0.99      7361
         1.0       0.35      0.20      0.26       179

    accuracy                           0.97      7540
   macro avg       0.67      0.60      0.62      7540
weighted avg       0.97      0.97      0.97      7540

4_period_return
Training Report
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99      8199
         1.0       0.88      0.52      0.65       264

    accuracy                           0.98      8463
   macro avg       0.93      0.76      0.82      

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,14 period RSI,14 period STOCH %K,20 period CCI,14 period ADX.,DI+,DI-,AO,MOM,MACD,MACD SIGNAL,14 period stochastic RSI.,14 Williams %R,Bull.,Bear.,UO,5 period SMA,5 period EMA,10 period SMA,10 period EMA,20 period SMA,20 period EMA,30 period SMA,30 period EMA,50 period SMA,50 period EMA,100 period SMA,100 period EMA,200 period SMA,200 period EMA,20 period VAMA,9 period HMA.,TENKAN,KIJUN
2020-05-30 20:00:00+00:00,9548.938477,9551.353516,9461.905273,9470.121094,9470.121094,436234240,43.381362,6.157551,-35.124219,31.479375,24.429822,35.61057,57.312221,-72.185547,23.27618,32.237108,0.672706,-93.842449,21.1434,-68.304842,34.383179,9530.991406,9522.969836,9550.146484,9532.27417,9521.879688,9519.425371,9488.424479,9499.454779,9483.326445,9449.963743,9274.551641,9343.818519,9167.232417,9277.251056,9520.481033,9516.382086,9525.540527,9481.149414
2020-05-30 21:00:00+00:00,9472.118164,9501.948242,9470.423828,9501.696289,9501.696289,198596608,49.307148,29.822366,-54.923926,30.560994,22.684835,33.066958,43.871223,-71.105469,19.197476,29.629181,0.658677,-70.177634,-24.188469,-55.712883,41.032534,9521.104102,9515.878654,9543.035938,9526.714555,9527.679492,9517.736887,9490.189128,9499.599392,9483.909727,9451.99247,9281.418203,9346.944815,9169.275508,9279.486735,9518.83341,9497.669864,9525.540527,9481.149414
2020-05-30 22:00:00+00:00,9507.450195,9668.887695,9507.450195,9668.887695,9668.887695,1712558080,68.253756,100.0,153.09063,30.35092,54.138884,30.705032,47.00189,100.838867,29.120341,29.527413,0.659564,-0.0,122.357986,-39.079514,67.457069,9546.259375,9566.881668,9553.119824,9552.564217,9541.167871,9532.132202,9498.73597,9510.521218,9488.082207,9460.498165,9289.916904,9353.319929,9172.171724,9283.36548,9562.321025,9535.532704,9565.396484,9517.927246
2020-05-31 00:00:00+00:00,9700.115234,9700.115234,9667.44043,9671.981445,9671.981445,0,68.488455,88.189499,286.079214,30.528558,56.392594,28.511816,67.596054,99.013672,36.809605,30.983852,0.664047,-11.810501,135.663849,102.989044,63.807847,9572.59082,9601.914927,9563.021191,9574.27644,9555.857275,9545.451177,9506.76582,9520.938007,9492.201348,9468.791627,9299.063779,9359.630065,9175.036636,9287.236366,9562.321025,9603.718493,9581.010254,9533.541016
2020-05-31 01:00:00+00:00,9669.006836,9679.958008,9668.699219,9676.313477,9676.313477,211204096,68.835882,90.008076,237.794413,30.693507,52.364552,26.475257,86.072516,95.191406,42.76005,33.339091,0.66777,-9.991924,99.526323,88.267534,64.517337,9597.8,9626.714443,9572.540332,9592.828629,9565.504492,9557.914254,9514.874935,9530.962231,9496.674961,9476.929739,9307.82959,9365.90103,9177.510493,9291.111804,9576.803432,9672.654315,9581.010254,9533.541016


In [7]:
# investment_amount = 10000 * .05

# # Initialize the new `Signal` column
# df['signal'] = 0.0
# # Generate signal to buy stock long
# df.loc[(df['three_period_return'] >= ((investment_amount * 0.00001))), 'signal'] = 1
# # Generate signal to sell stock short
# #df.loc[(df['three_period_return'] < (investment_amount * 0.00001)), 'signal'] = 0

In [8]:
# # Copy the new "signal" column to a new Series called `y`.
# y = df['signal']
# # set up X for ml 
# X = df[["14 period RSI", "14 period STOCH %K", "20 period CCI", "14 period ADX.", "DI+", "DI-", "AO", "MOM", "MACD", "MACD SIGNAL", "14 period stochastic RSI.", "14 Williams %R", "Bull.", "Bear.", "UO", "5 period SMA", "5 period EMA", "10 period SMA", "10 period EMA", "20 period SMA", "20 period EMA", "30 period SMA", "30 period EMA", "50 period SMA", "50 period EMA", "100 period SMA", "100 period EMA", "200 period SMA", "200 period EMA", "20 period VAMA", "9 period HMA.", "TENKAN", "KIJUN"]].shift().dropna().copy()

In [9]:
# value counts for -1 and 1
# y.value_counts()

0.0    15316
1.0      661
Name: 5_signal, dtype: int64

In [10]:
# # Use the following code to select the start of the training period: `training_begin = X.index.min()`
# training_begin = X.index.min()
# print(training_begin)

# # Use the following code to select the ending period for the training data: `training_end = X.index.min() + DateOffset(months=3)`
# training_end = X.index.min() + DateOffset(months=3)
# print(training_end)

# # Generate the X_train and y_train DataFrames using loc to select the rows from `training_begin` up to `training_end`
# # Hint: Use `loc[training_begin:training_end]` for X_train and y_train
# X_train = X.loc[training_begin:training_end]
# y_train = y.loc[training_begin:training_end]

# # Generate the X_test and y_test DataFrames using loc to select from `training_end` to the last row in the DataFrame.
# # Hint: Use `loc[training_end:]` for X_test and y_test
# X_test = X.loc[training_end:]
# y_test = y.loc[training_end:]

# # Use StandardScaler to scale the X_train and X_test data.
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()
# X_scaler = scaler.fit(X_train)
# X_train_scaled = X_scaler.transform(X_train)
# X_test_scaled = X_scaler.transform(X_test)

2020-05-10 19:00:00+00:00
2020-08-10 19:00:00+00:00


In [11]:
# model = SVC()
 
# # Fit the model to the data using X_train_scaled and y_train
# model = model.fit(X_train_scaled, y_train)

# # Use the trained model to predict the trading signals for the training data.
# training_signal_predictions = model.predict(X_train_scaled)

# # Evaluate the model using a classification report
# from sklearn.metrics import classification_report
# training_report = classification_report(y_train, training_signal_predictions)
# print(training_report)

              precision    recall  f1-score   support

         0.0       0.99      1.00      1.00      2072
         1.0       1.00      0.35      0.51        26

    accuracy                           0.99      2098
   macro avg       1.00      0.67      0.76      2098
weighted avg       0.99      0.99      0.99      2098



In [12]:
# Use the trained model to predict the trading signals for the testing data.
testing_signal_predictions = model.predict(X_test_scaled)

# Evaluate the model's ability to predict the trading signal for the testing data using a classification report
training_report = classification_report(y_test, testing_signal_predictions)
print(training_report)

              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98     13244
         1.0       0.00      0.00      0.00       635

    accuracy                           0.95     13879
   macro avg       0.48      0.50      0.49     13879
weighted avg       0.91      0.95      0.93     13879



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
 # Create a new empty predictions DataFrame using code provided below.
predictions_df = pd.DataFrame(index=X_test.index)
predictions_df['signal'] = testing_signal_predictions
predictions_df['signal'].value_counts()
predictions_df["three_period_return"] = df["three_period_return"]


intial_investment = 10000
# Add in actual returns and calculate trading returns
predictions_df['actual_return'] = df['actual_return']
predictions_df['trading_algorithm_returns_3'] = predictions_df.loc[predictions_df["signal"]==1]["three_period_return"]
#predictions_df["trading_algorithm_returns_5"] = 
predictions_df["trading_algorithm_returns_3"].fillna(0, inplace=True)
predictions_df["trading_algo_returns_3_+1"] = ((predictions_df["trading_algorithm_returns_3"] + 1) * intial_investment)
# we need to add trading algo cumulative returns in order to plot agaisnt actual returns and see how well our algo performed compared to actual returns

predictions_df[50:100]

KeyError: 'three_period_return'

In [None]:
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`

(1 + predictions_df[['actual_return', 'trading_algo_returns_3_+1']]).cumprod().plot()