In [1]:
# Import
import pandas as pd
from finta import TA as ta
from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import yfinance as yf
from sklearn.preprocessing import StandardScaler


# Setting these options will allow for reviewing more of the DataFrames
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 2000)
pd.set_option('display.width', 1000)

In [2]:
df = yf.download("BTC-USD", interval="1h", start="2020-05-01", end=pd.to_datetime('today'))

df.tail()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
2022-04-05 10:00:00+00:00,46589.066406,46663.65625,46549.433594,46661.402344,46661.402344,143583232
2022-04-05 11:00:00+00:00,46662.25,46706.382812,46597.640625,46674.746094,46674.746094,164456448
2022-04-05 12:00:00+00:00,46676.183594,47106.140625,46676.183594,46912.621094,46912.621094,1234987008
2022-04-05 13:00:00+00:00,46888.921875,46888.921875,46800.296875,46818.910156,46818.910156,14270464
2022-04-05 13:05:00+00:00,46835.671875,46835.671875,46835.671875,46835.671875,46835.671875,0


In [3]:
def appendData(maindf, dataarray, namesarray=None):
    if namesarray==None:
        return maindf.join(pd.DataFrame(dataarray), how='outer')
    return maindf.join(pd.DataFrame(dataarray,columns=namesarray), how='outer')

In [4]:
### Oscillators ###
## RSI
df = appendData(df,ta.RSI(df))
## Sto-%K
df = appendData(df,ta.STOCH(df))
## CCI
df = appendData(df,ta.CCI(df))
## ADX
df = appendData(df,ta.ADX(df))
## DMI (Added to aid in interpreting ADX)
df = appendData(df,ta.DMI(df, 14))
## Awesome
df = appendData(df,ta.AO(df))
## Momentum
df = appendData(df,ta.MOM(df,10))
## MACD (We rename the undescriptive "SIGNAL" here)
df = appendData(df,ta.MACD(df)).rename(columns={"SIGNAL": "MACD SIGNAL"})
## Sto-RSI
df = appendData(df,ta.STOCHRSI(df))
## Williams %R
df = appendData(df,ta.WILLIAMS(df))
## Bull-Bear Power
df = appendData(df,ta.EBBP(df))
## Ultimate (FinTA does not name this column, so we must)
df = appendData(df,ta.UO(df),["UO"])
### Moving Averages ###
sma_ema_averages = [5, 10, 20, 30, 50, 100, 200]
## SMA, EMA
for i in sma_ema_averages:
  df = appendData(df,ta.SMA(df, i))
  df = appendData(df,ta.EMA(df, i))
## VWMA
df = appendData(df, ta.VAMA(df, 20))
## Hull
df = appendData(df,ta.HMA(df, 9))
# Ichimoku -- Base (Kijun) and Conversion (Tenkan) Only
df = appendData(df,ta.ICHIMOKU(df).drop(['senkou_span_a','SENKOU','CHIKOU'], axis=1))

In [22]:
# Use the pct_change function to generate the returns from "close"
#df["actual_return"] = df["Close"].pct_change()
i = 3
i_end = 5
model = SVC()
percent_threshold = .03
investment_amount = 10000
training_length = 12
while i <= i_end:
    column_name = str(i) + "_period_return"
    df[column_name] = df["Close"].pct_change(periods=i)
    # Initialize the new `Signal` column
    signal_column_name = str(i) + "_signal"
    df[signal_column_name] = 0.0
    # Generate signal to buy stock long
    df.loc[(df[column_name] >= (percent_threshold)), signal_column_name] = 1
    # Drop all NaN values from the DataFrame
    df = df.dropna()
    y = df[signal_column_name]
    # set up X for ml 
    X = df[["14 period RSI", "14 period STOCH %K", "20 period CCI", "14 period ADX.", "DI+", "DI-", "AO", "MOM", "MACD", "MACD SIGNAL", "14 period stochastic RSI.", "14 Williams %R", "Bull.", "Bear.", "UO", "5 period SMA", "5 period EMA", "10 period SMA", "10 period EMA", "20 period SMA", "20 period EMA", "30 period SMA", "30 period EMA", "50 period SMA", "50 period EMA", "100 period SMA", "100 period EMA", "200 period SMA", "200 period EMA", "20 period VAMA", "9 period HMA.", "TENKAN", "KIJUN"]].shift().dropna().copy()
    y_count = y.value_counts()
    training_begin = X.index.min()
    # Use the following code to select the ending period for the training data: `training_end = X.index.min() + DateOffset(months=3)`
    training_end = X.index.min() + DateOffset(months=training_length)
    # Generate the X_train and y_train DataFrames using loc to select the rows from `training_begin` up to `training_end`
    # Hint: Use `loc[training_begin:training_end]` for X_train and y_train
    X_train = X.loc[training_begin:training_end]
    y_train = y.loc[training_begin:training_end]

    # Generate the X_test and y_test DataFrames using loc to select from `training_end` to the last row in the DataFrame.
    # Hint: Use `loc[training_end:]` for X_test and y_test
    X_test = X.loc[training_end:]
    y_test = y.loc[training_end:]

    # Use StandardScaler to scale the X_train and X_test data.
    scaler = StandardScaler()
    X_scaler = scaler.fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
 
    # Fit the model to the data using X_train_scaled and y_train
    model = model.fit(X_train_scaled, y_train)

    # Use the trained model to predict the trading signals for the training data.
    training_signal_predictions = model.predict(X_train_scaled)

    # Evaluate the model using a classification report
    training_report_train = classification_report(y_train, training_signal_predictions)
    print(column_name)
    print("Training Report")
    print(training_report_train)

    # Use the trained model to predict the trading signals for the testing data.
    testing_signal_predictions = model.predict(X_test_scaled)

    # Evaluate the model's ability to predict the trading signal for the testing data using a classification report
    training_report_test = classification_report(y_test, testing_signal_predictions)
    print("Testing Report")
    print(training_report_test)

    df = df.drop([column_name, signal_column_name], axis=1)
    i+=1

df.head()

3_period_return
Training Report
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99      8282
         1.0       1.00      0.05      0.09       182

    accuracy                           0.98      8464
   macro avg       0.99      0.52      0.54      8464
weighted avg       0.98      0.98      0.97      8464

Testing Report
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99      7283
         1.0       0.92      0.06      0.12       179

    accuracy                           0.98      7462
   macro avg       0.95      0.53      0.55      7462
weighted avg       0.98      0.98      0.97      7462

4_period_return
Training Report
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99      8202
         1.0       0.93      0.20      0.33       262

    accuracy                           0.97      8464
   macro avg       0.95      0.60      0.66      

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,14 period RSI,14 period STOCH %K,20 period CCI,14 period ADX.,DI+,DI-,AO,MOM,MACD,MACD SIGNAL,14 period stochastic RSI.,14 Williams %R,Bull.,Bear.,UO,5 period SMA,5 period EMA,10 period SMA,10 period EMA,20 period SMA,20 period EMA,30 period SMA,30 period EMA,50 period SMA,50 period EMA,100 period SMA,100 period EMA,200 period SMA,200 period EMA,20 period VAMA,9 period HMA.,TENKAN,KIJUN
2020-05-13 10:00:00+00:00,8926.449219,8947.661133,8917.427734,8947.661133,8947.661133,0,60.936559,93.358618,103.914219,16.736027,26.279087,16.25086,88.121401,15.988281,37.899809,38.096173,0.614913,-6.641382,46.373409,16.140011,67.296839,8914.236523,8922.526561,8917.574414,8909.568987,8895.424121,8880.972377,8845.870605,8859.130449,8790.222031,8858.706741,8957.870518,8946.738939,9155.632046,9037.850026,8904.84885,8919.526367,8911.080078,8806.872559
2020-05-13 11:00:00+00:00,8946.306641,9000.636719,8946.306641,9000.636719,9000.636719,788512768,65.699984,100.0,186.820878,18.280386,33.869563,15.090085,91.705302,62.692383,42.791235,39.035186,0.624811,-0.0,85.156282,30.826204,72.81989,8933.143555,8948.56328,8923.843652,8926.126756,8901.045313,8892.368981,8854.674284,8868.259886,8796.264004,8864.272692,8949.17791,8947.810027,9156.476523,9037.456275,8950.213503,8956.901432,8931.881836,8858.660645
2020-05-13 12:00:00+00:00,9001.78418,9045.630859,8980.541992,9038.044922,9038.044922,654880768,68.610739,97.057754,222.490672,20.353842,39.173938,14.012221,103.563623,109.229492,49.12003,41.052155,0.639415,-2.942246,112.64121,47.552343,80.985294,8962.94043,8978.390494,8934.766602,8946.475514,8907.418945,8906.24288,8864.533854,8879.213759,8803.405469,8871.087372,8940.694033,8949.6031,9157.250386,9037.4625,8970.703957,9004.244101,8954.378906,8881.157715
2020-05-13 13:00:00+00:00,9045.356445,9051.743164,9028.521484,9034.925781,9034.925781,412782592,68.091868,93.127572,208.258357,22.359021,37.446002,13.011348,123.70542,118.032227,53.269894,43.495702,0.651806,-6.872428,104.19121,80.969531,77.517026,8989.75625,8997.23559,8946.569824,8962.557381,8912.961572,8918.498395,8875.635124,8889.259696,8811.567793,8877.512483,8932.713545,8951.298444,9158.105454,9037.435693,8977.809934,9042.800796,8957.435059,8884.213867
2020-05-13 14:00:00+00:00,9119.077148,9119.077148,9025.022461,9047.835938,9047.835938,0,69.13239,72.165987,198.090322,24.994668,47.227793,12.081966,144.406046,130.706055,56.944018,46.185366,0.658662,-27.834013,157.198911,63.144224,66.458048,9013.820898,9014.102372,8959.64043,8978.062573,8921.351709,8930.816256,8888.427051,8899.490422,8821.826621,8884.191908,8925.080273,8953.216494,9158.700366,9037.545531,8977.809934,9067.212731,8991.102051,8917.880859


In [7]:

# investment_amount = 10000 * .05

# # Initialize the new `Signal` column
# df['signal'] = 0.0
# # Generate signal to buy stock long
# df.loc[(df['three_period_return'] >= ((investment_amount * 0.00001))), 'signal'] = 1
# # Generate signal to sell stock short
# #df.loc[(df['three_period_return'] < (investment_amount * 0.00001)), 'signal'] = 0

In [8]:
# # Copy the new "signal" column to a new Series called `y`.
# y = df['signal']
# # set up X for ml 
# X = df[["14 period RSI", "14 period STOCH %K", "20 period CCI", "14 period ADX.", "DI+", "DI-", "AO", "MOM", "MACD", "MACD SIGNAL", "14 period stochastic RSI.", "14 Williams %R", "Bull.", "Bear.", "UO", "5 period SMA", "5 period EMA", "10 period SMA", "10 period EMA", "20 period SMA", "20 period EMA", "30 period SMA", "30 period EMA", "50 period SMA", "50 period EMA", "100 period SMA", "100 period EMA", "200 period SMA", "200 period EMA", "20 period VAMA", "9 period HMA.", "TENKAN", "KIJUN"]].shift().dropna().copy()

In [9]:
# value counts for -1 and 1
# y.value_counts()

0.0    15316
1.0      661
Name: 5_signal, dtype: int64

In [10]:
# # Use the following code to select the start of the training period: `training_begin = X.index.min()`
# training_begin = X.index.min()
# print(training_begin)

# # Use the following code to select the ending period for the training data: `training_end = X.index.min() + DateOffset(months=3)`
# training_end = X.index.min() + DateOffset(months=3)
# print(training_end)

# # Generate the X_train and y_train DataFrames using loc to select the rows from `training_begin` up to `training_end`
# # Hint: Use `loc[training_begin:training_end]` for X_train and y_train
# X_train = X.loc[training_begin:training_end]
# y_train = y.loc[training_begin:training_end]

# # Generate the X_test and y_test DataFrames using loc to select from `training_end` to the last row in the DataFrame.
# # Hint: Use `loc[training_end:]` for X_test and y_test
# X_test = X.loc[training_end:]
# y_test = y.loc[training_end:]

# # Use StandardScaler to scale the X_train and X_test data.
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()
# X_scaler = scaler.fit(X_train)
# X_train_scaled = X_scaler.transform(X_train)
# X_test_scaled = X_scaler.transform(X_test)

2020-05-10 19:00:00+00:00
2020-08-10 19:00:00+00:00


In [11]:
# model = SVC()
 
# # Fit the model to the data using X_train_scaled and y_train
# model = model.fit(X_train_scaled, y_train)

# # Use the trained model to predict the trading signals for the training data.
# training_signal_predictions = model.predict(X_train_scaled)

# # Evaluate the model using a classification report
# from sklearn.metrics import classification_report
# training_report = classification_report(y_train, training_signal_predictions)
# print(training_report)

              precision    recall  f1-score   support

         0.0       0.99      1.00      1.00      2072
         1.0       1.00      0.35      0.51        26

    accuracy                           0.99      2098
   macro avg       1.00      0.67      0.76      2098
weighted avg       0.99      0.99      0.99      2098



In [12]:
# Use the trained model to predict the trading signals for the testing data.
testing_signal_predictions = model.predict(X_test_scaled)

# Evaluate the model's ability to predict the trading signal for the testing data using a classification report
training_report = classification_report(y_test, testing_signal_predictions)
print(training_report)

              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98     13244
         1.0       0.00      0.00      0.00       635

    accuracy                           0.95     13879
   macro avg       0.48      0.50      0.49     13879
weighted avg       0.91      0.95      0.93     13879



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
 # Create a new empty predictions DataFrame using code provided below.
predictions_df = pd.DataFrame(index=X_test.index)
predictions_df['signal'] = testing_signal_predictions
predictions_df['signal'].value_counts()
predictions_df["three_period_return"] = df["three_period_return"]


intial_investment = 10000
# Add in actual returns and calculate trading returns
predictions_df['actual_return'] = df['actual_return']
predictions_df['trading_algorithm_returns_3'] = predictions_df.loc[predictions_df["signal"]==1]["three_period_return"]
#predictions_df["trading_algorithm_returns_5"] = 
predictions_df["trading_algorithm_returns_3"].fillna(0, inplace=True)
predictions_df["trading_algo_returns_3_+1"] = ((predictions_df["trading_algorithm_returns_3"] + 1) * intial_investment)
# we need to add trading algo cumulative returns in order to plot agaisnt actual returns and see how well our algo performed compared to actual returns

predictions_df[50:100]

KeyError: 'three_period_return'

In [None]:
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`

(1 + predictions_df[['actual_return', 'trading_algo_returns_3_+1']]).cumprod().plot()