In [1]:
import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import datetime 

from tensorflow.keras import Model, Sequential

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError

from tensorflow.keras.layers import Dense, Conv1D, LSTM, Lambda, Reshape, RNN, LSTMCell

#Yahoo!Finance 模組
import yfinance as yf 
import datetime as dt
import warnings
warnings.filterwarnings('ignore')

In [2]:
plt.rcParams['figure.figsize'] = (10, 7.5)
plt.rcParams['axes.grid'] = False

In [3]:
print(tf.__version__)

2.12.0


In [4]:
tf.random.set_seed(42)
np.random.seed(42)

In [5]:
# https://finance.yahoo.com/most-active
# https://finance.yahoo.com/crypto/
symbol = "BTC-USD"
# Read data
# start =dt.date(2018, 1, 1)
end = dt.date(2023, 7, 1)


df = yf.download(symbol,end=end)
df_Indicators= yf.download(symbol)
df.tail()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-06-26,30480.523438,30636.029297,29955.744141,30271.130859,30271.130859,16493186997
2023-06-27,30274.320312,31006.787109,30236.650391,30688.164062,30688.164062,16428827944
2023-06-28,30696.560547,30703.279297,29921.822266,30086.246094,30086.246094,14571500779
2023-06-29,30086.1875,30796.25,30057.203125,30445.351562,30445.351562,13180860821
2023-06-30,30441.353516,31256.863281,29600.275391,30477.251953,30477.251953,26387306197


In [6]:
df_Indicators.shape

(3243, 6)

In [7]:
def SMA(close,n):   
    return close.rolling(window=n).mean()

def WMA(close, n):
    weights = np.array(range(1, n+1))
    sum_weights = np.sum(weights)

    res = close.rolling(window=n).apply(lambda x: np.sum(weights*x) / sum_weights, raw=False)
    return res

def EMA(close, n):    
    res=close.ewm(span=n,min_periods=n).mean()
    return res

def RSI(close, n):
    # RSI
    change = close.diff(1)
    df["Gain"] = change.mask(change < 0, 0)
    df["Loss"] = abs(change.mask(change > 0, 0))
    df["AVG_Gain"] = df.Gain.rolling(n).mean()
    df["AVG_Loss"] = df.Loss.rolling(n).mean()
    df["RS"] = df["AVG_Gain"] / df["AVG_Loss"]
    df["RSI"] = 100 - (100 / (1 + df["RS"]))
    return df["RSI"]


def Stoch_RSI(close, n):
    #Stochastic RSI
    RSI=df["RSI"]
    LL_RSI = RSI.rolling(n).min()
    HH_RSI = RSI.rolling(n).max()         
    return (RSI - LL_RSI) / (HH_RSI - LL_RSI)

def BB(close, n):
    MA = pd.Series(close.rolling(n).mean())
    STD = pd.Series(close.rolling(n).std())
    bb1 = MA + 2 * STD
    df["Upper Bollinger Band"] = pd.Series(bb1)
    bb2 = MA - 2 * STD
    df["Lower Bollinger Band"] = pd.Series(bb2)
    return df["Upper Bollinger Band"],df["Lower Bollinger Band"]

def MACD(close):
    df['EMA12'] = close.ewm(span=12, adjust=False).mean() 
    df['EMA26'] = close.ewm(span=26, adjust= False).mean() 
    return df['EMA12'] - df['EMA26'] 


# 指標副程式(資料,天數)
df_Indicators['SMA_7']=SMA(df['Adj Close'],7)
df_Indicators['SMA_14']=SMA(df['Adj Close'],14)

df_Indicators['EMA_7']=EMA(df['Adj Close'],7)
df_Indicators['EMA_14']=EMA(df['Adj Close'],14)

df_Indicators['RSI']=RSI(df['Adj Close'],14)


df_Indicators['Stoch_RSI']=Stoch_RSI(df['Adj Close'],14)

df_Indicators['Upper_BB']=BB(df['Adj Close'],20)[0]
df_Indicators['Lower_BB']=BB(df['Adj Close'],20)[1]

df_Indicators['MACD']=MACD(df['Adj Close'])
df_Indicators= df_Indicators.dropna()
df_Indicators.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,SMA_7,SMA_14,EMA_7,EMA_14,RSI,Stoch_RSI,Upper_BB,Lower_BB,MACD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2014-10-14,391.691986,411.697998,391.324005,400.869995,400.869995,38491500,373.094291,360.392931,376.98669,371.799589,54.529605,0.991932,422.829855,317.231348,-10.737706
2014-10-15,400.954987,402.22699,388.765991,394.77301,394.77301,25267100,379.070435,361.189933,381.434329,374.911777,53.565092,0.965996,419.691379,318.689724,-8.196223
2014-10-16,394.518005,398.807007,373.070007,382.556,382.556,26990000,381.57472,361.724505,381.714797,375.945126,52.336359,0.932954,416.279522,319.914682,-7.086205
2014-10-17,382.756012,385.477997,375.389008,383.757996,383.757996,13600700,384.745575,363.456362,382.225665,376.999326,58.314477,1.0,413.813751,320.804254,-6.039892
2014-10-18,383.976013,395.15799,378.971008,391.441986,391.441986,11416800,388.908857,367.926076,384.529977,378.944982,75.469711,1.0,415.589314,320.45479,-4.538333


In [8]:
df_Indicators.shape

(3182, 15)

In [9]:
df_Indicators.to_csv('BTC-all.csv')

In [10]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(df_Indicators)
df_Indicators[df_Indicators.columns] = scaler.transform(df_Indicators[df_Indicators.columns])
df_Indicators.to_csv('BTC-all Standardization.csv')

In [9]:
n = len(df)
train_df = df_Indicators[0:int(n*0.7)]
val_df = df_Indicators[int(n*0.7):int(n*0.9)]
test_df = df_Indicators[int(n*0.9):]

train_df.shape, val_df.shape, test_df.shape

((2246, 15), (642, 15), (294, 15))

In [11]:
#將值縮限在0~1之間，加速訓練時間
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(train_df)

train_df[train_df.columns] = scaler.transform(train_df[train_df.columns])
val_df[val_df.columns] = scaler.transform(val_df[val_df.columns])
test_df[test_df.columns] = scaler.transform(test_df[test_df.columns])

In [12]:
train_df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,SMA_7,SMA_14,EMA_7,EMA_14,RSI,Stoch_RSI,Upper_BB,Lower_BB,MACD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2014-10-14,0.011040,0.010060,0.011691,0.011455,0.011455,0.000439,0.008881,0.007476,0.008575,0.007971,0.524163,0.991932,0.008637,0.008755,0.362633
2014-10-15,0.011516,0.009584,0.011555,0.011141,0.011141,0.000261,0.009196,0.007520,0.008812,0.008140,0.514017,0.965996,0.008490,0.008842,0.363263
2014-10-16,0.011185,0.009412,0.010720,0.010513,0.010513,0.000284,0.009328,0.007549,0.008827,0.008196,0.501092,0.932954,0.008329,0.008915,0.363538
2014-10-17,0.010580,0.008741,0.010843,0.010575,0.010575,0.000104,0.009495,0.007643,0.008854,0.008254,0.563976,1.000000,0.008214,0.008968,0.363797
2014-10-18,0.010643,0.009228,0.011034,0.010970,0.010970,0.000074,0.009715,0.007886,0.008976,0.008360,0.744431,1.000000,0.008297,0.008947,0.364169
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-02,0.957237,0.960725,0.966686,0.978160,0.978160,0.504131,0.951141,0.987597,0.980458,0.976550,0.573236,0.083113,0.929510,0.927519,0.616316
2020-12-03,0.978011,0.973698,0.997430,0.990722,0.990722,0.430533,0.968415,0.993920,0.991196,0.985749,0.585914,0.120464,0.935484,0.938556,0.620090
2020-12-04,0.990399,0.970942,0.985273,0.952382,0.952382,0.456724,0.980394,0.994225,0.989329,0.988313,0.481826,0.000000,0.932093,0.958499,0.605407
2020-12-05,0.951925,0.953286,0.979582,0.975750,0.975750,0.367313,0.991210,0.996213,0.993975,0.993832,0.509342,0.070827,0.926855,0.984153,0.600149


In [10]:
train_df.to_csv('data/train.csv')
val_df.to_csv('data/val.csv')
test_df.to_csv('data/test.csv')