In [1]:
import datetime

import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
import datetime 

from tensorflow.keras import Model, Sequential

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError

from tensorflow.keras.layers import Dense, Conv1D, LSTM, Lambda, Reshape, RNN, LSTMCell

#Yahoo!Finance 模組
import yfinance as yf 
import datetime as dt
import warnings
warnings.filterwarnings('ignore')

In [2]:
plt.rcParams['figure.figsize'] = (10, 7.5)
plt.rcParams['axes.grid'] = False

In [3]:
print(tf.__version__)

2.12.0


In [4]:
tf.random.set_seed(42)
np.random.seed(42)

In [5]:
# https://finance.yahoo.com/most-active
# https://finance.yahoo.com/crypto/
symbol = "TSLA"
# Read data
# start =dt.date(2018, 1, 1)
end = dt.date(2023, 7, 1)


df = yf.download(symbol,end=end)
df_Indicators= yf.download(symbol)
df.tail()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-06-26,250.070007,258.369995,240.699997,241.050003,241.050003,179990600
2023-06-27,243.240005,250.389999,240.850006,250.210007,250.210007,164968200
2023-06-28,249.699997,259.880005,248.889999,256.23999,256.23999,159770800
2023-06-29,258.029999,260.73999,253.610001,257.5,257.5,131283400
2023-06-30,260.600006,264.450012,259.890015,261.769989,261.769989,112267600


In [6]:
df_Indicators.shape

(3288, 6)

In [7]:
def SMA(close,n):   
    return close.rolling(window=n).mean()

def WMA(close, n):
    weights = np.array(range(1, n+1))
    sum_weights = np.sum(weights)

    res = close.rolling(window=n).apply(lambda x: np.sum(weights*x) / sum_weights, raw=False)
    return res

def EMA(close, n):    
    res=close.ewm(span=n,min_periods=n).mean()
    return res

def RSI(close, n):
    # RSI
    change = close.diff(1)
    df["Gain"] = change.mask(change < 0, 0)
    df["Loss"] = abs(change.mask(change > 0, 0))
    df["AVG_Gain"] = df.Gain.rolling(n).mean()
    df["AVG_Loss"] = df.Loss.rolling(n).mean()
    df["RS"] = df["AVG_Gain"] / df["AVG_Loss"]
    df["RSI"] = 100 - (100 / (1 + df["RS"]))
    return df["RSI"]


def Stoch_RSI(close, n):
    #Stochastic RSI
    RSI=df["RSI"]
    LL_RSI = RSI.rolling(n).min()
    HH_RSI = RSI.rolling(n).max()         
    return (RSI - LL_RSI) / (HH_RSI - LL_RSI)

def BB(close, n):
    MA = pd.Series(close.rolling(n).mean())
    STD = pd.Series(close.rolling(n).std())
    bb1 = MA + 2 * STD
    df["Upper Bollinger Band"] = pd.Series(bb1)
    bb2 = MA - 2 * STD
    df["Lower Bollinger Band"] = pd.Series(bb2)
    return df["Upper Bollinger Band"],df["Lower Bollinger Band"]

def MACD(close):
    df['EMA12'] = close.ewm(span=12, adjust=False).mean() 
    df['EMA26'] = close.ewm(span=26, adjust= False).mean() 
    return df['EMA12'] - df['EMA26'] 


# 指標副程式(資料,天數)
df_Indicators['SMA_7']=SMA(df['Adj Close'],7)
df_Indicators['SMA_14']=SMA(df['Adj Close'],14)

df_Indicators['EMA_7']=EMA(df['Adj Close'],7)
df_Indicators['EMA_14']=EMA(df['Adj Close'],14)

df_Indicators['RSI']=RSI(df['Adj Close'],14)


df_Indicators['Stoch_RSI']=Stoch_RSI(df['Adj Close'],14)

df_Indicators['Upper_BB']=BB(df['Adj Close'],20)[0]
df_Indicators['Lower_BB']=BB(df['Adj Close'],20)[1]

df_Indicators['MACD']=MACD(df['Adj Close'])
df_Indicators= df_Indicators.dropna()
df_Indicators.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,SMA_7,SMA_14,EMA_7,EMA_14,RSI,Stoch_RSI,Upper_BB,Lower_BB,MACD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2010-08-06,1.34,1.344,1.301333,1.306,1.306,11128500,1.375809,1.378524,1.369084,1.366942,36.84802,0.0,1.509766,1.203301,-0.013245
2010-08-09,1.326667,1.332,1.296667,1.306667,1.306667,12190500,1.368667,1.37519,1.353476,1.358777,45.152407,0.227167,1.481319,1.248747,-0.017388
2010-08-10,1.31,1.31,1.254667,1.268667,1.268667,19219500,1.36,1.369524,1.33227,1.346596,42.28276,0.148668,1.46967,1.26633,-0.023467
2010-08-11,1.246,1.258667,1.19,1.193333,1.193333,11964000,1.331238,1.354762,1.297531,1.325916,30.769193,0.0,1.488619,1.234447,-0.033972
2010-08-12,1.186667,1.193333,1.159333,1.173333,1.173333,10365000,1.289809,1.33719,1.266478,1.30536,27.137552,0.0,1.505875,1.201925,-0.043411


In [8]:
df_Indicators.shape

(3247, 15)

In [9]:
df_Indicators.to_csv('TSLA-all.csv')

In [66]:
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler()
# scaler.fit(df_Indicators)
# df_Indicators[df_Indicators.columns] = scaler.transform(df_Indicators[df_Indicators.columns])
# df_Indicators.to_csv('ETH-all Standardization.csv')

In [10]:
n = len(df)
train_df = df_Indicators[0:int(n*0.7)]
val_df = df_Indicators[int(n*0.7):int(n*0.9)]
test_df = df_Indicators[int(n*0.9):]

train_df.shape, val_df.shape, test_df.shape

((2291, 15), (655, 15), (301, 15))

In [68]:
#將值縮限在0~1之間，加速訓練時間
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(train_df)

train_df[train_df.columns] = scaler.transform(train_df[train_df.columns])
val_df[val_df.columns] = scaler.transform(val_df[val_df.columns])
test_df[test_df.columns] = scaler.transform(test_df[test_df.columns])

In [33]:
train_df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,SMA_7,SMA_14,EMA_7,EMA_14,RSI,Stoch_RSI,Upper_BB,Lower_BB,MACD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2017-12-06,0.080055,0.078514,0.072786,0.072821,0.072821,0.012567,0.080419,0.080748,0.079892,0.077974,0.587884,0.000000,0.085372,0.063123,0.369180
2017-12-07,0.072388,0.074080,0.071535,0.074052,0.074052,0.014139,0.080025,0.081129,0.078886,0.077687,0.543108,0.000000,0.084292,0.066858,0.364501
2017-12-08,0.074212,0.079212,0.073252,0.078625,0.078625,0.016615,0.079699,0.080832,0.079316,0.078093,0.433476,0.000000,0.083606,0.070286,0.362944
2017-12-09,0.078942,0.087135,0.080562,0.082321,0.082321,0.012634,0.080011,0.080946,0.080595,0.078970,0.506530,0.150088,0.083059,0.073812,0.363357
2017-12-10,0.082210,0.080611,0.074794,0.075598,0.075598,0.005455,0.079262,0.080480,0.079814,0.078771,0.416108,0.000000,0.081889,0.076997,0.359468
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-12,0.981852,0.982738,0.955316,0.969336,0.969336,0.207909,0.995792,0.985873,1.000000,0.995051,0.599767,0.000000,0.997233,0.945317,0.665867
2021-11-13,0.969666,0.960554,0.970690,0.966025,0.966025,0.161717,0.999820,0.991004,0.999797,0.998103,0.641876,0.308957,0.998273,0.957757,0.648828
2021-11-14,0.965839,0.958001,0.956614,0.960715,0.960715,0.134369,1.000000,0.996332,0.998271,1.000000,0.649568,0.365391,1.000000,0.965636,0.628804
2021-11-15,0.961280,0.973562,0.963014,0.946152,0.946152,0.183486,0.992105,1.000000,0.993357,0.999593,0.595814,0.000000,0.997383,0.979026,0.601483


In [11]:
train_df.to_csv('data/train.csv')
val_df.to_csv('data/val.csv')
test_df.to_csv('data/test.csv')