### **1. 통계 기반 예측**

In [None]:
#코드 12-18
import pandas as pd
import yfinance as yf
# Download TSLA price history for the study window and keep only the
# closing prices in their own frame.
tsla = yf.download(
    'TSLA',
    start='2021-11-01',
    end='2023-03-31',
)
closing_prices = tsla['Close']
df_tsla = pd.DataFrame(closing_prices)
# Last expression of the cell: previews the first rows in a notebook.
df_tsla.head()

In [None]:
#코드 12-19
# Move the index into a regular column, then give both columns fixed
# names so later cells can refer to 'date' and 'value'.
flat_tsla = df_tsla.reset_index()
flat_tsla.columns = ['date', 'value']
# Make sure the date column holds pandas datetimes.
flat_tsla['date'] = pd.to_datetime(flat_tsla['date'])
df_tsla = flat_tsla

In [None]:
#코드 12-20
import matplotlib.pyplot as plt

# Split the data: the first 80% of the rows form the training set and the
# remaining rows form the test set. Both frames end up indexed by 'date'
# with a single 'value' column.
split_idx = int(0.8 * len(df_tsla))
df_tsla_train = df_tsla.iloc[:split_idx][['value', 'date']].set_index('date')
df_tsla_test = df_tsla.iloc[split_idx:][['value', 'date']].set_index('date')

# Draw the training series in blue and the test series in green on the
# same axes.
ax = df_tsla_train['value'].plot(figsize=(12.2, 6.4), color='blue')
df_tsla_test['value'].plot(ax=ax, color='green')
plt.show()

In [None]:
#코드 12-21
!pip install pmdarima
from pmdarima.arima import ndiffs, nsdiffs
# Estimate the number of differences for the training series with three
# different tests (ADF, KPSS, PP) and print each result.
adf_diffs = ndiffs(df_tsla_train, test='adf')
print(f"최적의 차분 횟수 (ADF): {adf_diffs}")
kpss_diffs = ndiffs(df_tsla_train, test='kpss')
print(f"최적의 차분 횟수 (KPSS): {kpss_diffs}")
pp_diffs = ndiffs(df_tsla_train, test='pp')
print(f"최적의 차분 횟수 (PP): {pp_diffs}")

In [None]:
#코드 12-22
# Estimate the seasonal differencing order for the training series with
# the OCSB and CH tests, using a seasonal period of 12.
# The first label is spelled "OCSB" so it matches the test actually
# requested (test='ocsb').
seasonal_period = 12
ocsb_diffs = nsdiffs(df_tsla_train, m=seasonal_period, test='ocsb')
print(f"최적의 차분 계수 (OCSB): {ocsb_diffs}")
ch_diffs = nsdiffs(df_tsla_train, m=seasonal_period, test='ch')
print(f"최적의 차분 계수 (CH): {ch_diffs}")

In [None]:
#코드 12-23
from statsmodels.tsa.arima_model import ARIMA
import pmdarima as pm

# Differencing order passed to the ARIMA search.
n_diffs = 1

# Search settings: p and q are each explored from 0 up to 2, seasonality
# is switched off (the data has no seasonality), and the stepwise search
# prints a trace of the candidates it tries.
search_options = dict(
    d=n_diffs,
    start_p=0, max_p=2,
    start_q=0, max_q=2,
    m=1, seasonal=False,
    stepwise=True,
    trace=True,
)
model_fit = pm.auto_arima(y=df_tsla_train['value'], **search_options)
print(model_fit.summary())

In [None]:
#코드 12-24
# Forecast as many periods ahead as there are rows in the test set.
tsla_pred = model_fit.predict(n_periods=len(df_tsla_test))
df_tsla_pred = pd.DataFrame(tsla_pred)

# Put the FORECAST values (not the observed test values) on the test
# dates so the 'Prediction' curve can be compared with the test data.
# The forecast is converted to a plain array first so it is matched to
# the test dates by position rather than by whatever index predict()
# returned.
forecast_values = pd.Series(tsla_pred).to_numpy()
result = pd.DataFrame(forecast_values,
                      index=df_tsla_test.index, columns=['value'])
# Last expression of the cell: displays the frame in a notebook.
result

In [None]:
#코드 12-25
# Plot the training data, the test data and the contents of `result`
# on one set of axes, each with its own legend entry.
fig, axes = plt.subplots(figsize=(12, 4))
for frame, legend_label in (
    (df_tsla_train, 'Train'),       # training data
    (df_tsla_test, 'Test'),         # test data
    (result, 'Prediction'),         # prediction data
):
  axes.plot(frame, label=legend_label)

axes.legend()
plt.show()

In [None]:
#코드 12-26
def each_step_prediction():
  """Return the fitted model's forecast for the next single period.

  Reads the module-level ``model_fit`` and asks it for one period ahead;
  the first (only) forecast value is returned as a plain Python value.
  """
  one_step = model_fit.predict(n_periods=1)
  forecast_values = one_step.tolist()  # convert to a list
  return forecast_values[0]

# Rolling one-step-ahead forecast: for every observed test value, first
# record the model's next-period forecast, then feed the observed value
# back into the model before moving on.
pred_steps = []
for observed_value in df_tsla_test['value']:
  pred_steps.append(each_step_prediction())
  model_fit.update(observed_value)

In [None]:
#코드 12-27
# Plot the training data, the test data and the step-by-step predictions;
# the predictions are drawn against the test dates.
fig, axes = plt.subplots(figsize=(12, 4))
test_dates = df_tsla_test.index
axes.plot(df_tsla_train.index, df_tsla_train['value'], label='Train')
axes.plot(test_dates, df_tsla_test['value'], label='Test')
axes.plot(test_dates, pred_steps, label='Prediction')

axes.legend()
plt.show()

### **2. 심화분석: 인공신경망을 활용하는 주가 예측**

In [None]:
#코드 12-28
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

# Download TSLA price history for the study window.
tsla = yf.download(
    'TSLA',
    start='2021-11-01',
    end='2023-03-31',
)

# Keep only the closing prices, expose the index as a 'date' column next
# to a 'value' column, and finally index the frame by 'date' again.
df_tsla = pd.DataFrame(tsla['Close']).reset_index()
df_tsla.columns = ['date', 'value']
df_tsla['date'] = pd.to_datetime(df_tsla['date'])
df_tsla = df_tsla.set_index('date')

In [None]:
#코드 12-29
# Raw values of the price frame as a 2-D NumPy array (rows x columns).
# (A bare `df_tsla.reset_index()` call whose result was discarded used to
# sit here; it had no effect and has been dropped.)
dataset_tsla = df_tsla.values

# Split the data: first 80% of the rows for training, the rest for testing
split_idx = int(0.8 * len(dataset_tsla))
df_tsla_train = dataset_tsla[:split_idx, :]
df_tsla_test = dataset_tsla[split_idx:, :]

# Scale the data
# The scaler learns its min/max from the training rows ONLY, so no
# information from the test period leaks into preprocessing. The whole
# series is then transformed with those training statistics, which means
# test values may fall outside [0, 1].
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(df_tsla_train)
scaled_data = scaler.transform(dataset_tsla)

In [None]:
#코드 12-30
# Build the training samples using a 28-step look-back window: each input
# is the 28 scaled values preceding position i, and the target is the
# scaled value at position i, for every i inside the training range.
window = 28
train_positions = range(window, len(df_tsla_train))

x_train_data = np.array(
    [scaled_data[i - window:i, 0] for i in train_positions])
y_train_data = np.array([scaled_data[i, 0] for i in train_positions])

# Add a trailing axis of size 1: (samples, window) -> (samples, window, 1)
x_train_data = x_train_data.reshape(
    x_train_data.shape[0], x_train_data.shape[1], 1)

In [None]:
#코드 12-31
# Two stacked LSTM layers (28 units each) followed by a single-unit dense
# output. The first LSTM returns its full sequence so the second LSTM can
# consume it.
lstm_tsla = Sequential([
    LSTM(units=28, return_sequences=True,
         input_shape=(x_train_data.shape[1], 1)),
    LSTM(units=28),
    Dense(1),
])

# Re-process the data: take the test rows plus the 28 rows before them,
# shape them as a single column and scale them with the existing scaler.
n_input_rows = len(df_tsla_test) + 28
inputs_data = df_tsla[len(df_tsla) - n_input_rows:].values
inputs_data = scaler.transform(inputs_data.reshape(-1, 1))

# Configure how the model learns (MSE loss, Adam optimizer) and train it.
lstm_tsla.compile(loss='mean_squared_error', optimizer='adam')
lstm_tsla.fit(
    x_train_data,
    y_train_data,
    epochs=100,
    batch_size=1,
    verbose=2,
)

In [None]:
#코드 12-32
# Build one 28-step input window for every position from 28 to the end
# of inputs_data.
X_test = np.array(
    [inputs_data[i - 28:i, 0] for i in range(28, inputs_data.shape[0])])

# Add a trailing axis of size 1: (samples, 28) -> (samples, 28, 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Predict, then map the outputs back through the scaler's inverse transform.
scaled_prediction = lstm_tsla.predict(X_test)
predicted_value = scaler.inverse_transform(scaled_prediction)

In [None]:
#코드 12-33
# Split the downloaded price table at the 80% mark, computed from the data
# instead of a hard-coded row number, so the cell keeps working when the
# download window (and therefore the row count) changes.
split_idx = int(0.8 * len(tsla))
df_tsla_train_vis = tsla[:split_idx]
# Copy the test slice so the new column is written to an independent
# frame rather than to a slice of `tsla`.
df_tsla_test_vis = tsla[split_idx:].copy()

# Flatten the predictions to one value per test row before storing them.
df_tsla_test_vis['Predictions'] = predicted_value.ravel()
plt.plot(df_tsla_train_vis["Close"])
plt.plot(df_tsla_test_vis[['Close', "Predictions"]])