In [2]:
import numpy as np
import pandas as pd
import pandas_datareader.data as data
from matplotlib import pyplot as plt
%matplotlib inline

from datetime import datetime
from datetime import timedelta

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

from tensorflow.keras.layers import Input, SimpleRNN, LSTM, GRU, Dense, Flatten, GlobalMaxPool1D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import SGD, Adam

import tensorflow as tf

In [3]:
# !pip install pandas_datareader

In [4]:
start = '2005-01-01'
end = '2022-01-01'

df = data.DataReader('AAPL.US', 'stooq', start, end)
df.to_csv('APPLE.csv')

In [5]:
df = pd.read_csv('APPLE.csv')

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4280 entries, 0 to 4279
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    4280 non-null   object 
 1   Open    4280 non-null   float64
 2   High    4280 non-null   float64
 3   Low     4280 non-null   float64
 4   Close   4280 non-null   float64
 5   Volume  4280 non-null   int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 200.8+ KB


In [7]:
df['Date']

0       2021-12-31
1       2021-12-30
2       2021-12-29
3       2021-12-28
4       2021-12-27
           ...    
4275    2005-01-07
4276    2005-01-06
4277    2005-01-05
4278    2005-01-04
4279    2005-01-03
Name: Date, Length: 4280, dtype: object

In [8]:
# datetime 자료형으로 변경

df['Date'] = pd.to_datetime(df['Date'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4280 entries, 0 to 4279
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    4280 non-null   datetime64[ns]
 1   Open    4280 non-null   float64       
 2   High    4280 non-null   float64       
 3   Low     4280 non-null   float64       
 4   Close   4280 non-null   float64       
 5   Volume  4280 non-null   int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 200.8 KB


In [9]:
# 가장 오래된 날짜를 먼저 표시하도록 변경
df.sort_values(by = 'Date', ascending = True, inplace = True)
df

Unnamed: 0,Date,Open,High,Low,Close,Volume
4279,2005-01-03,0.9917,0.9954,0.9573,0.9675,809378621
4278,2005-01-04,0.9748,1.0010,0.9628,0.9776,1282096319
4277,2005-01-05,0.9776,0.9975,0.9776,0.9862,794826329
4276,2005-01-06,0.9908,0.9922,0.9681,0.9868,823978894
4275,2005-01-07,0.9945,1.0644,0.9903,1.0588,2610440418
...,...,...,...,...,...,...
4,2021-12-27,176.8600,180.1900,176.8400,180.1000,75015032
3,2021-12-28,179.9300,181.1000,178.3000,179.0600,79245171
2,2021-12-29,179.1000,180.4000,177.9100,179.1500,62428366
1,2021-12-30,179.2400,180.3400,177.8600,177.9700,59849167


In [10]:
# Date 칼럼을 인덱스로 변경
df.set_index(keys='Date', inplace = True)

In [11]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2005-01-03,0.9917,0.9954,0.9573,0.9675,809378621
2005-01-04,0.9748,1.0010,0.9628,0.9776,1282096319
2005-01-05,0.9776,0.9975,0.9776,0.9862,794826329
2005-01-06,0.9908,0.9922,0.9681,0.9868,823978894
2005-01-07,0.9945,1.0644,0.9903,1.0588,2610440418
...,...,...,...,...,...
2021-12-27,176.8600,180.1900,176.8400,180.1000,75015032
2021-12-28,179.9300,181.1000,178.3000,179.0600,79245171
2021-12-29,179.1000,180.4000,177.9100,179.1500,62428366
2021-12-30,179.2400,180.3400,177.8600,177.9700,59849167


In [12]:
# 다음 달의 종가하고 당일의 종가 사이의 차를 계산하여 뉴 컬럼 생성
df_shift = df.shift(-1)

In [13]:
df['Gap_close'] = df_shift['Close'] - df['Close']
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Gap_close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2005-01-03,0.9917,0.9954,0.9573,0.9675,809378621,0.0101
2005-01-04,0.9748,1.0010,0.9628,0.9776,1282096319,0.0086
2005-01-05,0.9776,0.9975,0.9776,0.9862,794826329,0.0006
2005-01-06,0.9908,0.9922,0.9681,0.9868,823978894,0.0720
2005-01-07,0.9945,1.0644,0.9903,1.0588,2610440418,-0.0042
...,...,...,...,...,...,...
2021-12-27,176.8600,180.1900,176.8400,180.1000,75015032,-1.0400
2021-12-28,179.9300,181.1000,178.3000,179.0600,79245171,0.0900
2021-12-29,179.1000,180.4000,177.9100,179.1500,62428366,-1.1800
2021-12-30,179.2400,180.3400,177.8600,177.9700,59849167,-0.6300


In [14]:
# 새로운 컬럼 UP을 추가하고 올라갈 경ㅇ우 1, 내려갈 경우 0
df['Up'] = 0
df['Up'][df['Gap_close'] > 0] = 1
df.drop('Gap_close', axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Up'][df['Gap_close'] > 0] = 1


In [15]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Up
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2005-01-03,0.9917,0.9954,0.9573,0.9675,809378621,1
2005-01-04,0.9748,1.0010,0.9628,0.9776,1282096319,1
2005-01-05,0.9776,0.9975,0.9776,0.9862,794826329,1
2005-01-06,0.9908,0.9922,0.9681,0.9868,823978894,1
2005-01-07,0.9945,1.0644,0.9903,1.0588,2610440418,0
...,...,...,...,...,...,...
2021-12-27,176.8600,180.1900,176.8400,180.1000,75015032,0
2021-12-28,179.9300,181.1000,178.3000,179.0600,79245171,1
2021-12-29,179.1000,180.4000,177.9100,179.1500,62428366,0
2021-12-30,179.2400,180.3400,177.8600,177.9700,59849167,0


In [17]:
df['Close_ratio'] = (df['Close'] - df_shift['Close']) / df_shift['Close']
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Up,Close_ratio
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2005-01-03,0.9917,0.9954,0.9573,0.9675,809378621,1,-0.010331
2005-01-04,0.9748,1.0010,0.9628,0.9776,1282096319,1,-0.008720
2005-01-05,0.9776,0.9975,0.9776,0.9862,794826329,1,-0.000608
2005-01-06,0.9908,0.9922,0.9681,0.9868,823978894,1,-0.068002
2005-01-07,0.9945,1.0644,0.9903,1.0588,2610440418,0,0.003983
...,...,...,...,...,...,...,...
2021-12-27,176.8600,180.1900,176.8400,180.1000,75015032,0,0.005808
2021-12-28,179.9300,181.1000,178.3000,179.0600,79245171,1,-0.000502
2021-12-29,179.1000,180.4000,177.9100,179.1500,62428366,0,0.006630
2021-12-30,179.2400,180.3400,177.8600,177.9700,59849167,0,0.003552
