## 주식 데이터 분석 - 야후 finance
[yahoo finance - naver url](https://finance.yahoo.com/quote/035420.KS/history?p=035420.KS)

In [5]:
from io import StringIO
from urllib.request import Request, urlopen

import pandas as pd
from bs4 import BeautifulSoup

# Request the Yahoo Finance price-history page for 035420.KS, sending an
# explicit User-Agent header with the request.
req = Request(
    'https://finance.yahoo.com/quote/035420.KS/history?p=035420.KS',
    headers={'User-Agent': 'Chrome'},
)
# Use the response as a context manager so the connection is closed as soon
# as the body has been read.
with urlopen(req) as resp:
    page = resp.read()

soup = BeautifulSoup(page, 'html.parser')
table = soup.find('table')  # first <table> tag in the document
if table is None:
    # Without this check str(None) == 'None' would be handed to read_html,
    # which then fails with a message unrelated to the real cause.
    raise ValueError('no <table> element found in the Yahoo Finance response')

# read_html expects a path or file-like object; passing literal HTML as a
# plain string is deprecated in recent pandas, so wrap it in StringIO.
df_raw = pd.read_html(StringIO(str(table)))[0]  # first table parsed from the HTML
df_raw.head()

Unnamed: 0,Date,Open,High,Low,Close*,Adj Close**,Volume
0,"Aug 04, 2022",272500.00,278000.00,271000.00,272500.00,272500.00,592580
1,"Aug 03, 2022",-,-,-,-,-,-
2,"Aug 02, 2022",260500.00,260500.00,254000.00,259000.00,259000.00,551218
3,"Aug 01, 2022",260500.00,261500.00,256500.00,259000.00,259000.00,532517
4,"Jul 29, 2022",252000.00,262000.00,251500.00,259000.00,259000.00,1105156


In [6]:
# Keep only the date and the closing price (Close*), renamed to ds / y.
df_tmp = df_raw[['Date', 'Close*']].rename(columns={'Date': 'ds', 'Close*': 'y'})
df_tmp.head()

Unnamed: 0,ds,y
0,"Aug 04, 2022",272500.00
1,"Aug 03, 2022",-
2,"Aug 02, 2022",259000.00
3,"Aug 01, 2022",259000.00
4,"Jul 29, 2022",259000.00


In [7]:
# Positional slice that leaves out the final row of df_tmp.
df_target = df_tmp.iloc[:-1]
df_target.head()

Unnamed: 0,ds,y
0,"Aug 04, 2022",272500.00
1,"Aug 03, 2022",-
2,"Aug 02, 2022",259000.00
3,"Aug 01, 2022",259000.00
4,"Jul 29, 2022",259000.00


---

In [8]:
# assign() builds a new frame, so df_target itself is left unchanged.
# The ds strings (e.g. "Aug 04, 2022") are parsed into datetime values,
# the date form the original note says fbprophet works with.
df = df_target.assign(ds=pd.to_datetime(df_target['ds'], format='%b %d, %Y'))
df.head()

Unnamed: 0,ds,y
0,2022-08-04,272500.00
1,2022-08-03,-
2,2022-08-02,259000.00
3,2022-08-01,259000.00
4,2022-07-29,259000.00


In [9]:
df.info()       # the y column is held as strings (object dtype) >> it has to be converted to float.

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ds      100 non-null    datetime64[ns]
 1   y       100 non-null    object        
dtypes: datetime64[ns](1), object(1)
memory usage: 1.7+ KB


In [10]:
# df['y'] = df['y'].astype('float')       # fails: the '-' placeholder values in y cannot be converted to float


---


#### y의 값이 숫자가 아닌 것들을 빼고 다시 생성하기

In [11]:
# Keep the rows whose y is not the '-' placeholder, leave out the last of the
# remaining rows, then renumber. reset_index() keeps the previous row labels
# in a new 'index' column.
df_tmp_ = (
    df_tmp
    .loc[df_tmp['y'] != '-']
    .iloc[:-1]
    .reset_index()
)
df_tmp_

Unnamed: 0,index,ds,y
0,0,"Aug 04, 2022",272500.00
1,2,"Aug 02, 2022",259000.00
2,3,"Aug 01, 2022",259000.00
3,4,"Jul 29, 2022",259000.00
4,5,"Jul 28, 2022",247000.00
...,...,...,...
94,95,"Mar 18, 2022",344500.00
95,96,"Mar 17, 2022",329000.00
96,97,"Mar 16, 2022",329000.00
97,98,"Mar 15, 2022",329000.00


In [12]:
# Keep only the ds and y columns; any other column in df_tmp_ is dropped.
df_tmp_ = df_tmp_[['ds', 'y']]
df_tmp_

Unnamed: 0,ds,y
0,"Aug 04, 2022",272500.00
1,"Aug 02, 2022",259000.00
2,"Aug 01, 2022",259000.00
3,"Jul 29, 2022",259000.00
4,"Jul 28, 2022",247000.00
...,...,...
94,"Mar 18, 2022",344500.00
95,"Mar 17, 2022",329000.00
96,"Mar 16, 2022",329000.00
97,"Mar 15, 2022",329000.00


In [13]:
# assign() returns a new frame, so df_tmp_ keeps its original string dates
# while df gets ds parsed from the "%b %d, %Y" text form into datetimes.
df = df_tmp_.assign(ds=pd.to_datetime(df_tmp_['ds'], format='%b %d, %Y'))
df.head()

Unnamed: 0,ds,y
0,2022-08-04,272500.0
1,2022-08-02,259000.0
2,2022-08-01,259000.0
3,2022-07-29,259000.0
4,2022-07-28,247000.0


In [14]:
# Inspect the column dtypes after the date conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ds      99 non-null     datetime64[ns]
 1   y       99 non-null     object        
dtypes: datetime64[ns](1), object(1)
memory usage: 1.7+ KB


In [15]:
# Cast the 'y' column to float, then re-check the dtypes.
df = df.astype({'y': float})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99 entries, 0 to 98
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   ds      99 non-null     datetime64[ns]
 1   y       99 non-null     float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 1.7 KB


In [16]:
# Preview the first rows of df.
df.head()

Unnamed: 0,ds,y
0,2022-08-04,272500.0
1,2022-08-02,259000.0
2,2022-08-01,259000.0
3,2022-07-29,259000.0
4,2022-07-28,247000.0


In [21]:
# Preview the last rows of df.
df.tail()

Unnamed: 0,ds,y
94,2022-03-18,344500.0
95,2022-03-17,329000.0
96,2022-03-16,329000.0
97,2022-03-15,329000.0
98,2022-03-14,329000.0


---
### prophet 해보기

In [22]:
from fbprophet import Prophet

In [23]:
# Build the Prophet model with yearly and daily seasonality both switched on.
# NOTE(review): the recorded outputs show roughly five months of daily closes;
# confirm that both seasonalities are intended for data of that span and
# granularity. The model is not fitted here.
m = Prophet(
    yearly_seasonality=True,
    daily_seasonality=True,
)

In [24]:
# Fit the model on df. The trailing ';' is presumably there to keep the
# notebook from echoing the call's return value.
m.fit(df);

: 

: 

In [19]:
# future = m.make_future_dataframe(periods=30)
# forecast = m.predict(future)
# forecast[['ds','yhat','yhat_lower','yhat_upper']].tail()

In [20]:
# m.predict(forecast)