## Import libraries

In [0]:
import pandas as pd
import plotly.offline as offline
import plotly.graph_objs as go

In [0]:
# Download the KRX listed-company table, zero-pad the stock code to six digits,
# keep only the company-name and stock-code columns, and give them English names.
code_df = (
    pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0]
    .assign(종목코드=lambda listing: listing['종목코드'].map('{:06d}'.format))
    .loc[:, ['회사명', '종목코드']]
    .rename(columns={'회사명': 'name', '종목코드': 'code'})
)

# Preview the first rows
code_df.head()

Unnamed: 0,name,code
0,DSR,155660
1,GS글로벌,1250
2,HSD엔진,82740
3,LG이노텍,11070
4,LS산전,10120


In [0]:
# Sanity check: is the company name 'DSR' present in the downloaded listing?
# (main() uses the same kind of membership test to validate user input.)
'DSR' in code_df.name.values

True

## Getting historical stock price

In [0]:
# Find Stock Code
def stock(url='http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13'):
    """Download the KRX company listing and return company names with stock codes.

    Args:
        url: Address of the HTML listing to parse. Defaults to the KRX
            corporate-list download used throughout this notebook.

    Returns:
        pandas.DataFrame: Two columns, ``name`` (company name) and ``code``
        (stock code as a string, left-padded with zeros to six characters).
    """
    # The first table on the page is the listing itself
    listing = pd.read_html(url, header=0)[0]

    return (
        listing[['회사명', '종목코드']]
        .rename(columns={'회사명': 'name', '종목코드': 'code'})
        # Pad through str so that codes parsed as text (e.g. alphanumeric ones)
        # do not raise the way '{:06d}'.format does on non-integers.
        .assign(code=lambda table: table['code'].astype(str).str.zfill(6))
    )

# Find proper URL in Naver
def naver(code_df, stockCode):
    """Build the Naver Finance daily-price URL for a stock.

    Args:
        code_df: DataFrame with ``name`` and ``code`` columns (as returned by
            ``stock()``), used to translate a company name into its code.
        stockCode: Either an ``int`` stock code or a company name (``str``).

    Returns:
        str: URL of the daily price page for the requested stock.

    Raises:
        ValueError: If ``stockCode`` is a name that is not present in
            ``code_df['name']``.
    """
    if isinstance(stockCode, int):
        # Converting a code to int drops its leading zeros; restore the
        # six-digit form before building the URL.
        code = '{:06d}'.format(stockCode)
    else:
        # A boolean mask avoids building a query string from user text, and
        # reading the cell directly avoids slicing to_string() output, whose
        # leading whitespace differs between pandas versions.
        matches = code_df.loc[code_df['name'] == stockCode, 'code']
        if matches.empty:
            raise ValueError("Unknown company name: {!r}".format(stockCode))
        code = str(matches.iloc[0]).strip().zfill(6)

    url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code)

    print("Requested URL = {}".format(url))
    return url

# Getting data from Naver Stock
def crawling(a, b):
    """Download daily price rows from Naver Finance and return them oldest-first.

    Args:
        a: Base URL of the daily price page (as returned by ``naver()``); the
            ``&page=N`` suffix is appended here.
        b: Number of most recent trading days wanted. Must be at least 1.

    Returns:
        pandas.DataFrame: Up to ``b`` rows sorted by ascending ``Date`` with the
        columns renamed to English (``Date``, ``Closing Price``, ``Difference``,
        ``Open Price``, ``Highest``, ``Lowest``, ``Trading Vol``).

    Raises:
        ValueError: If ``b`` is smaller than 1.
    """
    days = int(b)
    if days < 1:
        raise ValueError("b must be a positive number of trading days")

    # The page arithmetic assumes 10 price rows per page. Round *up* so that a
    # request such as 15 days fetches two pages instead of silently returning 10.
    rows_per_page = 10
    last_page = (days + rows_per_page - 1) // rows_per_page

    # NOTE(review): the site may reject requests that lack a browser-like
    # User-Agent header - confirm against the live service.
    pages = [
        pd.read_html('{url}&page={page}'.format(url=a, page=page), header=0)[0]
        for page in range(1, last_page + 1)
    ]

    # DataFrame.append was removed in pandas 2.0; concatenate all pages at once.
    df = pd.concat(pages, ignore_index=True)

    # Drop rows containing missing values
    df = df.dropna()

    # Reset index
    df = df.reset_index(drop=True)

    # Change column names into English for further analysis
    df = df.rename(columns={'날짜': 'Date', '종가': 'Closing Price', '전일비': 'Difference',
                            '시가': 'Open Price', '고가': 'Highest', '저가': 'Lowest',
                            '거래량': 'Trading Vol'})

    # Change date format to datetime
    df['Date'] = pd.to_datetime(df['Date'])

    # Set ascending order
    df = df.sort_values(by=['Date'], ascending=True)

    # Keep only the requested number of most recent days
    return df.tail(days)

# Draw graph
def graph(df):
    """Draw an interactive candlestick chart of the given price history.

    Args:
        df: DataFrame providing the ``Date``, ``Open Price``, ``Highest``,
            ``Lowest`` and ``Closing Price`` columns (as produced by
            ``crawling()``).

    Returns:
        Whatever ``plotly.offline.iplot`` returns for the rendered figure.
    """
    offline.init_notebook_mode(connected=True)

    candles = go.Candlestick(
        x=df['Date'],
        open=df['Open Price'],
        high=df['Highest'],
        low=df['Lowest'],
        close=df['Closing Price'],
    )

    # Range-selector buttons: look back 1, 3 or 6 months, or show everything
    month_buttons = [
        {'count': months, 'label': '{}m'.format(months), 'step': 'month', 'stepmode': 'backward'}
        for months in (1, 3, 6)
    ]
    range_buttons = month_buttons + [{'step': 'all'}]

    layout = {
        'title': 'Historical Stock Price',
        'xaxis': {
            'rangeselector': {'buttons': range_buttons},
            'rangeslider': {},
            'type': 'date',
        },
    }

    fig = go.Figure(data=[candles], layout=layout)
    return offline.iplot(fig)
        
# Choose Option
def option():
    """Print the main menu and return the user's raw selection.

    Returns:
        str: The text typed at the ``Choose Menu: `` prompt, unmodified.
    """
    banner_side = "=" * 54
    menu_text = """
    1. Draw Historical Stock Graph
    2. Golden Cross Check
    3. Dead Cross Check
    4. Trading Volume
    5. The end
    """

    # Header line, menu body, closing rule
    print(banner_side, "Menu", banner_side)
    print(menu_text)
    print("=" * 115)

    return input('Choose Menu: ')
  
# Main Func
def main():
    """Run the interactive menu loop until the user chooses option 5.

    Option 1 asks for a stock code or company name, validates it against the
    KRX listing, asks how many trading days to show, downloads the prices and
    draws the candlestick chart. Options 2-4 appear in the menu but have no
    implementation here, so they only print a notice.

    Returns:
        None
    """
    # Download the listing once; it does not change between menu selections.
    code_df = stock()

    while True:
        choice = option().strip()

        if choice == '1':
            # Ask again on every visit so a new stock can be chosen each time
            # instead of silently reusing the URL from a previous selection.
            url = None
            while url is None:
                code = input("Please enter proper stock code or accurate company name: ").strip()

                if code.isdigit():
                    if code in code_df.code.values:
                        # naver() treats an int argument as a stock code
                        url = naver(code_df, int(code))
                    else:
                        print("Please check the stock code")
                elif code in code_df.name.values:
                    url = naver(code_df, code)
                else:
                    print("Please check the stock name")

            # Re-prompt instead of crashing on non-numeric or too-small input
            ran = None
            while ran is None:
                answer = input("Enter the number of working days you want to see (min. 10days): ").strip()
                if answer.isdigit() and int(answer) >= 10:
                    ran = int(answer)
                else:
                    print("Please enter a whole number of at least 10")

            file = crawling(url, ran)
            graph(file)

        elif choice == '5':
            print("It was nice to meet you:)")
            break

        elif choice in ('2', '3', '4'):
            print("This menu is not available yet")

        else:
            print("Please choose a number between 1 and 5")
            
# Start the interactive menu when this cell/script is executed directly.
if __name__ == '__main__':
    main()


    1. Draw Historical Stock Graph
    2. Golden Cross Check
    3. Dead Cross Check
    4. Trading Volume
    5. The end
    
Choose Menu: 1
Please enter proper stock code or accurate company name: blah
Please check the stock name
Please enter proper stock code or accurate company name: DSR
Requested URL = http://finance.naver.com/item/sise_day.nhn?code=155660
Enter the number of working days you want to see (min. 10days): 10



    1. Draw Historical Stock Graph
    2. Golden Cross Check
    3. Dead Cross Check
    4. Trading Volume
    5. The end
    


KeyboardInterrupt: ignored

In [0]:
# Moving average
def average(code_df, pages=11, short_window=20, long_window=50):
    """Compute closing-price moving averages for every company in ``code_df``.

    For each stock code the daily price pages are downloaded from Naver
    Finance, reduced to date and closing price, sorted oldest-first, and
    extended with two rolling means.

    Args:
        code_df: DataFrame with a ``code`` column holding stock codes.
        pages: Number of daily-price pages to download per company
            (pages ``1..pages``). Defaults to 11.
        short_window: Row count of the shorter rolling mean. Defaults to 20.
        long_window: Row count of the longer rolling mean. Defaults to 50.

    Returns:
        list[pandas.DataFrame]: One frame per company, in ``code_df`` order,
        with columns ``Date``, ``Closing Price``, ``'<short_window> Days'`` and
        ``'<long_window> Days'``. Rows without a complete rolling window are
        dropped and the index is reset.
    """
    url_template = 'http://finance.naver.com/item/sise_day.nhn?code={code}&page={page}'
    short_label = '{} Days'.format(short_window)
    long_label = '{} Days'.format(long_window)

    avg = []

    # Iterate over the values directly: positional lookups such as
    # code_df['code'][i] are label-based and fail on a non-default index.
    for code in code_df['code']:

        frames = [
            pd.read_html(url_template.format(code=code, page=page), header=0)[0]
            for page in range(1, pages + 1)
        ]

        # DataFrame.append was removed in pandas 2.0; concatenate all pages at once.
        company = pd.concat(frames, ignore_index=True)

        company = company.dropna()

        # Change column names into English for further analysis
        company = company.rename(columns={'날짜': 'Date', '종가': 'Closing Price', '전일비': 'Difference',
                                          '시가': 'Open Price', '고가': 'Highest', '저가': 'Lowest',
                                          '거래량': 'Trading Vol'})

        # Drop columns not needed for the moving averages
        company = company.drop(['Difference', 'Open Price', 'Highest', 'Lowest', 'Trading Vol'], axis=1)

        # Change date format to datetime
        company['Date'] = pd.to_datetime(company['Date'])

        # Set ascending order so the rolling windows look backwards in time
        company = company.sort_values(by=['Date'], ascending=True)

        company[short_label] = company['Closing Price'].rolling(short_window).mean()
        company[long_label] = company['Closing Price'].rolling(long_window).mean()

        # Rows before the first complete long window have NaN averages
        company = company.dropna().reset_index(drop=True)

        avg.append(company)

    return avg

In [0]:
# Build the moving-average tables for every company in code_df.
# NOTE: average() downloads several Naver pages per company, so this call is
# slow and fails without network access (see the URLError recorded below).
average(code_df)

URLError: <urlopen error [Errno 8] nodename nor servname provided, or not known>

In [0]:
# Moving-average pipeline for a single company (the second row of code_df),
# kept as a step-by-step cell so intermediate results can be inspected.
page_frames = []

for page in range(1, 12):

    pg_url = 'http://finance.naver.com/item/sise_day.nhn?code={code}&page={page}'\
    .format(code=code_df['code'].iloc[1], page=page)
    page_frames.append(pd.read_html(pg_url, header=0)[0])

# DataFrame.append was removed in pandas 2.0; concatenate all pages at once.
company = pd.concat(page_frames, ignore_index=True)

company = company.dropna()

# Change column names into English for further analysis
company = company.rename(columns={'날짜': 'Date', '종가': 'Closing Price', '전일비': 'Difference',
                                  '시가': 'Open Price', '고가': 'Highest', '저가': 'Lowest',
                                  '거래량': 'Trading Vol'})


# Drop columns not needed for the moving averages
company = company.drop(['Difference', 'Open Price', 'Highest', 'Lowest', 'Trading Vol'], axis=1)

# Change date format to datetime
company['Date'] = pd.to_datetime(company['Date'])

# Set ascending order so the rolling windows look backwards in time
company = company.sort_values(by=['Date'], ascending=True)

company['20 Days'] = company['Closing Price'].rolling(20).mean()
company['50 Days'] = company['Closing Price'].rolling(50).mean()

# Rows before the first complete 50-row window have NaN averages
company = company.dropna().reset_index(drop=True)

company

Unnamed: 0,Date,Closing Price,20 Days,50 Days
0,2019-11-18,2350.0,2378.25,2412.8
1,2019-11-19,2315.0,2374.25,2409.5
2,2019-11-20,2300.0,2370.75,2406.3
3,2019-11-21,2220.0,2363.75,2401.5
4,2019-11-22,2235.0,2357.75,2397.2
...,...,...,...,...
56,2020-02-11,2105.0,2115.75,2184.9
57,2020-02-12,2095.0,2109.75,2181.3
58,2020-02-13,2060.0,2101.75,2177.1
59,2020-02-14,2035.0,2092.00,2173.2


# Resources

1. Pandas를 이용한 Naver금융에서 주식데이터 가져오기 <br>
https://excelsior-cjh.tistory.com/109 
2. Calculating Moving Average with Python <br>
https://jakevdp.github.io/PythonDataScienceHandbook/03.11-working-with-time-series.html
3. Seasonality Analysis <br>
https://www.dataquest.io/blog/tutorial-time-series-analysis-with-pandas/
