### 주식 데이터 수집(fdr, 스크래핑 등)

In [1]:
import pandas as pd
import numpy as np
import time
import pickle
from datetime import datetime
from tqdm import tqdm
import re

In [2]:
import io
import requests
import FinanceDataReader as fdr
from urllib import parse
from bs4 import BeautifulSoup as bs
import seaborn as sns

In [3]:
try:
    import pymysql
    from sqlalchemy import create_engine
    from pandas.io import sql
    from pymongo import MongoClient
    from bson.objectid import ObjectId
    client = MongoClient('127.0.0.1')
    db = client.stock
    PASSWORD=your_password
except ModuleNotFoundError:
    print("Don't use sql in this PC")
    pass

Don't use sql in this PC


### KOSPI / KOSDAQ의 현재 상장된 종목 코드

In [65]:
TODAY = datetime.now().strftime('%y%m%d')

In [63]:
# Save today's listed-stock data frames as CSV files.
# The file names carry the TODAY stamp (yymmdd) defined in a separate cell.
# NOTE(review): assumes the data_stored/ directory already exists — confirm.
kospi = fdr.StockListing('KOSPI')  # information on every KOSPI stock code
kospi.to_csv(f'data_stored/kospi_all_{TODAY}.csv', index=False)
kosdaq = fdr.StockListing('KOSDAQ')  # information on every KOSDAQ stock code
kosdaq.to_csv(f'data_stored/kosdaq_all_{TODAY}.csv', index=False)

In [23]:
class MarketCodes():
    """
    Fetch KOSPI / KOSDAQ stock codes.

    Currently listed codes come from ``fdr.StockListing``; delisted codes come
    from a KRX spreadsheet download.  The raw listing also contains non-common
    issues, which ``noble_codes`` filters out.
    """

    _MARKETS = ('KOSPI', 'KOSDAQ')

    def __init__(self, market):
        """
        market : 'KOSPI' or 'KOSDAQ'

        Raises ValueError for any other value.  Previously the problem was only
        printed and the object was still created, after which every non-KOSPI
        value was silently treated as KOSDAQ by ``closed_jongmok``.
        """
        if market not in self._MARKETS:
            raise ValueError("Error : Please input KOSPI or KOSDAQ")
        self.market = market

    def get_codes(self):
        '''
        Return the symbols currently listed on ``self.market`` as a list.
        '''
        listing = fdr.StockListing(self.market)
        symbols = listing['Symbol']
        # The listing contains some odd entries whose symbol is not exactly six
        # characters long; keep only the six-character ones.
        return symbols[symbols.str.len() == 6].tolist()

    def closed_jongmok(self):
        '''
        Return the codes of issues delisted from ``self.market``.

        KRX market codes: KOSPI -> STK, KOSDAQ -> KSQ.
        The download takes two requests: the first returns a one-time code for
        the delisted-issue report, the second posts that code to the file
        server and receives an Excel sheet.
        '''
        code = 'STK' if self.market == 'KOSPI' else 'KSQ'
        url = 'http://marketdata.krx.co.kr/contents/COM/GenerateOTP.jspx?' \
                    'name=fileDown&filetype=xls&url=MKD/04/0406/04060600/mkd04060600&' \
                    'market_gubun='+code+'&isu_cdnm=%EC%A0%84%EC%B2%B4&isu_cd=&isu_nm=&' \
                    'isu_srt_cd=&fromdate=20000101&todate=22001231&del_cd=1&' \
                    'pagePath=%2Fcontents%2FMKD%2F04%2F0406%2F04060600%2FMKD04060600.jsp'

        header_data = {
            'User-Agent': 'Chrome/78.0.3904.87 Safari/537.36',
        }
        r = requests.get(url, headers=header_data)

        # STEP 02: download
        url = 'http://file.krx.co.kr/download.jspx'
        form_data = {'code': r.text}
        header_data = {
            'Referer': 'http://marketdata.krx.co.kr/contents/MKD/04/0406/04060600/MKD04060600.jsp',
            'User-Agent': 'Chrome/78.0.3904.87 Safari/537.36',
        }
        r = requests.post(url, data=form_data, headers=header_data)
        df = pd.read_excel(io.BytesIO(r.content))
        # Strip only the leading 'A' marker; a plain replace('A', '') would
        # also delete any 'A' that is part of the code itself.
        df['종목코드'] = df['종목코드'].str.replace(r'^A', '', regex=True)
        df['폐지일'] = pd.to_datetime(df['폐지일'])
        col_map = {'종목코드':'Symbol', '기업명':'Name', '폐지일':'DelistingDate', '폐지사유':'Reason'}
        df = df.rename(columns=col_map)
        return df['Symbol'].tolist()

    def all_codes(self):
        '''
        Return listed codes followed by delisted codes.
        '''
        return self.get_codes() + self.closed_jongmok()

    def noble_codes(self):
        """
        Return listed codes that are six characters long, contain no ASCII
        letter and end in '0'.

        The original note says this drops put/call issues and "blue-chip"
        issues; presumably preferred shares were meant — TODO confirm.
        """
        codes = self.get_codes()
        return [c for c in codes
                if not re.search('[a-zA-Z]', c) and len(c) == 6 and c[-1] == '0']

In [24]:
# Build the working code list for one market.
market = 'KOSPI'
codeC = MarketCodes(market)  # use the variable so market and codes cannot drift apart
codes = codeC.noble_codes()  # currently listed stocks only
# codes = codeC.all_codes()  # alternative: include delisted issues as well

In [25]:
len(codes)

1294

### 일일 주가 데이터
- 처음에는 Mongodb사용했는데, dataframe형태의 데이터이다보니 mysql 사용하기로 결정

In [67]:
# #Mongodb에 저장
# def store_daily_price_mongodb(codes, market,year):
#     today = datetime.today().strftime('%Y-%m-%d') #저장당시 날짜
    
#     for idx,code in tqdm(enumerate(codes)):
#         element = {}
#         element['code'] = code
#         element['date'] = today
#         element['market'] = market
#         element['price'] = fdr.DataReader(code, year).reset_index().to_dict('list')
#         db.dailyPrice.insert_one(element)

In [None]:
# Store into MySQL.
# The password is URL-quoted because characters such as '@', ':' or '/' would
# otherwise be parsed as part of the connection URL.
engine = create_engine("mysql+pymysql://{user}:{pw}@localhost/{db}"
                       .format(user="root",
                               pw=parse.quote_plus(str(PASSWORD)),
                               db="stock_price"))

def store_daily_price_mysql(codes: list, year: str) -> None:
    """
    Download the daily prices of every code in `codes` and store them in MySQL.

    codes : stock codes to download
    year  : start argument handed to ``fdr.DataReader`` (e.g. '2010')

    Each code is written to its own table named 'c' + code through the
    module-level `engine`.  Rows are appended (if_exists="append"), so running
    the function again for the same code adds the rows a second time.
    """
    # Iterating the list directly lets tqdm read its length and show a total.
    for code in tqdm(codes):
        df = fdr.DataReader(code, year).reset_index()
        df.to_sql('c'+str(code), con=engine, if_exists="append", chunksize=1000,
                  index_label=None, index=False)
        

In [30]:
store_daily_price_mysql(codes, '2010')

799it [05:03,  2.63it/s]


### 재무제표 크롤링
#ref : https://engkimbs.tistory.com/625

In [31]:
jaemu = []

In [32]:
dump=[]

In [71]:
def getFsNaver(code:str):
    """
    Collect the financial-statement table from a stock's Naver Finance page.

    code : stock code appended to the page URL.

    Returns a DataFrame of the scraped cell texts, indexed by the row labels
    and with one column per period header.  Raises IndexError when the page
    has no 'cop_analysis' sub-section.
    """
    page = requests.get("https://finance.naver.com/item/main.nhn?code="+code)
    soup = bs(page.text, 'lxml')
    section = soup.select('div.section.cop_analysis div.sub_section')[0]

    def texts(selector):
        # Stripped text of every node matching `selector` inside the section.
        return [node.get_text().strip() for node in section.select(selector)]

    header_cells = texts('thead th')
    # Header cells 3..6 are used as the annual periods, 7..12 as the quarterly ones.
    periods = header_cells[3:7] + header_cells[7:13]

    row_labels = texts('th.h_th2')[3:]
    cells = np.array(texts('td'))
    # In-place resize to (rows, 10): pads or truncates the flat cell list.
    cells.resize(len(row_labels), 10)
    return pd.DataFrame(data=cells, index=row_labels, columns=periods)

In [72]:
getFsNaver('005930')

Unnamed: 0,2017.12,2018.12,2019.12,2020.12(E),2019.09,2019.12.1,2020.03,2020.06,2020.09,2020.12(E).1
매출액,2395754.0,2437714.0,2304009.0,2371630.0,620035.0,598848.0,553252.0,529661.0,669642.0,618949.0
영업이익,536450.0,588867.0,277685.0,366852.0,77779.0,71603.0,64473.0,81463.0,123532.0,97440.0
당기순이익,421867.0,443449.0,217389.0,275506.0,62877.0,52270.0,48849.0,55551.0,93607.0,76182.0
영업이익률,22.39,24.16,12.05,15.47,12.54,11.96,11.65,15.38,18.45,15.74
순이익률,17.61,18.19,9.44,11.62,10.14,8.73,8.83,10.49,13.98,12.31
ROE(지배주주),21.01,19.63,8.69,10.34,10.05,8.69,8.45,8.49,9.51,
부채비율,40.68,36.97,34.12,,34.14,34.12,34.19,32.67,36.09,
당좌비율,181.61,204.12,233.57,,235.8,233.57,237.8,250.04,229.69,
유보율,24536.12,27531.92,28856.02,,28541.64,28856.02,29134.12,29477.97,30242.29,
EPS(원),5421.0,6024.0,3166.0,4021.0,899.0,770.0,720.0,808.0,1364.0,1052.0


In [None]:
# Collect the financial table of every code into `jaemu`; codes that fail are
# recorded in `dump`.  The parsing itself is delegated to getFsNaver instead of
# repeating its body here.
for idx, code in enumerate(codes):
    try:
        finance = getFsNaver(code)
    except IndexError:
        # The page has no financial-analysis section; back off before the next request.
        print('error : ', code)
        dump.append(code)
        time.sleep(10)
        continue
    except (ValueError, AssertionError):
        # The scraped cells do not fit the row/period layout.  A bare `except:`
        # was used here before, which would also swallow a kernel interrupt.
        print('error2 : ', code)
        dump.append(code)
        continue

    jaemu.append({'code': code, 'df': finance})

    # Every tenth code: print the progress ratio and pause.
    if idx % 10 == 9:
        print(idx/len(codes))
        time.sleep(5)


In [None]:
codes

In [None]:
codes_ = list(x for x in codes if x not in dump)

In [None]:
#KB손해보험 주식 1주당 0.57287주의 KB금융지주 주식으로 교환할 수 있게 됨
#지주회사의 완전자회사화 등 -> 상폐x

## 네이버 뉴스기사

In [109]:
query = '기아차'
from_date = '2002.07.07'
to_date = '2002.08.18'

In [110]:
url = f'https://search.naver.com/search.naver?where=news&query={query}&pd=3&ds={from_date}&de={to_date}'

In [112]:
url

'https://search.naver.com/search.naver?where=news&query=기아차&pd=3&ds=2002.07.07&de=2002.08.18'

### 거래주체(기관/외인)

In [7]:
# Engine for the "stock" database.  The password is URL-quoted because
# characters such as '@', ':' or '/' would otherwise be parsed as part of the
# connection URL.
engine = create_engine("mysql+pymysql://{user}:{pw}@localhost/{db}"
                       .format(user="root",
                               pw=parse.quote_plus(str(PASSWORD)),
                               db="stock"))

In [10]:
def getFullCode(code):
    """
    Build the full 'KR7…' code for a short stock code.

    The result is 'KR7' + code + '00' + check digit.  The check digit is
    computed from the characters of `code` only: characters at even positions
    count as they are, characters at odd positions are doubled and, when the
    double has two digits, reduced to 1 + its last digit.  The author's note
    says the '00' suffix applies to common stock (short code ending in 0).

    Raises ValueError when `code` contains a non-digit character.
    """
    total = 0
    for position, char in enumerate(code):
        digit = int(char)
        if position % 2:
            doubled = digit * 2
            digit = doubled if doubled < 10 else 1 + doubled % 10
        total += digit
    # A disabled line in the original added 20 for the 'KR' prefix; adding 20
    # would not change the sum modulo 10.
    checkDigit = (10 - total % 10) % 10
    return f'KR7{code}00{checkDigit}'

In [224]:
def tradeWho(code, fromDate, toDate='20201228'):
    """
    Download the daily institution / foreigner trading sheet of one stock from KRX.

    code     : short stock code; must end in '0'
    fromDate : start date as 'YYYYMMDD'
    toDate   : end date as 'YYYYMMDD' (defaults to the previously hard-coded value)

    Returns a DataFrame with the columns Date, Close, Change, Volume, inst_buy,
    inst_sell, inst_sum, foreign_buy, foreign_sell, foreign_sum, or an int
    status that callers test for:
        -1 : `code` does not end in '0'
         1 : the downloaded content could not be read as Excel (AssertionError)
         2 : the sheet holds no data rows
    """
    if code[-1] != '0':
        print('not valid code form, pleas enter xxxxx0')
        return -1
    fullCode = getFullCode(code)

    # Step 1: ask for the one-time code that authorises the download.
    url = ('http://marketdata.krx.co.kr/contents/COM/GenerateOTP.jspx?'
           'name=fileDown&filetype=xls&url=MKD/13/1302/13020304/mkd13020304_01'
           '&isu_cd=' + fullCode +
           '&isu_nm=%EC%A0%84%EC%B2%B4&'
           'type=D&period_selector=day'
           '&fromdate=' + fromDate + '&todate=' + toDate +
           '&pagePath=%2Fcontents%2FMKD%2F13%2F1302%2F13020304%2FMKD13020304.jsp')
    header_data = {
        'User-Agent': 'Chrome/78.0.3904.87 Safari/537.36',
    }
    r = requests.get(url, headers=header_data)

    # Step 2: post the code to the file server to receive the sheet.
    form_data = {'code': r.text}
    url = 'http://file.krx.co.kr/download.jspx'
    header_data = {
        'Referer': 'http://marketdata.krx.co.kr/contents/MKD/13/1302/13020304/MKD13020304.jsp',
        'User-Agent': 'Chrome/78.0.3904.87 Safari/537.36',
    }
    r = requests.post(url, data=form_data, headers=header_data)
    try:
        df = pd.read_excel(io.BytesIO(r.content))
    except AssertionError:
        return 1
    if len(df) == 0:
        return 2

    # The last row is discarded (presumably a non-data footer row — TODO
    # confirm).  Copy so the column assignments below do not write into a slice.
    df = df[:-1].copy()
    if df.empty:
        # A one-row sheet has nothing left; previously this crashed on row 0.
        return 2
    df.columns = ['Date','Close','Change','Volume','inst_buy',
                  'inst_sell','inst_sum','foreign_buy','foreign_sell','foreign_sum']

    def _as_int(column):
        # Text columns carry thousands separators; numeric ones are cast directly.
        if isinstance(column.iloc[0], str):
            column = column.str.replace(',', '', regex=False)
        return column.astype(int)

    df['Close'] = _as_int(df['Close'])
    # 'Change' is converted only when it arrived as text; otherwise it is left as read.
    first_numeric = 2 if isinstance(df['Change'].iloc[0], str) else 3
    for name in df.columns[first_numeric:]:
        # Assign whole columns so each one ends up with an integer dtype.
        df[name] = _as_int(df[name])

    return df

In [12]:
codesFiltered = [x for x in codes if x[-1] == '0']

In [None]:
weiredCodes = []

In [226]:
fromDate = '20100101'
# Download the trading sheet of each remaining code and append it to its own
# MySQL table.  Iterating the list directly lets tqdm show a total.
for code in tqdm(codesFiltered[500:]):
    df = tradeWho(code, fromDate)
    if isinstance(df, int) and df == 1:
        # The sheet was unreadable: note the code and retry from a later start date.
        weiredCodes.append(('error1', code))
        print('error1')
        df = tradeWho(code, '20150101')
    if isinstance(df, int):
        # Still a status code: nothing to store.  Checking the type first avoids
        # comparing a DataFrame with 2 and calling to_sql on an int.
        if df == 2:
            weiredCodes.append(('error2', code))
            print('error2')
        continue  # unrecoverable

    df.to_sql('c'+str(code), con=engine, if_exists="append", chunksize=1000,
              index_label=None, index=False)
    time.sleep(10)

300it [1:33:19, 18.67s/it]


In [None]:
def codeWhich(code):
    """
    Return the position of the first entry of the module-level `codesFiltered`
    that equals `code`.  Raises IndexError when there is no such entry.
    """
    positions = np.nonzero(np.array(codesFiltered) == code)[0]
    return positions[0]

In [222]:
codeWhich(code)

475

### 업종별 per
이거도 sql한다음에 다시분석해보자

industry별로 list 만들어서 유사도 검정 or 몇개 테마만 골라

In [42]:
df.dropna()

Unnamed: 0,Symbol,Market,Name,Sector,Industry,ListingDate,SettleMonth,Representative,HomePage,Region
1,095570,KOSPI,AJ네트웍스,산업용 기계 및 장비 임대업,"렌탈(파렛트, OA장비, 건설장비)",2015-08-21,12월,이현우,http://www.ajnet.co.kr,서울특별시
2,006840,KOSPI,AK홀딩스,기타 금융업,지주사업,1999-08-11,12월,"채형석, 이석주(각자 대표이사)",http://www.aekyunggroup.co.kr,서울특별시
47,027410,KOSPI,BGF,기타 금융업,지주회사,2014-05-19,12월,홍정국,http://www.bgf.co.kr,서울특별시
48,282330,KOSPI,BGF리테일,종합 소매업,체인화 편의점,2017-12-08,12월,이건준,http://www.bgfretail.com,서울특별시
49,138930,KOSPI,BNK금융지주,기타 금융업,금융지주회사,2011-03-30,12월,김지완,http://www.bnkfg.com,부산광역시
...,...,...,...,...,...,...,...,...,...,...
7087,079980,KOSPI,휴비스,화학섬유 제조업,"합성섬유(폴리에스테르원사,원면),재생섬유,폴리에스텔 원사,원면,고상칩 제조,도소매",2012-02-23,12월,신유동,http://www.huvis.com,서울특별시
7089,005010,KOSPI,휴스틸,1차 철강 제조업,"강관(배관용,구조용,유정용) 제조,도매",1973-06-29,12월,박훈,http://www.husteel.com,서울특별시
7093,069260,KOSPI,휴켐스,기타 화학제품 제조업,"화합물,화학제품 제조",2002-10-07,12월,신진용,http://www.huchems.com,서울특별시
7098,000540,KOSPI,흥국화재,보험업,손해보험,1974-12-05,12월,권중원,http://www.insurance.co.kr,서울특별시


In [6]:
kospiDf = pd.read_csv("./data/upjong_0623.csv")

In [85]:
kospiDf=kospiDf.dropna()

In [87]:
kospiDf[kospiDf['Name']=='셀트리온헬스케어']

Unnamed: 0,Symbol,Market,Name,Sector,Industry,ListingDate,SettleMonth,Representative,HomePage,Region
1104,91990,KOSDAQ,셀트리온헬스케어,기타 전문 도매업,바이오의약품 마케팅 및 판매,2017-07-28,12월,김형기,http://www.celltrionhealthcare.com/kr/index.do,인천광역시


In [92]:
kospiDf[kospiDf.apply(lambda x:'자동차' in x['Industry'],axis=1)]

Unnamed: 0,Symbol,Market,Name,Sector,Industry,ListingDate,SettleMonth,Representative,HomePage,Region
30,005830,KOSPI,DB손해보험,보험업,"자동차보험,화재보험,해상보험,특종보험,장기보험",1973-06-28,12월,김정남,http://www.idbins.com,서울특별시
48,130500,KOSDAQ,GH신소재,기타 섬유제품 제조업,"자동차용 부직포, 폴리우레탄 폼",2013-08-02,12월,우희구,http://www.gumho-nt.com,경상북도
62,089470,KOSPI,HDC현대EP,기타 화학제품 제조업,"합성수지(PP컴파운딩가공,자동차범퍼,내장제) 제조,도소매",2006-09-25,12월,김 명 호(직무대행 선임),http://www.hyundai-ep.com,충청남도
64,011200,KOSPI,HMM,해상 운송업,"정기,부정기,전용선,콘테이너선,자동차선,LNG선 해운",1995-10-05,12월,배재훈,http://www.hmm21.com,서울특별시
97,009070,KOSPI,KCTC,도로 화물 운송업,"화물자동차운송,항만하역,창고보관,컨테이너조작,중량물해상운송",1978-09-29,12월,이준환,http://www.kctc.co.kr,서울특별시
...,...,...,...,...,...,...,...,...,...,...
2492,001450,KOSPI,현대해상,보험업,손해보험(자동차보험),1989-08-25,12월,"조용일, 이성재",http://www.hi.co.kr,서울특별시
2506,013520,KOSPI,화승알앤에이,자동차 신품 부품 제조업,"자동차용 고무제품(제조저압호스,고압호스,산업용고무호스,,WEATHER STIRP,고...",1991-02-22,12월,"현지호,이정두",http://www.hsrna.com,경상남도
2509,010690,KOSPI,화신,자동차 신품 부품 제조업,"자동차엔진부품(ARM류,C/MBR류),자동차부품 제조",1994-01-14,12월,"정서진, 장의호",http://www.hwashin.co.kr,경상북도
2510,126640,KOSDAQ,화신정공,자동차 신품 부품 제조업,자동차부품,2010-08-31,12월,정 서 진,http://hsp.hwashin.co.kr,경상북도


In [11]:
kospi = kospiDf[kospiDf['Market']=='KOSPI']

In [79]:
soft = kospiDf[kospiDf['Sector']=='특수 목적용 기계 제조업']
soft=soft.dropna()

In [22]:
# Complete rows (no missing values) whose Sector text contains '자동차'.
jejo = kospiDf.dropna().pipe(
    lambda frame: frame[frame.apply(lambda row: '자동차' in row['Sector'], axis=1)]
)

In [24]:
kospiDf.groupby('Sector').size().reset_index().sort_values(0,ascending = False).head(30)


Unnamed: 0,Sector,0
141,특수 목적용 기계 제조업,137
78,소프트웨어 개발 및 공급업,129
125,전자부품 제조업,120
111,자동차 신품 부품 제조업,97
104,의약품 제조업,91
34,기타 금융업,88
47,기타 화학제품 제조업,86
25,금융 지원 서비스업,77
140,통신 및 방송 장비 제조업,73
57,반도체 제조업,57


In [178]:
# Keep the entries of `jejo` that also appear in `codeTemp`.
# NOTE(review): if `jejo` is still the DataFrame filtered from `kospiDf`,
# iterating it yields column labels, not stock codes — confirm whether
# jejo['Symbol'] was intended.
# NOTE(review): in the visible source `codeTemp` is only assigned inside
# `upjong`; confirm where the module-level value comes from.
code = ([x for x in jejo if x in codeTemp])

In [179]:
df = perTimeDf[code].mean().reset_index()

df.columns = ['code','per']

In [180]:
reluMean(df['per'])

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


(27.216100100284958, 0.15753424657534246)

In [114]:
def upjong(name):
    """
    Return the column means of `perTimeDf` for the stocks of one sector.

    name : value matched exactly against the 'Sector' column of `kospiDf`.

    Symbols of that sector that are not columns of `perTimeDf` are ignored.
    The result is a DataFrame with the columns 'code' and 'per'.
    """
    available = perTimeDf.columns.tolist()
    sector_symbols = kospiDf[kospiDf['Sector'] == name]['Symbol'].tolist()
    selected = [symbol for symbol in sector_symbols if symbol in available]

    result = perTimeDf[selected].mean().reset_index()
    result.columns = ['code', 'per']
    return result

In [112]:
def reluMean(arr):
    """
    Summarise values by the mean of the positive ones and the share of negative ones.

    arr : sequence of numbers (list, array, Series, ...)

    Returns (mean of the entries > 0, number of entries < 0 / number of entries).
    NaN entries match neither condition but still count in the denominator.
    The mean is NaN when there is no positive entry; both results are NaN for
    an empty input, which previously raised ZeroDivisionError.
    """
    values = np.asarray(arr, dtype=float).ravel()
    if values.size == 0:
        return (np.nan, np.nan)
    positives = values[values > 0]
    # Skip np.mean on an empty selection to avoid the "mean of empty slice" warning.
    positive_mean = np.mean(positives) if positives.size else np.nan
    negative_share = values[values < 0].size / values.size
    return (positive_mean, negative_share)

In [152]:
reluMean(upjong('기타 식품 제조업')['per'].tolist())

(36.94565108362261, 0.0)

In [130]:
temp = perTimeDf['272210'][:5].values

In [148]:
upjong('금융 지원 서비스업')

Unnamed: 0,code,per
0,16610,41.169215
1,30210,8.765298
2,5940,12.188581
3,1510,18.105961
4,30610,4.814256
5,3540,9.720416
6,8560,7.51514
7,6800,11.514433
8,1270,8.678502
9,16360,11.128393


In [121]:
reluMean(upjong('전자부품 제조업')['per'].tolist())

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


(27.68780637126737, 0.21052631578947367)

In [99]:
arr = medicalDf['per'].tolist()

### 2018년 per 

In [42]:
timePriceDf = pd.read_csv("kospi18_0519.csv", index_col = 0)

In [52]:
#per, pbr, eps 등 지수와 코드 가져오는 함수
def indexYear(index, year):
    codes = []
    lists = []

    for i in range(len(jaemu)):
        
        try: 
            jipyo = jaemu[i]['df'].loc[index, year]

            if(type(jipyo) == str):
                if(jipyo==''):
                    continue

                lists.append(jipyo)
                codes.append(jaemu[i]['code'])

            elif(jipyo.tolist()[0]!=''):
                lists.append(jipyo.tolist()[0])
                codes.append(jaemu[i]['code'])



        except KeyError:
            continue
    return(codes, lists)

In [45]:
# Load `jaemu` from a pickle file (presumably the list built by the scraping
# loop and saved earlier — TODO confirm).
# NOTE(review): pickle.load can execute arbitrary code while loading; only open
# pickle files that come from a trusted source.
with open('jaemu_0511.pkl','rb') as f:
    jaemu = pickle.load(f)

In [53]:
# Scan `jaemu` once and unpack both lists; calling indexYear separately for the
# codes and for the values repeated the whole scan.
eps_codes, eps_values = indexYear('EPS(원)', '2017.12')
indexDf = pd.DataFrame({'code': eps_codes,
                        'index_2017': eps_values})

In [55]:
indexDf['index_2017'] = indexDf['index_2017'].str.replace(',','')
indexDf['index_2017'] = indexDf['index_2017'].astype(float)
indexDf = indexDf[indexDf['index_2017'] !=0]

In [57]:
perTimeDf = timePriceDf[indexDf['code']]/indexDf['index_2017'].tolist() #per 10기준으로 판단

In [61]:
perTimeDf = perTimeDf[perTimeDf.index<'2019']

In [62]:
perTimeDf

Unnamed: 0_level_0,095570,006840,027410,282330,138930,001460,001040,079160,000120,011150,...,004800,094280,093370,081660,005870,079980,005010,069260,000540,003280
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-02,21.781250,9.784632,0.206771,21.727911,7.623282,41.197994,12.800569,-1145.454545,70.243902,22.755906,...,8.872721,16.840731,32.131661,16.174757,8.974359,16.178737,29.362101,8.756254,4.056532,-1.563559
2018-01-03,21.625000,9.532039,0.206104,20.902147,7.526273,40.791435,12.909399,-1074.242424,70.731707,22.559055,...,9.415273,18.798956,32.601881,16.990291,9.014423,16.409861,28.705441,8.756254,4.033613,-1.583686
2018-01-04,22.093750,9.625100,0.200768,20.695706,7.502021,40.723675,12.764344,-1084.848485,70.731707,22.362205,...,9.064179,17.558747,31.661442,16.582524,8.814103,16.486903,28.799250,8.613295,3.964859,-1.555085
2018-01-05,22.656250,9.625100,0.194765,20.644096,7.502021,40.655915,12.655591,-1092.424242,69.024390,22.401575,...,9.128034,17.428198,31.818182,16.737864,8.733974,16.255778,29.174484,8.756254,4.094729,-1.567797
2018-01-08,22.281250,9.638394,0.194765,20.540875,7.663703,40.655915,12.583064,-1083.333333,67.804878,22.007874,...,8.745011,17.885117,31.065831,16.407767,8.453526,16.024653,28.799250,9.042173,4.094729,-1.555085
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-21,13.546875,7.351768,0.114458,22.502064,6.200485,33.066811,9.899631,-650.000000,83.170732,18.937008,...,6.363931,14.229765,23.636364,48.932039,6.314103,11.479199,22.420263,9.274482,3.422460,-0.859110
2018-12-24,13.484375,7.218825,0.111657,21.727911,6.200485,33.202331,9.718274,-640.909091,81.463415,18.543307,...,6.439435,13.838120,22.978056,47.572816,6.474359,11.679507,22.514071,9.399571,3.453018,-0.843220
2018-12-26,13.468750,7.125764,0.108855,21.676301,6.143897,33.676650,9.536994,-612.878788,80.487805,17.834646,...,6.417862,13.577023,22.319749,47.572816,6.498397,11.587057,21.857411,9.506791,3.414820,-0.809322
2018-12-27,13.796875,7.245413,0.108188,20.953757,5.941795,33.337851,9.267805,-622.727273,82.195122,17.874016,...,5.792255,13.577023,22.476489,49.514563,6.426282,11.432974,21.388368,8.881344,3.414820,-0.834746


In [None]:
#아이투자

# url = 'http://www.itooza.com/vclub/y10_page.php?cmp_cd=005930&mode=dy&ss=10&sv=2&lsmode=1&lkmode=1&pmode=1&exmode=1&accmode=1'

# source = requests.get(url).content

# soup= bs(source,'lxml')

# soup

# soup.findAll("div",{"class":"body"})

### 상장폐지 데이터

### Trash

In [None]:
temp = 'http://marketdata.krx.co.kr/contents/COM/GenerateOTP.jspx?name=fileDown&filetype=csv&url=MKD/13/1302/13020303/mkd13020303&isu_cdnm=A003550%2FLG&isu_cd=KR7003550001&isu_nm=LG&isu_srt_cd=A003550&period_selector=day&fromdate=20200801&todate=20200901&pagePath=%2Fcontents%2FMKD%2F13%2F1302%2F13020303%2FMKD13020303.jsp'


temp

url = 'http://marketdata.krx.co.kr/contents/COM/GenerateOTP.jspx?' \
'name=fileDown&filetype=xls&url=MKD/13/1302/13020303/mkd13020303&' \
'isu_cdnm=%2F%EC%82%BC%EC%84%B1%EC%A0%84%EC%9E%90&' \
'isu_cd=KR7003550002&isu_nm=%EC%82%BC%EC%84%B1%EC%A0%84%EC%9E%90&' \
'isu_srt_cd=&period_selector=day&fromdate=20200801&todate=20200901&' \
'pagePath=%2Fcontents%2FMKD%2F13%2F1302%2F13020303%2FMKD13020303.jsp'




len('KR 7 00355 000 2')

url

header_data = {
    'User-Agent': 'Chrome/78.0.3904.87 Safari/537.36',
}
r = requests.get(url, headers=header_data)

url = 'http://file.krx.co.kr/download.jspx'
form_data = {'code': r.text}
header_data = {
    'Referer': 'http://marketdata.krx.co.kr/contents/MKD/04/0406/04060600/MKD04060600.jsp',
    'User-Agent': 'Chrome/78.0.3904.87 Safari/537.36',
}
r = requests.post(url, data=form_data, headers=header_data)
df = pd.read_excel(io.BytesIO(r.content))
df=df.rename(columns = col_map)


In [None]:

url = "https://finance.naver.com/item/coinfo.nhn?code=005930&target=finsum_more"
url = "https://finance.naver.com/item/coinfo.nhn?code=005930"
url = "https://finance.naver.com/item/main.nhn?code=005930"

soup = bs(requests.get(url).text,'lxml')

table = soup.find("div",{"class":"section cop_analysis"})
table = table.find('div',{"class":"sub_section"})