In [1]:
import os
import time
import sqlite3
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
class StockPriceCrawler:
    """Collect daily and intraday quotes from Naver Finance into SQLite.

    The crawler reads the stock codes to follow from table ``CODE`` (rows
    whose ``수집여부`` column equals ``'Y'``) and writes into two tables:

    * ``SISE_DAY``  - 8 values per row: the stock code followed by the
      seven columns of the scraped daily-quote table.
    * ``SISE_TIME`` - 9 values per row: the stock code, the date
      (``YYYYMMDD``) and the seven columns of the scraped intraday table.

    Rows are written with ``INSERT OR REPLACE``.  The tables themselves are
    not created here and must already exist.

    Note that ``run()`` only calls the two ``*_init`` methods and
    ``get_sise_time``; ``get_sise_day`` has to be invoked separately.
    """

    # Naver Finance endpoints; the placeholders are filled positionally.
    DAY_URL = 'https://finance.naver.com/item/sise_day.nhn?code={}&page={}'
    TIME_URL = 'https://finance.naver.com/item/sise_time.nhn?code={}&thistime={}&page={}'

    def __init__(self, db_path='stockprice.db', delay=0.05):
        """Open the database and remember the per-request delay.

        Args:
            db_path: Path handed to ``sqlite3.connect``.
            delay: Seconds to sleep between two page downloads.
        """
        self.conn = sqlite3.connect(db_path)
        self.cur = self.conn.cursor()
        self.delay = delay

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def run(self, interval=30):
        """Backfill every tracked code once, then poll intraday ticks forever.

        This method never returns normally; stop it by interrupting the
        kernel/process.

        Args:
            interval: Seconds to sleep between two polling rounds.
        """
        # Single quotes: in SQL a double-quoted token is an identifier and is
        # only accepted as a string by SQLite for backward compatibility.
        self.cur.execute("SELECT 코드 FROM CODE WHERE 수집여부 = 'Y'")
        codes = [row[0] for row in self.cur.fetchall()]

        for code in codes:
            self.get_sise_day_init(code)
            self.get_sise_time_init(code)
            print('수집 완료: {}'.format(code))

        while True:
            for code in codes:
                self.get_sise_time(code)
                # Report only after the collection actually happened.
                now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print('수집 완료: {} ({})'.format(code, now))
            time.sleep(interval)

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _fetch_pages(self, make_url, is_known=None):
        """Download consecutive result pages and return them as a list.

        Args:
            make_url: Callable mapping a 1-based page number to a URL.
            is_known: Optional predicate applied to the first-column value of
                a page's last row; when it returns True that page is kept and
                paging stops (everything older is already stored).

        Returns:
            A possibly empty list of DataFrames, one per page, NaN rows
            dropped.
        """
        pages = []
        page = 1
        while True:
            data = pd.read_html(make_url(page))[0].dropna()
            if data.empty:
                # Nothing on this page (e.g. no trades yet) - stop instead of
                # indexing into an empty frame.
                break
            marker = data.iloc[-1, 0]
            # Termination rule used by the original backfill loops: a page
            # ending on the same row as the previous one means we ran past
            # the last real page.
            if pages and marker == pages[-1].iloc[-1, 0]:
                break
            pages.append(data)
            if is_known is not None and is_known(marker):
                break
            page += 1
            time.sleep(self.delay)
        return pages

    def _insert_rows(self, table, df):
        """Write every row of ``df`` into ``table`` and commit."""
        placeholders = ', '.join('?' * len(df.columns))
        sql = 'INSERT OR REPLACE INTO {} VALUES ({})'.format(table, placeholders)
        self.cur.executemany(sql, list(df.itertuples(index=False, name=None)))
        self.conn.commit()

    # ------------------------------------------------------------------
    # daily quotes
    # ------------------------------------------------------------------
    def get_sise_day_init(self, code):
        """Backfill the complete daily history of ``code`` (run only once).

        Returns:
            The inserted DataFrame, or None when ``SISE_DAY`` already holds
            rows for ``code`` or no data could be downloaded.
        """
        self.cur.execute('SELECT COUNT(코드) FROM SISE_DAY WHERE 코드 = ?', (code,))
        if self.cur.fetchone()[0] != 0:
            return None

        pages = self._fetch_pages(lambda page: self.DAY_URL.format(code, page))
        if not pages:
            return None

        df = pd.concat(pages).reset_index(drop=True)
        # Rows are newest-first, so the next row is the previous trading day.
        df['전일비'] = (df['종가'] - df['종가'].shift(-1)).fillna(0)
        df.insert(0, '코드', code)
        # regex=False: '.' must be a literal dot, not "any character".
        df['날짜'] = df['날짜'].str.replace('.', '', regex=False)

        self._insert_rows('SISE_DAY', df)
        return df

    def get_sise_day(self, code):
        """Append daily quotes newer than the latest stored date (daily job).

        Returns:
            DataFrame of the newly inserted rows; empty when the table is
            already up to date.  When nothing is stored for ``code`` yet the
            call is delegated to ``get_sise_day_init``.
        """
        self.cur.execute('SELECT MAX(날짜) FROM SISE_DAY WHERE 코드 = ?', (code,))
        day_last = self.cur.fetchone()[0]
        if day_last is None:
            # No reference date to resume from - do the full backfill.
            return self.get_sise_day_init(code)

        today = datetime.now().strftime('%Y%m%d')
        if day_last >= today:
            return pd.DataFrame()

        pages = self._fetch_pages(
            lambda page: self.DAY_URL.format(code, page),
            lambda date: date.replace('.', '') <= day_last,
        )
        if not pages:
            return pd.DataFrame()

        df = pd.concat(pages).reset_index(drop=True)
        df['날짜'] = df['날짜'].str.replace('.', '', regex=False)
        # Computed before filtering so the oldest new row still sees the
        # already-stored day below it.
        df['전일비'] = (df['종가'] - df['종가'].shift(-1)).fillna(0)
        df = df[df['날짜'] > day_last].copy()
        df.insert(0, '코드', code)

        self._insert_rows('SISE_DAY', df)
        return df

    # ------------------------------------------------------------------
    # intraday ticks
    # ------------------------------------------------------------------
    def get_sise_time_init(self, code):
        """Backfill today's intraday ticks of ``code`` (once per day).

        Returns:
            The inserted DataFrame, or None when ``SISE_TIME`` already holds
            rows for ``code`` today or no tick could be downloaded.
        """
        now = datetime.now().strftime('%Y%m%d%H%M%S')
        today = now[:8]
        self.cur.execute(
            'SELECT COUNT(코드) FROM SISE_TIME WHERE 코드 = ? AND 날짜 = ?',
            (code, today),
        )
        if self.cur.fetchone()[0] != 0:
            return None

        pages = self._fetch_pages(lambda page: self.TIME_URL.format(code, now, page))
        if not pages:
            return None

        df = pd.concat(pages).reset_index(drop=True)
        # Difference to the next (= earlier) tick; the earliest tick gets 0.
        df['전일비'] = (df['체결가'] - df['체결가'].shift(-1)).fillna(0)
        df.insert(0, '날짜', today)
        df.insert(0, '코드', code)
        df['체결시각'] = df['체결시각'].str.replace(':', '', regex=False)

        self._insert_rows('SISE_TIME', df)
        return df

    def get_sise_time(self, code):
        """Append intraday ticks newer than the latest stored one (each poll).

        Returns:
            DataFrame of the newly inserted rows (possibly empty).
        """
        # Take date and time from the SAME newest row.  MAX(날짜)||MAX(체결시각)
        # would pair today's date with the latest clock time of ANY stored
        # day and hide today's ticks until that time has passed.
        self.cur.execute(
            'SELECT 날짜 || 체결시각 FROM SISE_TIME WHERE 코드 = ? '
            'ORDER BY 날짜 DESC, 체결시각 DESC LIMIT 1',
            (code,),
        )
        row = self.cur.fetchone()
        time_last = row[0] if row else None

        now = datetime.now().strftime('%Y%m%d%H%M%S')
        today = now[:8]

        def is_known(tick):
            # True once the page reaches a tick that is already stored.
            return (time_last is not None
                    and today + tick.replace(':', '') <= time_last)

        pages = self._fetch_pages(
            lambda page: self.TIME_URL.format(code, now, page),
            is_known,
        )
        if not pages:
            return pd.DataFrame()

        df = pd.concat(pages).reset_index(drop=True)
        df['체결시각'] = df['체결시각'].str.replace(':', '', regex=False)
        df['전일비'] = (df['체결가'] - df['체결가'].shift(-1)).fillna(0)
        if time_last is not None and time_last[:8] >= today:
            # Keep only ticks after the newest stored one (HHMM part).
            df = df[df['체결시각'] > time_last[8:12]].copy()

        df.insert(0, '날짜', today)
        df.insert(0, '코드', code)

        self._insert_rows('SISE_TIME', df)
        return df

In [None]:
# Create the crawler (its constructor opens the SQLite file 'stockprice.db')
# and start collecting. run() first backfills every tracked code and then
# polls inside an endless loop, so this cell keeps executing until the
# kernel is interrupted.
crawler = StockPriceCrawler()
crawler.run()

수집 완료: 001450
수집 완료: 005830
수집 완료: 000810
수집 완료: 000060
수집 완료: 001450 (2020-01-06 14:49:00)
수집 완료: 005830 (2020-01-06 14:49:00)
수집 완료: 000810 (2020-01-06 14:49:01)
수집 완료: 000060 (2020-01-06 14:49:01)
수집 완료: 001450 (2020-01-06 14:49:31)
수집 완료: 005830 (2020-01-06 14:49:31)
수집 완료: 000810 (2020-01-06 14:49:31)
수집 완료: 000060 (2020-01-06 14:49:31)
수집 완료: 001450 (2020-01-06 14:50:01)
수집 완료: 005830 (2020-01-06 14:50:01)
수집 완료: 000810 (2020-01-06 14:50:02)
수집 완료: 000060 (2020-01-06 14:50:02)
수집 완료: 001450 (2020-01-06 14:50:32)
수집 완료: 005830 (2020-01-06 14:50:32)
수집 완료: 000810 (2020-01-06 14:50:32)
수집 완료: 000060 (2020-01-06 14:50:32)
수집 완료: 001450 (2020-01-06 14:51:02)
수집 완료: 005830 (2020-01-06 14:51:02)
수집 완료: 000810 (2020-01-06 14:51:03)
수집 완료: 000060 (2020-01-06 14:51:03)
수집 완료: 001450 (2020-01-06 14:51:33)
수집 완료: 005830 (2020-01-06 14:51:33)
수집 완료: 000810 (2020-01-06 14:51:33)
수집 완료: 000060 (2020-01-06 14:51:33)
수집 완료: 001450 (2020-01-06 14:52:03)
수집 완료: 005830 (2020-01-06 14:52:03)
수집 완료: 0