In [3]:
import json
import os

import pymongo
import requests
from bs4 import BeautifulSoup
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

In [4]:
# SQLAlchemy engine for the MySQL "world" database, plus the declarative base
# that the ORM model below inherits from.
# The connection URL can be supplied through the MYSQL_URL environment variable
# so credentials do not have to live in the notebook.
# NOTE(review): the fallback URL embeds a password in source; remove it (and
# rotate the password) once MYSQL_URL is configured everywhere this runs.
mysql_client = create_engine(
    os.environ.get("MYSQL_URL", "mysql://root:dss@13.125.109.6/world?charset=utf8")
)
base = declarative_base()
mysql_client

Engine(mysql://root:***@13.125.109.6/world?charset=utf8)

In [5]:
# MongoDB client used by mongo_save(). The server URI can be overridden through
# the MONGO_URL environment variable; the hard-coded host is only a fallback.
mongo_client = pymongo.MongoClient(
    os.environ.get("MONGO_URL", 'mongodb://13.125.109.6:27017')
)
mongo_client

MongoClient('13.125.109.6', 27017)

In [6]:
class NaverKeyword(base):
    """ORM model for a single crawled keyword, mapped to the ``naver`` table.

    Each row holds an integer primary key ``id``, the keyword's ``rank``, the
    ``keyword`` text (up to 50 characters) and an ``rdate`` timestamp.
    """

    __tablename__ = "naver"

    id = Column(Integer, primary_key=True)
    rank = Column(Integer, nullable=False)
    keyword = Column(String(50), nullable=False)
    # NOTE(review): declared NOT NULL with no default, and __init__ never sets
    # it; inserts presumably rely on the database filling it in. Confirm.
    rdate = Column(TIMESTAMP, nullable=False)

    def __init__(self, rank, keyword):
        """Store the rank and keyword text for a new row."""
        self.rank, self.keyword = rank, keyword

    def __repr__(self):
        """Return a short debug representation showing rank and keyword."""
        template = "<NaverKeyword {}, {}>"
        return template.format(self.rank, self.keyword)

In [7]:
def crawling(timeout=10):
    """Scrape the keyword ranking list from the Naver front page.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up
            (forwarded to ``requests.get``).

    Returns:
        A list of ``(rank, keyword)`` tuples in page order; both elements are
        the text of the matched HTML elements (strings). Items that lack
        either the rank or the keyword element are skipped.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx HTTP status.
    """
    # Without a timeout, requests can block forever on an unresponsive server.
    response = requests.get("https://www.naver.com/", timeout=timeout)
    # Fail loudly instead of parsing an error page and returning an empty list.
    response.raise_for_status()

    dom = BeautifulSoup(response.content, "html.parser")
    datas = []
    for item in dom.select(".ah_roll_area > .ah_l > .ah_item"):
        rank_tag = item.select_one(".ah_r")
        keyword_tag = item.select_one(".ah_k")
        # select_one returns None when the element is missing; skip such items
        # rather than crashing on `.text`.
        if rank_tag is None or keyword_tag is None:
            continue
        datas.append((rank_tag.text, keyword_tag.text))
    return datas

In [14]:
# Run the crawler once and display the resulting (rank, keyword) tuples.
# `datas` is reused by the mysql_save / mongo_save calls in later cells.
datas = crawling()
datas

[('1', '양예원'),
 ('2', '소고기모둠편백찜'),
 ('3', '국가비'),
 ('4', '더 이퀄라이저'),
 ('5', '편백미가'),
 ('6', '이언주'),
 ('7', '편백찜'),
 ('8', '삼겹살 간장조림'),
 ('9', '스윙엔터테인먼트'),
 ('10', '심석희 성폭행'),
 ('11', '제이쓴'),
 ('12', '통갈비짬뽕'),
 ('13', '베리베리'),
 ('14', '송광민'),
 ('15', '생생정보통 맛집오늘'),
 ('16', '방학동 도깨비시장'),
 ('17', '박종철'),
 ('18', '쇼챔피언'),
 ('19', '2tv 저녁 생생정보'),
 ('20', '조재범 코치 나이')]

In [15]:
def mysql_save(datas):
    """Insert crawled keywords into MySQL as ``NaverKeyword`` rows.

    Args:
        datas: Iterable of ``(rank, keyword)`` pairs, as returned by
            ``crawling()``.

    Returns:
        None. When ``datas`` is empty nothing is written and no session is
        opened.

    Raises:
        Any exception raised while adding or committing is re-raised after the
        transaction has been rolled back.
    """
    keywords = [NaverKeyword(rank, keyword) for rank, keyword in datas]
    if not keywords:
        return

    # Open a session bound to the module-level engine.
    session = sessionmaker(bind=mysql_client)()
    try:
        session.add_all(keywords)
        session.commit()
    except Exception:
        # Undo the partial transaction so the connection is returned clean.
        session.rollback()
        raise
    finally:
        # Always release the session, even when the commit fails.
        session.close()

In [16]:
# Persist the crawled keywords to MySQL.
# NOTE(review): the table is only created inside run() (base.metadata.create_all),
# which is defined further down; on a fresh top-to-bottom run this call executes
# first, so confirm the `naver` table already exists. run() also saves again.
mysql_save(datas)

In [10]:
def mongo_save(datas):
    """Insert crawled keywords into the ``crawling.naver_keywords`` collection.

    Args:
        datas: Iterable of ``(rank, keyword)`` pairs, as returned by
            ``crawling()``.

    Returns:
        None. When ``datas`` is empty nothing is sent to MongoDB.
    """
    querys = [{"rank": rank, "keyword": keyword} for rank, keyword in datas]
    # insert_many rejects an empty document list, so skip the call entirely.
    if not querys:
        return
    # Collection.insert() is deprecated in PyMongo 3 and removed in PyMongo 4;
    # insert_many() is the supported bulk-insert API.
    mongo_client.crawling.naver_keywords.insert_many(querys)

In [17]:
# Persist the crawled keywords to MongoDB (run() below saves them again).
mongo_save(datas)

In [11]:
def send_slack(msg, channel="#dss", username="provision_bot", timeout=10):
    """Post a message to Slack through an incoming webhook.

    Args:
        msg: Text of the message.
        channel: Channel name placed in the payload.
        username: Display name placed in the payload.
        timeout: Seconds to wait for Slack to respond before giving up
            (forwarded to ``requests.post``).

    Returns:
        The ``requests`` response object from the webhook call; the HTTP
        status is not checked here.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    # Prefer a webhook URL supplied via the environment.
    # NOTE(review): the fallback is a secret committed to source; rotate it and
    # drop the default once SLACK_WEBHOOK_URL is configured.
    webhook_URL = os.environ.get(
        "SLACK_WEBHOOK_URL",
        "https://hooks.slack.com/services/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",
    )
    payload = {
        "channel": channel,
        "username": username,
        "icon_emoji": ":provision:",
        "text": msg,
    }
    response = requests.post(
        webhook_URL,
        data=json.dumps(payload),
        # The body is JSON, so declare it as such.
        headers={"Content-Type": "application/json"},
        # Without a timeout the call can block forever if Slack is unreachable.
        timeout=timeout,
    )
    return response

In [12]:
def run():
    """Run the whole pipeline once.

    Steps, in order: create the tables on the MySQL engine, crawl the Naver
    keywords, store them in MySQL and MongoDB, then send a Slack notification.
    Any exception from a step propagates and stops the remaining steps.
    """
    # Create the tables in the database.
    base.metadata.create_all(mysql_client)

    # Crawl the Naver keywords.
    crawled = crawling()

    # Save the same result set to both databases.
    mysql_save(crawled)
    mongo_save(crawled)

    # Send a message to Slack.
    send_slack("naver crawling done!")

In [13]:
# Execute the full pipeline: create tables, crawl, save to both stores, notify Slack.
run()