In [1]:
import json
import os

import pymongo
import requests
from bs4 import BeautifulSoup
from sqlalchemy import *
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base

In [2]:
# Connection URL for the MySQL engine; set the MYSQL_URL environment variable
# to override it without editing the notebook.
# SECURITY: the fallback embeds the root password in the notebook. It is kept
# only so existing runs keep working -- rotate the password and drop the fallback.
MYSQL_URL = os.environ.get(
    "MYSQL_URL", "mysql://root:3328@13.209.74.77/world?charset=utf8"
)
mysql_client = create_engine(MYSQL_URL)
# Declarative base that the ORM model class in this notebook inherits from.
base = declarative_base()
mysql_client

Engine(mysql://root:***@13.209.74.77/world?charset=utf8)

In [3]:
# MongoDB connection URI; set the MONGO_URI environment variable to override it.
# NOTE(review): the default URI carries no credentials -- confirm the server
# is not reachable without authentication.
MONGO_URI = os.environ.get("MONGO_URI", 'mongodb://13.209.74.77:27017')
mongo_client = pymongo.MongoClient(MONGO_URI)
mongo_client

MongoClient('13.209.74.77', 27017)

In [4]:
class NaverKeyword(base):
    """Row of the ``naver`` table holding one (rank, keyword) pair.

    ``rank`` and ``keyword`` are supplied by the caller; ``rdate`` is never
    set from Python and is filled in by the database default at insert time.
    """

    __tablename__ = "naver"

    id = Column(Integer, primary_key=True)
    rank = Column(Integer, nullable=False)
    keyword = Column(String(50), nullable=False)
    # rdate is NOT NULL yet __init__ never assigns it. Without a declared
    # default the insert only succeeds on servers that implicitly turn a
    # missing/NULL TIMESTAMP into the current time (legacy MySQL behaviour);
    # an explicit server default makes the insert valid everywhere.
    rdate = Column(
        TIMESTAMP, nullable=False, server_default=text("CURRENT_TIMESTAMP")
    )

    def __init__(self, rank, keyword):
        """Store ``rank`` and ``keyword``; ``rdate`` is left to the database."""
        self.rank = rank
        self.keyword = keyword

    def __repr__(self):
        return "<NaverKeyword {}, {}>".format(self.rank, self.keyword)

In [5]:
def crawling(timeout=10):
    """Download the Naver front page and extract its keyword ranking.

    Args:
        timeout: Seconds handed to ``requests.get`` so an unresponsive server
            raises instead of blocking the kernel indefinitely.

    Returns:
        A list of ``(rank, keyword)`` tuples of element text, one per
        ``.ah_item`` element matched by the selector, in document order.
        The list is empty when the selector matches nothing.
    """
    response = requests.get("https://www.naver.com/", timeout=timeout)
    dom = BeautifulSoup(response.content, "html.parser")
    items = dom.select(".ah_roll_area > .ah_l > .ah_item")
    # Separate names for the element and its text: the original rebound the
    # loop variable to the keyword string inside the loop body.
    return [
        (item.select_one(".ah_r").text, item.select_one(".ah_k").text)
        for item in items
    ]

In [6]:
# Crawl once and show the resulting (rank, keyword) list as the cell output.
datas = crawling()
datas

[('1', '양승태'),
 ('2', '에릭남'),
 ('3', '김예령'),
 ('4', '파인텍'),
 ('5', '고주원'),
 ('6', '김보름'),
 ('7', '윤소희'),
 ('8', '현빈 손예진'),
 ('9', '정영주'),
 ('10', '손예진'),
 ('11', 'txt'),
 ('12', '아이돌 아버지 셰프'),
 ('13', '행복주택'),
 ('14', '텔루스'),
 ('15', '왜그래 풍상씨'),
 ('16', '한국 키르기스스탄'),
 ('17', '박유나'),
 ('18', '외모지상주의'),
 ('19', '김학용'),
 ('20', '욕지도')]

In [7]:
def mysql_save(datas):
    """Insert (rank, keyword) pairs into MySQL as ``NaverKeyword`` rows.

    Args:
        datas: Iterable of ``(rank, keyword)`` pairs.

    Raises:
        Whatever the session raises while adding or committing; the
        transaction is rolled back before the error is re-raised.
    """
    keywords = [NaverKeyword(rank, keyword) for rank, keyword in datas]

    # One short-lived session per call, bound to the notebook's engine.
    session = sessionmaker(bind=mysql_client)()
    try:
        session.add_all(keywords)
        session.commit()
    except Exception:
        # Leave no half-finished transaction open on the connection.
        session.rollback()
        raise
    finally:
        # Always release the session, including when the commit fails.
        session.close()

In [8]:
# The `naver` table is otherwise only created inside run(), which is called
# later in the notebook. Create it here (no-op when it already exists) so this
# cell also works against a fresh database.
base.metadata.create_all(mysql_client)
mysql_save(datas)

In [9]:
def mongo_save(datas):
    """Insert (rank, keyword) pairs into ``crawling.naver_keywords`` in MongoDB.

    Args:
        datas: Iterable of ``(rank, keyword)`` pairs. Each pair becomes one
            document with the keys ``"rank"`` and ``"keyword"``.
    """
    documents = [{"rank": rank, "keyword": keyword} for rank, keyword in datas]
    if not documents:
        # insert_many rejects an empty list, so there is nothing to do.
        return
    # Collection.insert is deprecated and removed in PyMongo 4;
    # insert_many is its replacement for a list of documents.
    mongo_client.crawling.naver_keywords.insert_many(documents)

In [10]:
# Store the same crawl result in MongoDB.
mongo_save(datas)

In [14]:
def send_slack(msg, channel="#dss", username="provision_bot", timeout=10):
    """Post ``msg`` to Slack through an incoming webhook.

    Args:
        msg: Text of the message.
        channel: Value sent as the payload's ``channel`` field.
        username: Value sent as the payload's ``username`` field.
        timeout: Seconds handed to ``requests.post`` so a stalled connection
            raises instead of blocking indefinitely.

    Returns:
        The response object returned by ``requests.post``.
    """
    # The webhook URL is a secret: prefer the SLACK_WEBHOOK_URL environment
    # variable. SECURITY: the literal fallback is kept only for backward
    # compatibility -- revoke that webhook and remove it from the notebook.
    webhook_URL = os.environ.get(
        "SLACK_WEBHOOK_URL",
        "https://hooks.slack.com/services/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",
    )
    payload = {
        "channel": channel,
        "username": username,
        "icon_emoji": ":provision:",
        "text": msg,
    }
    return requests.post(
        webhook_URL,
        data=json.dumps(payload),
        timeout=timeout,
    )

In [15]:
def run():
    """Run the whole pipeline: create tables, crawl, save, notify.

    Steps, in order: create the tables registered on ``base`` in MySQL,
    crawl the keyword ranking, save it with ``mysql_save`` and then
    ``mongo_save``, and finally post a completion message to Slack.
    """
    # Create the database tables.
    base.metadata.create_all(mysql_client)

    # Crawl the keywords once and hand the same result to each store.
    crawled = crawling()
    for save in (mysql_save, mongo_save):
        save(crawled)

    # Report completion on Slack.
    send_slack("naver crawling done!")

In [16]:
# Execute the full pipeline end to end.
# NOTE(review): mysql_save(datas) and mongo_save(datas) were already called in
# earlier cells; run() crawls again and saves a second batch to both stores.
run()