In [1]:
import json
import re
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
def getNames(url):
    """Return the stock names listed on a Naver Finance group-detail page.

    Args:
        url: Address of an industry/theme detail page. A missing link
            (``None``, ``NaN`` or an empty string) is tolerated.

    Returns:
        list: Values of the ``종목명`` column of the third table on the page,
        with rows whose ``종목명`` is NaN removed. An empty list when ``url``
        is missing.

    Raises:
        requests.RequestException: On a network failure, timeout, or HTTP
            error status.
    """
    # Callers build links with dict.get(), which yields None for names that
    # had no matching anchor; treat that as "no stocks" instead of crashing.
    if not isinstance(url, str) or not url:
        return []

    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # read_html expects a file-like object; passing raw HTML text is deprecated.
    tables = pd.read_html(StringIO(response.text))
    # The third table is used; keep columns up to and including '전일거래량'.
    stocks = tables[2].loc[:, :'전일거래량']
    return stocks.loc[stocks['종목명'].notna(), '종목명'].tolist()

In [3]:
def getIndustryData():
    """Scrape Naver Finance's industry overview and return its extremes.

    Returns:
        pandas.DataFrame: The 10 industries with the highest and the 10 with
        the lowest daily change, ordered descending by ``전일비``. Besides the
        scraped columns it carries ``전일비`` (``전일대비`` parsed to float),
        ``링크`` (detail-page URL, ``None`` if no anchor matched the name) and
        ``대표종목`` (list of stock names returned by ``getNames``).

    Raises:
        requests.RequestException: On a network failure, timeout, or HTTP
            error status.
    """
    url = 'https://finance.naver.com/sise/sise_group.nhn?type=upjong'
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    html = response.text
    soup = BeautifulSoup(html, 'html.parser')

    # read_html expects a file-like object; passing raw HTML text is deprecated.
    업종 = pd.read_html(StringIO(html))[0].dropna().reset_index(drop=True)
    # Column labels are indexable pairs here; keep the second element as the name.
    업종.columns = [i[1] for i in 업종.columns]

    # Key each link by its anchor text instead of its position in the page, so
    # a row removed by dropna() (or an extra anchor) cannot shift the mapping.
    업종링크 = {
        link.get_text(strip=True): 'https://finance.naver.com' + link.attrs['href']
        for link in soup.find_all("a", href=re.compile("^(/sise/sise_group_detail.nhn)((?!:).)*$"))
    }

    업종['전일비'] = 업종.전일대비.apply(lambda x: float(x.replace('%', '')))
    업종['링크'] = 업종.업종명.apply(lambda x: 업종링크.get(x))
    업종 = 업종.sort_values('전일비', ascending=False)

    # Select the 20 rows that are returned *before* fetching constituents:
    # every getNames() call is one HTTP request.
    extremes = pd.concat([업종.head(10), 업종.tail(10)])
    extremes['대표종목'] = extremes.링크.apply(lambda x: getNames(x))
    return extremes

In [4]:
# Show the 10 best and 10 worst industries by daily change, with their stocks.
getIndustryData()

Unnamed: 0,업종명,전일대비,전체,상승,보합,하락,등락그래프,전일비,링크,대표종목
0,가정용품,+5.16%,7.0,6.0,0.0,1.0,89%,5.16,https://finance.naver.com/sise/sise_group_deta...,"[모나리자, 메디앙스 *, 바이오제네틱스 *, 애경산업, 삼정펄프, 태양 *, 케이..."
1,디스플레이패널,+4.39%,2.0,2.0,0.0,0.0,76%,4.39,https://finance.naver.com/sise/sise_group_deta...,"[LG디스플레이, 일진디스플]"
2,카드,+2.02%,1.0,1.0,0.0,0.0,35%,2.02,https://finance.naver.com/sise/sise_group_deta...,[삼성카드]
3,담배,+1.28%,1.0,1.0,0.0,0.0,22%,1.28,https://finance.naver.com/sise/sise_group_deta...,[KT&G]
4,석유와가스,+0.89%,21.0,3.0,1.0,17.0,15%,0.89,https://finance.naver.com/sise/sise_group_deta...,"[SK이노베이션, S-Oil, SK이노베이션우, 에스아이리소스 *, S-Oil우, ..."
5,상업서비스와공급품,+0.82%,23.0,8.0,1.0,14.0,14%,0.82,https://finance.naver.com/sise/sise_group_deta...,"[NICE평가정보 *, NICE, 한국기업평가 *, 아이마켓코리아, 제넨바이오 *,..."
6,생명보험,+0.62%,5.0,3.0,1.0,1.0,10%,0.62,https://finance.naver.com/sise/sise_group_deta...,"[미래에셋생명, 동양생명, 삼성생명, 오렌지라이프, 한화생명]"
7,전문소매,+0.61%,4.0,2.0,1.0,1.0,10%,0.61,https://finance.naver.com/sise/sise_group_deta...,"[롯데하이마트, 도이치모터스 *, 포스링크 *, 씨유메디칼 *]"
8,교육서비스,+0.58%,20.0,7.0,1.0,12.0,10%,0.58,https://finance.naver.com/sise/sise_group_deta...,"[메가엠디 *, 아이스크림에듀 *, 이퓨쳐 *, NE능률 *, 메가스터디교육 *, ..."
9,손해보험,+0.40%,12.0,4.0,1.0,7.0,6%,0.4,https://finance.naver.com/sise/sise_group_deta...,"[메리츠금융지주, 흥국화재, 메리츠화재, 삼성화재, 삼성화재우, 흥국화재우, DB손..."


In [5]:
def getThemeDataPages(pageNums):
    """Collect the theme tables of the given Naver Finance theme-list pages.

    Args:
        pageNums: Iterable of page numbers substituted into the ``page``
            query parameter of the theme-list URL.

    Returns:
        pandas.DataFrame: Rows of all requested pages (rows without a
        ``테마명`` dropped, index renumbered from 0) plus a ``링크`` column with
        each theme's detail-page URL, or ``None`` when no anchor with that
        text was found. When ``pageNums`` is empty, an empty frame with the
        columns ``테마명`` and ``링크``.

    Raises:
        requests.RequestException: On a network failure, timeout, or HTTP
            error status.
    """
    pages = []
    linkDict = {}

    for pageNum in pageNums:
        url = f'https://finance.naver.com/sise/theme.nhn?&page={pageNum}'
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')

        # read_html expects a file-like object; raw HTML text is deprecated.
        df = pd.read_html(StringIO(html))[0].reset_index(drop=True)
        # Column labels are indexable pairs here; keep the second element.
        df.columns = [i[1] for i in df.columns]
        pages.append(df[df.테마명.notna()])

        # Map every detail-page anchor's text to its absolute URL.
        for link in soup.find_all("a", href=re.compile("^(/sise/sise_group_detail.nhn)((?!:).)*$")):
            linkDict[link.text] = 'https://finance.naver.com' + link.attrs['href']

    if not pages:
        # Nothing requested: return an empty frame instead of failing on the
        # missing 테마명 column.
        return pd.DataFrame(columns=['테마명', '링크'])

    # Concatenate once; growing a frame inside the loop copies it every time.
    themes = pd.concat(pages, ignore_index=True)
    themes['링크'] = themes.테마명.apply(lambda x: linkDict.get(x))

    return themes

In [6]:
def getThemeData():
    """Scrape every Naver Finance theme together with its member stocks.

    Returns:
        tuple: ``(summary, df)``. ``df`` is the frame produced by
        ``getThemeDataPages`` with an added ``종목`` column holding the list of
        stock names that ``getNames`` returns for each theme link.
        ``summary`` is the first 10 and last 10 rows of ``df``.

    Raises:
        requests.RequestException: On a network failure, timeout, or HTTP
            error status.
    """
    url = 'https://finance.naver.com/sise/theme.nhn?&page=1'
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # NOTE(review): the page numbers are taken from the column labels of the
    # second table on page 1 (presumably the pagination bar), skipping the
    # first label -- confirm these labels really enumerate every page.
    # read_html expects a file-like object; raw HTML text is deprecated.
    pageNums = pd.read_html(StringIO(response.text))[1].columns[1:]

    df = getThemeDataPages(pageNums)
    # One HTTP request per theme: this is the slow part of the function.
    df['종목'] = df.링크.apply(lambda x: getNames(x))
    summary = pd.concat([df.head(10), df.tail(10)])
    return summary, df

In [7]:
# Scrape all themes: `df` holds every theme, `summary` its first and last 10 rows.
summary, df = getThemeData()

In [8]:
# Stock lists of the rows whose theme name is exactly '황사/미세먼지'.
df.loc[df['테마명'] == '황사/미세먼지', '종목'].tolist()

[['웰크론 *',
  '모나리자',
  '오공 *',
  '케이엠 *',
  '휴비츠 *',
  '크린앤사이언스 *',
  'JW중외제약',
  '포스코 ICT *',
  '국제약품',
  '롯데하이마트',
  '한국테크놀로지 *',
  '비디아이 *',
  '웅진코웨이',
  '락앤락',
  '파세코 *',
  '누리플랜 *',
  '나노 *',
  'KC코트렐',
  '상아프론테크 *',
  '위니아딤채 *',
  '위닉스 *',
  '보령제약',
  '성창오토텍 *',
  '삼일제약',
  '디에이치피코리아 *',
  '안국약품 *',
  '하츠 *',
  '에스디생명공학 *']]

In [9]:
# Display the first and last 10 theme rows collected above.
summary

Unnamed: 0,테마명,전일대비,최근3일등락률(평균),상승,보합,하락,주도주,주도주.1,링크,종목
0,마스크,+2.51%,+4.86%,12.0,0.0,11.0,웰크론,모나리자,https://finance.naver.com/sise/sise_group_deta...,"[웰크론 *, 모나리자, 오공 *, 케이엠 *, 메디앙스 *, 카스 *, 한송네오텍..."
1,생명보험,+2.36%,-1.25%,3.0,1.0,2.0,미래에셋생..,동양생명,https://finance.naver.com/sise/sise_group_deta...,"[미래에셋생명, 동양생명, 삼성생명, 오렌지라이프, 한화생명, 코리안리]"
2,골판지 제조,+2.08%,-1.49%,5.0,0.0,7.0,대영포장,대림제지,https://finance.naver.com/sise/sise_group_deta...,"[대영포장, 대림제지 *, 태림포장, 수출포장, 아세아제지, 대양제지 *, 영풍제지..."
3,황사/미세먼지,+1.77%,+1.31%,11.0,0.0,17.0,웰크론,모나리자,https://finance.naver.com/sise/sise_group_deta...,"[웰크론 *, 모나리자, 오공 *, 케이엠 *, 휴비츠 *, 크린앤사이언스 *, J..."
4,정유,+1.01%,-1.58%,2.0,0.0,1.0,SK이노베..,S-Oil,https://finance.naver.com/sise/sise_group_deta...,"[SK이노베이션, S-Oil, GS]"
5,광고,+0.40%,-1.15%,4.0,0.0,10.0,지어소프트,에코마케팅,https://finance.naver.com/sise/sise_group_deta...,"[지어소프트 *, 에코마케팅 *, 인크로스 *, 지투알, 나스미디어 *, 이엠넷 *..."
6,모바일솔루션(스마트폰),+0.36%,-1.42%,6.0,1.0,12.0,키네마스터,지어소프트,https://finance.naver.com/sise/sise_group_deta...,"[키네마스터 *, 지어소프트 *, 다날 *, 이미지스 *, 갤럭시아컴즈 *, 카페2..."
7,3D 프린터,+0.30%,-1.46%,4.0,0.0,9.0,맥스로텍,디오,https://finance.naver.com/sise/sise_group_deta...,"[맥스로텍 *, 디오 *, 모아텍 *, 한국테크놀로지 *, 신도리코, 로보스타 *,..."
8,교육/온라인 교육,+0.21%,-0.78%,7.0,1.0,15.0,메가엠디,아이스크림..,https://finance.naver.com/sise/sise_group_deta...,"[메가엠디 *, 아이스크림에듀 *, 이퓨쳐 *, NE능률 *, 메가스터디교육 *, ..."
9,2020 상반기 신규상장,+0.13%,0.00%,1.0,1.0,0.0,하나금융1..,케이비제2..,https://finance.naver.com/sise/sise_group_deta...,"[하나금융15호스팩 *, 케이비제20호스팩 *]"
