#### BeautifulSoup 사용법

##### Local HTML 파일 열기

In [1]:
from bs4 import BeautifulSoup
# Open the local sample page and parse it with Python's built-in 'html.parser'.
# The encoding is passed explicitly: the page declares charset utf-8, while
# open() without `encoding` falls back to a platform-dependent default and can
# mis-decode (or fail on) non-ASCII content on some systems.
with open('00_Example.html', encoding='utf-8') as fp:
    soup = BeautifulSoup(fp, 'html.parser')

In [2]:
# Echo the parsed document to inspect the structure used in the examples below.
soup

<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<title>Web Crawling Example</title>
</head>
<body>
<div>
<p>a</p><p>b</p><p>c</p>
</div>
<div class="ex_class sample">
<p>1</p><p>2</p><p>3</p>
</div>
<div id="ex_id">
<p>X</p><p>Y</p><p>Z</p>
</div>
<h1>This is a heading.</h1>
<p>This is a paragraph.</p>
<p>This is another paragraph.</p>
<a class="a sample" href="www.naver.com">Naver</a>
</body>
</html>

##### find - 한 개만 찾는 메서드

In [3]:
# find: returns a single match -- here the first <div> in the document.
first_div = soup.find('div')
first_div

<div>
<p>a</p><p>b</p><p>c</p>
</div>

##### find_all - 모두를 찾는 메서드

In [4]:
# find_all: returns every match -- here all <div> elements, as a list.
all_divs = soup.find_all('div')
all_divs

[<div>
 <p>a</p><p>b</p><p>c</p>
 </div>,
 <div class="ex_class sample">
 <p>1</p><p>2</p><p>3</p>
 </div>,
 <div id="ex_id">
 <p>X</p><p>Y</p><p>Z</p>
 </div>]

In [5]:
# Number of <div> elements that find_all collected.
len(all_divs)

3

In [6]:
# The find_all result is iterable; print each matched <div> in turn.
for div in all_divs:
    print(div)

<div>
<p>a</p><p>b</p><p>c</p>
</div>
<div class="ex_class sample">
<p>1</p><p>2</p><p>3</p>
</div>
<div id="ex_id">
<p>X</p><p>Y</p><p>Z</p>
</div>


In [8]:
# Collect every <p> in the whole document, regardless of which element
# contains it, then show how many were found and the matches themselves.
all_ps = soup.find_all('p')
print(len(all_ps))
print(all_ps)

11
[<p>a</p>, <p>b</p>, <p>c</p>, <p>1</p>, <p>2</p>, <p>3</p>, <p>X</p>, <p>Y</p>, <p>Z</p>, <p>This is a paragraph.</p>, <p>This is another paragraph.</p>]


##### select_one : CSS Selector로 하나만 찾는 메서드

In [9]:
# select_one with an id selector ('#...'): returns the single matching element.
ex_id_div = soup.select_one('#ex_id')
ex_id_div

<div id="ex_id">
<p>X</p><p>Y</p><p>Z</p>
</div>

In [14]:
# Class selector: chaining '.ex_class.sample' (no space) targets an element
# that carries both classes.
ex_sample_div = soup.select_one('.ex_class.sample')
ex_sample_div

<div class="ex_class sample">
<p>1</p><p>2</p><p>3</p>
</div>

##### select : CSS Selector로 모두를 찾는 메서드

In [15]:
# select returns its matches as a list, even when an id selector matches just
# one element. The result therefore gets a plural name (like sample_divs and
# ex_class_divs) instead of rebinding the singular name used for the
# select_one result.
ex_id_divs = soup.select('#ex_id')
ex_id_divs

[<div id="ex_id">
 <p>X</p><p>Y</p><p>Z</p>
 </div>]

In [16]:
# '.sample' matches any element with that class, whatever its tag: here both
# the <div class="ex_class sample"> and the <a class="a sample"> are returned.
sample_divs = soup.select('.sample')
sample_divs

[<div class="ex_class sample">
 <p>1</p><p>2</p><p>3</p>
 </div>,
 <a class="a sample" href="www.naver.com">Naver</a>]

In [17]:
# Only one element in the document has the 'ex_class' class, so the list
# holds a single <div>.
ex_class_divs = soup.select('.ex_class')
ex_class_divs

[<div class="ex_class sample">
 <p>1</p><p>2</p><p>3</p>
 </div>]

##### 결과 가져오기 - 태그의 텍스트와 속성 값 추출

In [18]:
# Target element: <a class="a sample" href="www.naver.com">Naver</a>
# get_text() extracts the text inside the matched tag.
result = soup.select_one('.a.sample').get_text()
result

'Naver'

In [19]:
# The .string attribute gives the same text for this tag.
result = soup.select_one('.a.sample').string
result

'Naver'

In [20]:
# Attribute value: index the tag with the attribute name, like a dict.
href = soup.select_one('.a.sample')['href']
href

'www.naver.com'

In [21]:
# Goal: get the contents of the <p> tags inside the div with id="ex_id".
# The bare string below is only a reminder of the target markup; because it is
# the last expression in the cell, the notebook echoes it as the cell result.
'''
 <div id="ex_id">
 <p>X</p><p>Y</p><p>Z</p>
 </div>
'''

'\n <div id="ex_id">\n <p>X</p><p>Y</p><p>Z</p>\n </div>\n'

In [22]:
# Step 1: narrow the search down to the div with id="ex_id".
ex_id_div = soup.select_one('#ex_id')
print(ex_id_div)

<div id="ex_id">
<p>X</p><p>Y</p><p>Z</p>
</div>


In [24]:
# Step 2: search from the selected div rather than from soup, so only the
# <p> tags inside that div are returned.
all_ps = ex_id_div.find_all('p')
all_ps

[<p>X</p>, <p>Y</p>, <p>Z</p>]

In [26]:
# Step 3: print the text held by each <p>.
for p in all_ps:
    print(p.string)

X
Y
Z
