# Grabbing a title

In [3]:
import requests

In [5]:
# Download the example page.
# timeout: without one, requests waits indefinitely on a stalled connection,
# which would hang the whole notebook run.
# raise_for_status(): fail here on an HTTP 4xx/5xx response instead of
# silently parsing an error page in the cells below.
result = requests.get("http://www.example.com", timeout=10)
result.raise_for_status()

In [6]:
# Check what kind of object requests.get handed back.
type(result)

requests.models.Response

In [7]:
# .text is the response body decoded to a str -- here, the page's raw HTML.
result.text

'<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 2em;\n        background-color: #fdfdff;\n        border-radius: 0.5em;\n        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);\n    }\n    a:link, a:visited {\n        color: #38488f;\n        text-decoration: none;\n    }\n    @media (max-width: 700px) {\n        div {\n            margin: 0 auto;\n            width: auto;\n        }\n    }\n    </style>    \n</head>\n\n<body>\n<div>\n    <

In [4]:
import bs4

In [9]:
# Parse the downloaded HTML so it can be queried with CSS selectors below.
# The second argument of BeautifulSoup names the parser; lxml is used here.
soup = bs4.BeautifulSoup(
    result.text,
    features="lxml",
)

In [10]:
# Display the parsed document.
soup

<!DOCTYPE html>
<html>
<head>
<title>Example Domain</title>
<meta charset="utf-8"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative examples

In [11]:
# select() takes a CSS selector and returns a list of every matching tag,
# even when there is only one match.
soup.select('title')

[<title>Example Domain</title>]

In [12]:
# Same call with a different tag name: all <p> elements on the page.
soup.select('p')

[<p>This domain is for use in illustrative examples in documents. You may use this
     domain in literature without prior coordination or asking for permission.</p>,
 <p><a href="https://www.iana.org/domains/example">More information...</a></p>]

In [13]:
# All <h1> elements on the page.
soup.select('h1')

[<h1>Example Domain</h1>]

In [14]:
# Take the first match from the list and extract only its text,
# dropping the surrounding <title> tag.
soup.select('title')[0].getText()

'Example Domain'

In [17]:
# Each element of the list returned by select() is a bs4 Tag object.
type(soup.select('title')[0])

bs4.element.Tag

# Grabbing a class

In [5]:
# Download the Wikipedia article used for the class-selector example.
# timeout: avoid hanging the notebook on a stalled connection.
# raise_for_status(): stop here on an HTTP error (e.g. a blocked or missing
# page) rather than parsing an error document in the next cell.
res = requests.get("https://en.wikipedia.org/wiki/Enzo_Ferrari", timeout=10)
res.raise_for_status()

In [6]:
# Parse the article HTML with the lxml parser.
# NOTE: this rebinds `soup`; the example.com document parsed earlier in the
# notebook is no longer reachable through this name.
soup = bs4.BeautifulSoup(
    res.text,
    features='lxml',
)

In [7]:
# soup

In [8]:
# A leading dot selects by CSS class: every element with class="toctext".
soup.select('.toctext')

[<span class="toctext">Early life</span>,
 <span class="toctext">Racing career</span>,
 <span class="toctext">Building Ferrari</span>,
 <span class="toctext">The Great Walkout</span>,
 <span class="toctext">Merging with Fiat</span>,
 <span class="toctext">The Modena Autodrome</span>,
 <span class="toctext">Final years</span>,
 <span class="toctext">Racing and management controversies</span>,
 <span class="toctext">Personal life</span>,
 <span class="toctext">Death</span>,
 <span class="toctext">Racing record</span>,
 <span class="toctext">Grand Prix wins</span>,
 <span class="toctext">In popular culture</span>,
 <span class="toctext">See also</span>,
 <span class="toctext">Notes</span>,
 <span class="toctext">References</span>,
 <span class="toctext">External links</span>]

In [9]:
# Print only the text of each element with class "toctext", one per line.
for item in soup.select('.toctext'):
    # .getText() and .text give the same string, so the two lines below are
    # interchangeable; only one of them is left active.
    # print(item.getText())
    print(item.text)

Early life
Racing career
Building Ferrari
The Great Walkout
Merging with Fiat
The Modena Autodrome
Final years
Racing and management controversies
Personal life
Death
Racing record
Grand Prix wins
In popular culture
See also
Notes
References
External links


# Grabbing an image

In [25]:
# Download the Ferrari P article that contains the 330 P4 picture.
# timeout: avoid hanging the notebook on a stalled connection.
# raise_for_status(): stop here on an HTTP error rather than searching an
# error page for images in the cells below.
ferrari_p = requests.get("https://en.wikipedia.org/wiki/Ferrari_P#330_P4", timeout=10)
ferrari_p.raise_for_status()

In [27]:
# Parse the article HTML with the lxml parser.
ferrari_p_bs = bs4.BeautifulSoup(
    ferrari_p.text,
    features='lxml',
)

In [29]:
# ferrari_p_bs

In [31]:
# ferrari_p_bs.select('img')
# Selecting every <img> tag is not the best approach: it returns
# all images on the page, including icons, reference images, etc.

In [38]:
# Restrict the search to elements with class "thumbimage" and pick one by
# position in the result list.
# NOTE(review): index 8 is hard-coded and depends on the page's layout at
# download time; if the article changes, this may return a different image
# or raise IndexError.
ferrari_p_bs.select('.thumbimage')[8]

<img alt="" class="thumbimage" data-file-height="460" data-file-width="990" decoding="async" height="102" src="//upload.wikimedia.org/wikipedia/commons/thumb/1/13/Ferrari_330_P4_1967.jpg/220px-Ferrari_330_P4_1967.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/1/13/Ferrari_330_P4_1967.jpg/330px-Ferrari_330_P4_1967.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/1/13/Ferrari_330_P4_1967.jpg/440px-Ferrari_330_P4_1967.jpg 2x" width="220"/>

In [39]:
# Keep the selected <img> tag in a variable for the cells that follow.
# NOTE(review): the position 8 is a magic number tied to the page's layout
# at download time -- verify it still points at the 330 P4 picture.
ferrari_p4 = ferrari_p_bs.select('.thumbimage')[8]

In [40]:
# Display the stored tag to confirm it is the expected <img> element.
ferrari_p4

<img alt="" class="thumbimage" data-file-height="460" data-file-width="990" decoding="async" height="102" src="//upload.wikimedia.org/wikipedia/commons/thumb/1/13/Ferrari_330_P4_1967.jpg/220px-Ferrari_330_P4_1967.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/1/13/Ferrari_330_P4_1967.jpg/330px-Ferrari_330_P4_1967.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/1/13/Ferrari_330_P4_1967.jpg/440px-Ferrari_330_P4_1967.jpg 2x" width="220"/>

In [42]:
# The stored element is a bs4 Tag, so its HTML attributes can be read from it.
type(ferrari_p4)

bs4.element.Tag

In [45]:
# A tag's HTML attributes can be read like dictionary entries.
# tag['src'] and tag.get('src') return the same value when the attribute
# exists; they differ only when it is missing ([] raises KeyError, .get()
# returns None).
print(ferrari_p4['src'] == ferrari_p4.get('src'))
# The value is a protocol-relative URL (it starts with '//').
print(ferrari_p4['src'])

True
//upload.wikimedia.org/wikipedia/commons/thumb/1/13/Ferrari_330_P4_1967.jpg/220px-Ferrari_330_P4_1967.jpg


##### Images can be embedded in Markdown cells with an HTML `<img>` tag - example below

<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/1/13/Ferrari_330_P4_1967.jpg/220px-Ferrari_330_P4_1967.jpg" alt="Ferrari 330 P4 (1967)">

In [46]:
# Download the image itself. The src attribute is protocol-relative
# ('//upload...'), so the scheme has to be prepended to get a full URL.
# timeout: avoid hanging the notebook on a stalled connection.
# raise_for_status(): without it, an HTTP error body would later be written
# to disk as if it were a valid .jpg file.
# Despite its name, this variable holds the Response object, not a link.
ferrari_p4_image_link = requests.get('https:' + ferrari_p4['src'], timeout=10)
ferrari_p4_image_link.raise_for_status()

In [48]:
# .content is the raw response body as bytes -- here, the JPEG data itself.
ferrari_p4_image_link.content

b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xfe\x00MFile source: http://commons.wikimedia.org/wiki/File:Ferrari_330_P4_1967.jpg\xff\xdb\x00C\x00\x06\x04\x05\x06\x05\x04\x06\x06\x05\x06\x07\x07\x06\x08\n\x10\n\n\t\t\n\x14\x0e\x0f\x0c\x10\x17\x14\x18\x18\x17\x14\x16\x16\x1a\x1d%\x1f\x1a\x1b#\x1c\x16\x16 , #&\')*)\x19\x1f-0-(0%()(\xff\xdb\x00C\x01\x07\x07\x07\n\x08\n\x13\n\n\x13(\x1a\x16\x1a((((((((((((((((((((((((((((((((((((((((((((((((((\xff\xc0\x00\x11\x08\x00f\x00\xdc\x03\x01"\x00\x02\x11\x01\x03\x11\x01\xff\xc4\x00\x1c\x00\x00\x01\x05\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x03\x04\x06\x07\x08\x02\x01\xff\xc4\x00G\x10\x00\x02\x01\x03\x03\x02\x02\x07\x05\x06\x03\x06\x03\t\x00\x00\x01\x02\x03\x04\x05\x11\x00\x12!\x061\x13A\x07\x14"Qaq\x812\x91\xa1\xb1\xd1\x15#3Bb\xc1Rr\x82\x16$%\x92\xa2\xe1\x17CDETcs\x83\x93\xb2\xd2\xe2\xff\xc4\x00\x1a\x01\x00\x02\x03\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x02\x00\x03\x04\x05\x06\xff\xc4\x0

In [51]:
# NOTE: the output file's extension should match the format of the image in
# the source URL (.jpg here).
# The file is opened in binary mode ('wb') because .content is bytes.
with open('ferrari_p4.jpg', 'wb') as f:
    f.write(ferrari_p4_image_link.content)