### Importing the packages

In [1]:
# Load the packages
import requests
from bs4 import BeautifulSoup

### Making a GET request

In [2]:
# URL of the page to scrape
base_site = "https://en.wikipedia.org/wiki/Music"

# Make the GET request. The timeout keeps this cell from hanging forever if the
# server never answers (requests applies no timeout unless one is given).
response = requests.get(base_site, timeout=10)

# Stop here on a 4xx/5xx status instead of parsing an error page further down
response.raise_for_status()
response

<Response [200]>

In [4]:
# Extracting the HTML: keep the body of the response so it can be handed to the parser
html = response.content

### Making the soup

In [5]:
# Build a BeautifulSoup tree from the downloaded HTML so that tags can be searched by name/attribute.
# "html.parser" is the parser bundled with Python, so nothing extra has to be installed.
soup = BeautifulSoup(html, features="html.parser")

### 1. Extract all existing titles of links

In [6]:
# Collect every <a> tag found in the parsed document
links = soup.find_all(name='a')
links

[<a id="top"></a>,
 <a href="/wiki/Wikipedia:Protection_policy#semi" title="This article is semi-protected."><img alt="Page semi-protected" data-file-height="512" data-file-width="512" decoding="async" height="20" src="//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/20px-Semi-protection-shackle.svg.png" srcset="//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/30px-Semi-protection-shackle.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/40px-Semi-protection-shackle.svg.png 2x" width="20"/></a>,
 <a class="mw-jump-link" href="#mw-head">Jump to navigation</a>,
 <a class="mw-jump-link" href="#p-search">Jump to search</a>,
 <a class="mw-disambig" href="/wiki/Music_(disambiguation)" title="Music (disambiguation)">Music (disambiguation)</a>,
 <a class="image" href="/wiki/File:Music_lesson_Staatliche_Antikensammlungen_2421.jpg"><img alt="Music lesson Staatliche Antikensammlungen 2421.jpg" data-file-hei

In [7]:
# Keep only the anchors for which an 'href' attribute is present
links = list(filter(lambda anchor: anchor.get('href') is not None, links))
links

[<a href="/wiki/Wikipedia:Protection_policy#semi" title="This article is semi-protected."><img alt="Page semi-protected" data-file-height="512" data-file-width="512" decoding="async" height="20" src="//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/20px-Semi-protection-shackle.svg.png" srcset="//upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/30px-Semi-protection-shackle.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/1/1b/Semi-protection-shackle.svg/40px-Semi-protection-shackle.svg.png 2x" width="20"/></a>,
 <a class="mw-jump-link" href="#mw-head">Jump to navigation</a>,
 <a class="mw-jump-link" href="#p-search">Jump to search</a>,
 <a class="mw-disambig" href="/wiki/Music_(disambiguation)" title="Music (disambiguation)">Music (disambiguation)</a>,
 <a class="image" href="/wiki/File:Music_lesson_Staatliche_Antikensammlungen_2421.jpg"><img alt="Music lesson Staatliche Antikensammlungen 2421.jpg" data-file-height="1849" data-fil

In [8]:
# Read the .string of every remaining link; some entries come back as None
# (see the output below) and are dealt with in the next cell.
all_titles = []
for anchor in links:
    all_titles.append(anchor.string)
all_titles

[None,
 'Jump to navigation',
 'Jump to search',
 'Music (disambiguation)',
 None,
 'Paleolithic',
 'Performing arts',
 'Acrobatics',
 'Ballet',
 'Circus skills',
 'Clown',
 'Dance',
 'General Gymnastics',
 'Magic',
 'Mime',
 'Opera',
 'Professional wrestling',
 'Puppetry',
 'Speech',
 'Theatre',
 'Ventriloquism',
 'v',
 't',
 'e',
 'art form',
 'cultural',
 'medium',
 'definitions of music',
 'pitch',
 'melody',
 'harmony',
 'rhythm',
 'tempo',
 'meter',
 'articulation',
 'dynamics',
 'timbre',
 'texture',
 'styles or types',
 'instruments',
 'rapping',
 'instrumental pieces',
 'solely vocal pieces',
 'accompaniment',
 'Greek',
 'μουσική',
 'Muses',
 '[1]',
 'glossary of musical terminology',
 'cultural',
 'songs',
 'symphonies',
 'criticism of music',
 'study of the history of music',
 'aesthetic examination of music',
 'Ancient Greek',
 'Indian philosophers',
 'harmony of the spheres',
 'John Cage',
 'noise',
 '[2]',
 'Beethoven',
 'Grosse Fuge',
 'string quartet',
 '[3]',
 'jazz',


In [9]:
# Same extraction as before, this time skipping every link whose .string is None
all_titles = [
    title
    for title in (anchor.string for anchor in links)
    if title is not None
]
all_titles

['Jump to navigation',
 'Jump to search',
 'Music (disambiguation)',
 'Paleolithic',
 'Performing arts',
 'Acrobatics',
 'Ballet',
 'Circus skills',
 'Clown',
 'Dance',
 'General Gymnastics',
 'Magic',
 'Mime',
 'Opera',
 'Professional wrestling',
 'Puppetry',
 'Speech',
 'Theatre',
 'Ventriloquism',
 'v',
 't',
 'e',
 'art form',
 'cultural',
 'medium',
 'definitions of music',
 'pitch',
 'melody',
 'harmony',
 'rhythm',
 'tempo',
 'meter',
 'articulation',
 'dynamics',
 'timbre',
 'texture',
 'styles or types',
 'instruments',
 'rapping',
 'instrumental pieces',
 'solely vocal pieces',
 'accompaniment',
 'Greek',
 'μουσική',
 'Muses',
 '[1]',
 'glossary of musical terminology',
 'cultural',
 'songs',
 'symphonies',
 'criticism of music',
 'study of the history of music',
 'aesthetic examination of music',
 'Ancient Greek',
 'Indian philosophers',
 'harmony of the spheres',
 'John Cage',
 'noise',
 '[2]',
 'Beethoven',
 'Grosse Fuge',
 'string quartet',
 '[3]',
 'jazz',
 '[4]',
 'hard

### 2. Extract all heading 2 strings.

In [10]:
# Gather every <h2> tag so the headings can be inspected
h2_list = soup.find_all(name='h2')
h2_list

[<h2 id="mw-toc-heading">Contents</h2>,
 <h2><span class="mw-headline" id="Etymology">Etymology</span></h2>,
 <h2><span class="mw-headline" id="As_a_form_of_art_or_entertainment">As a form of art or entertainment</span></h2>,
 <h2><span class="mw-headline" id="Elements">Elements</span></h2>,
 <h2><span class="mw-headline" id="History">History</span></h2>,
 <h2><span class="mw-headline" id="Performance">Performance</span></h2>,
 <h2><span class="mw-headline" id="Philosophy_and_aesthetics">Philosophy and aesthetics</span></h2>,
 <h2><span class="mw-headline" id="Psychology">Psychology</span></h2>,
 <h2><span class="mw-headline" id="Sociological_aspects">Sociological aspects</span></h2>,
 <h2><span class="mw-headline" id="Media_and_technology">Media and technology</span></h2>,
 <h2><span class="mw-headline" id="Business">Business</span></h2>,
 <h2><span class="mw-headline" id="Education">Education</span></h2>,
 <h2><span class="mw-headline" id="Music_therapy">Music therapy</span></h2>,
 <

In [11]:
# Pull the string out of each heading tag
h2_list_value = list(map(lambda heading: heading.string, h2_list))
h2_list_value

['Contents',
 'Etymology',
 'As a form of art or entertainment',
 'Elements',
 'History',
 'Performance',
 'Philosophy and aesthetics',
 'Psychology',
 'Sociological aspects',
 'Media and technology',
 'Business',
 'Education',
 'Music therapy',
 'See also',
 'References',
 'Further reading',
 'External links',
 'Navigation menu']

### 3. Print the whole footer text.

In [16]:
# By inspection: the footer is the element whose id is "footer".
# Search by id alone so the lookup does not depend on which tag carries that id.
footer = soup.find(id='footer')

# find() returns None when nothing matches; report that clearly instead of
# failing with an AttributeError on `.text`.
if footer is None:
    raise LookupError('No element with id="footer" was found on the page')

# print() renders the line breaks in the text; a bare expression would show its repr
footer_text = footer.text
print(footer_text)



 This page was last edited on 12 April 2020, at 14:11 (UTC).
Text is available under the Creative Commons Attribution-ShareAlike License;
additional terms may apply.  By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.


Privacy policy
About Wikipedia
Disclaimers
Contact Wikipedia
Developers
Statistics
Cookie statement
Mobile view







