# Drilling Down With Beautiful Soup
### Preliminaries

In [2]:
# Import required modules
import pandas as pd
import requests

from bs4 import BeautifulSoup

### Download the HTML and create a Beautiful Soup object

In [3]:
# URL of the Wikipedia page whose table of contents lists the characters
url = 'http://en.wikipedia.org/wiki/List_of_A_Song_of_Ice_and_Fire_characters'

# Fetch the HTML at the url; the timeout keeps the cell from hanging
# indefinitely if the server never responds
r = requests.get(url, timeout=30)

# Stop here on an HTTP error (4xx/5xx) instead of silently parsing an error page
r.raise_for_status()

# Turn the HTML into a Beautiful Soup object, parsed with lxml
soup = BeautifulSoup(r.text, 'lxml')

### Setting up where to put the results

In [9]:
# Empty list that will store the scraped character names
character_name = list()

#### Drilling down with a for loop

In [13]:
# Build the list in a single assignment (instead of appending to the existing
# list) so that re-running this cell does not add every name a second time.
character_name = [
    # get_text() also works when the span wraps nested tags; in that case
    # .string is None and calling .strip() on it would raise AttributeError
    post.get_text().strip()
    # for each item in all the toclevel-2 li items
    # (except the last three because they are not character names),
    for item in soup.find_all('li', {'class': 'toclevel-2'})[:-3]
    # find each span with class=toctext
    for post in item.find_all('span', {'class': 'toctext'})
]

### Results

In [14]:
# View all character names (a bare expression on the last line of a cell is
# displayed as the cell's output)
character_name

['Eddard Stark',
 'Catelyn Stark',
 'Robb Stark',
 'Sansa Stark',
 'Arya Stark',
 'Bran Stark',
 'Rickon Stark',
 'Jon Snow',
 'Benjen Stark',
 'Lyanna Stark',
 'Roose Bolton',
 'Ramsay Bolton',
 'Rickard Karstark',
 'Alys Karstark',
 'Wyman Manderly',
 'Hodor',
 'Osha',
 'Jeyne Poole',
 'Jojen and Meera Reed',
 'Jeyne Westerling',
 'Aegon V Targaryen',
 'Aerys II Targaryen',
 'Rhaegar Targaryen',
 'Viserys Targaryen',
 'Daenerys Targaryen',
 'Aegon VI Targaryen',
 'Jon Connington',
 'Jorah Mormont',
 'Brynden Rivers',
 'Missandei',
 'Daario Naharis',
 'Grey Worm',
 'Maekar I Targaryen',
 'House Blackfyre',
 'Tywin Lannister',
 'Cersei Lannister',
 'Jaime Lannister',
 'Tyrion Lannister',
 'Joffrey Baratheon',
 'Myrcella Baratheon',
 'Tommen Baratheon',
 'Kevan Lannister',
 'Lancel Lannister',
 'Bronn',
 'Gregor Clegane',
 'Sandor Clegane',
 'Podrick Payne',
 'Robert Baratheon',
 'Stannis Baratheon',
 'Selyse Florent',
 'Renly Baratheon',
 'Shireen Baratheon',
 'Melisandre',
 'Davos Sea

### Quick Analysis: Which house has the most main characters?

In [15]:
# Empty list that will hold one house (or single) name per character
houses = list()

In [16]:
# Build the list in a single assignment so this cell gives the same result
# every time it is run; appending in a loop would add every entry again on a
# re-run and inflate the counts.
# Each name in the character_name list is split on a blank space and the last
# element is kept: that is the last name (the house) for multi-word names,
# and the name itself for characters who only have one name.
houses = [name.split(' ')[-1] for name in character_name]

In [17]:
# value_counts() is a pandas Series method, so wrap the collected names in a Series
houses = pd.Series(data=houses)

# Tally how many times each name/house name occurs
# (the defaults, spelled out here, list the most frequent first)
houses.value_counts(sort=True, ascending=False)

Stark         9
Targaryen     7
Baratheon     7
Lannister     6
Martell       6
Greyjoy       6
Tyrell        4
Arryn         3
Tully         3
Bolton        2
Mormont       2
Payne         2
Royce         2
Clegane       2
Karstark      2
Tarly         2
Manderly      1
Reed          1
Blackfyre     1
Tollett       1
Baelish       1
Naharis       1
Sand          1
Poole         1
Tarth         1
Varys         1
Snow          1
Gendry        1
Val           1
Mopatis       1
             ..
Pycelle       1
Slynt         1
Waynwood      1
Osha          1
Bronn         1
Thorne        1
Sparrow       1
Waif          1
Seaworth      1
Melisandre    1
Frey          1
Unella        1
Craster       1
Hotah         1
Loraq         1
Connington    1
Rayder        1
Forel         1
Drogo         1
Dondarrion    1
Storm         1
Worm          1
Nestoris      1
Gilly         1
Harlaw        1
Sixskins      1
Swann         1
Myr           1
Meribald      1
Trant         1
Length: 78, dtype: int64

Source: https://chrisalbon.com/python/beautiful_soup_drill_down.html