# Web Scraping People

## Imports

In [1]:
import requests
import bs4

## Mini Exercise:  



#### 1. Inspect the page at https://web-scraping-demo.zgulde.net/people.  What classes and html structure can you use to extract the information you want?

- people: <div id="people" class="grid grid-cols-2 gap-x-12 gap-y-16">
- name: <h2 class="text-2xl text-purple-800 name col-span-full border-b">Laura Galvan</h2>
- quote: <p class="quote col-span-full px-5 py-5 text-center text-gray-500">
            "Optional client-driven artificial intelligence"
        </p>
- email:  <p class="email col-span-8">cgallegos@gmail.com</p>
- phone: <p class="phone col-span-8">+1-616-087-3568x389</p>
- address:             <p class="col-span-8">
                643 Troy Streets <br />
                North Brianstad, WY 35542
            </p>

#### 2. Write python code to turn the page contents into a soup object.

In [3]:
# Make the http request and turn the response into a beautiful soup object
response = requests.get('https://web-scraping-demo.zgulde.net/people')

In [4]:
response

<Response [200]>

In [5]:
html = response.text
html

'<!DOCTYPE html>\n<html lang="en">\n<head>\n    <meta charset="UTF-8">\n    <meta http-equiv="X-UA-Compatible" content="IE=edge">\n    <meta name="viewport" content="width=device-width, initial-scale=1.0">\n    <title>Example People Page</title>\n    <link href="https://unpkg.com/tailwindcss@^2/dist/tailwind.min.css" rel="stylesheet" />\n    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.4.1/font/bootstrap-icons.css" />\n</head>\n<body class="mx-auto max-w-screen-lg pb-32">\n    \n<h1 class="my-5 text-4xl text-center">People</h1>\n\n<div class="my-5 text-red-800 px-5 py-3 bg-red-100 font-bold">\n    <p>\n        <i class="bi bi-exclamation-circle text-xl"></i>\n        All data on this page is strictly for demonstration purposes and fake.\n    </p>\n</div>\n\n<div id="people" class="grid grid-cols-2 gap-x-12 gap-y-16">\n    \n    <div class="person border rounded px-3 py-5 grid grid-cols-2 gap-x-3 bg-purple-50 hover:shadow-lg transition duration-500">\n    

In [6]:
soup = bs4.BeautifulSoup(html)

In [7]:
soup

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<title>Example People Page</title>
<link href="https://unpkg.com/tailwindcss@^2/dist/tailwind.min.css" rel="stylesheet"/>
<link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.4.1/font/bootstrap-icons.css" rel="stylesheet"/>
</head>
<body class="mx-auto max-w-screen-lg pb-32">
<h1 class="my-5 text-4xl text-center">People</h1>
<div class="my-5 text-red-800 px-5 py-3 bg-red-100 font-bold">
<p>
<i class="bi bi-exclamation-circle text-xl"></i>
        All data on this page is strictly for demonstration purposes and fake.
    </p>
</div>
<div class="grid grid-cols-2 gap-x-12 gap-y-16" id="people">
<div class="person border rounded px-3 py-5 grid grid-cols-2 gap-x-3 bg-purple-50 hover:shadow-lg transition duration-500">
<h2 class="text-2xl text-purple-800 name col-span-full border-b">Kathy Martinez<

#### 3. Write the code necessary to loop through all of the poeple and extract their information.



In [20]:
#names of people
soup.select('h2')

[<h2 class="text-2xl text-purple-800 name col-span-full border-b">Kathy Martinez</h2>,
 <h2 class="text-2xl text-purple-800 name col-span-full border-b">Ralph Owen</h2>,
 <h2 class="text-2xl text-purple-800 name col-span-full border-b">Julie Smith</h2>,
 <h2 class="text-2xl text-purple-800 name col-span-full border-b">John Jordan</h2>,
 <h2 class="text-2xl text-purple-800 name col-span-full border-b">Joshua Powell</h2>,
 <h2 class="text-2xl text-purple-800 name col-span-full border-b">Ashley Obrien</h2>,
 <h2 class="text-2xl text-purple-800 name col-span-full border-b">Robert Hughes</h2>,
 <h2 class="text-2xl text-purple-800 name col-span-full border-b">Gregory Holt</h2>,
 <h2 class="text-2xl text-purple-800 name col-span-full border-b">Amy Maxwell</h2>,
 <h2 class="text-2xl text-purple-800 name col-span-full border-b">Brooke White</h2>]

In [26]:
soup.select('.grid.grid-cols-2.gap-x-12')[0]

<div class="grid grid-cols-2 gap-x-12 gap-y-16" id="people">
<div class="person border rounded px-3 py-5 grid grid-cols-2 gap-x-3 bg-purple-50 hover:shadow-lg transition duration-500">
<h2 class="text-2xl text-purple-800 name col-span-full border-b">Kathy Martinez</h2>
<p class="quote col-span-full px-5 py-5 text-center text-gray-500">
            "Programmable background workforce"
        </p>
<div class="grid grid-cols-9">
<i class="bi bi-envelope-fill text-purple-800"></i>
<p class="email col-span-8">hward@sanders.com</p>
<i class="bi bi-telephone-fill text-purple-800"></i>
<p class="phone col-span-8">562.869.8732x549</p>
</div>
<div class="address grid grid-cols-9">
<i class="bi bi-geo-fill text-purple-800"></i>
<p class="col-span-8">
                9790 Jeremiah Walks <br/>
                Mullinsside, MI 20894
            </p>
</div>
</div>
<div class="person border rounded px-3 py-5 grid grid-cols-2 gap-x-3 bg-purple-50 hover:shadow-lg transition duration-500">
<h2 class="text

In [27]:
people_container = soup.select('.grid.grid-cols-2.gap-x-12')[0]

In [25]:
soup.select('.grid.grid-cols-2.gap-x-3')[0]

<div class="person border rounded px-3 py-5 grid grid-cols-2 gap-x-3 bg-purple-50 hover:shadow-lg transition duration-500">
<h2 class="text-2xl text-purple-800 name col-span-full border-b">Kathy Martinez</h2>
<p class="quote col-span-full px-5 py-5 text-center text-gray-500">
            "Programmable background workforce"
        </p>
<div class="grid grid-cols-9">
<i class="bi bi-envelope-fill text-purple-800"></i>
<p class="email col-span-8">hward@sanders.com</p>
<i class="bi bi-telephone-fill text-purple-800"></i>
<p class="phone col-span-8">562.869.8732x549</p>
</div>
<div class="address grid grid-cols-9">
<i class="bi bi-geo-fill text-purple-800"></i>
<p class="col-span-8">
                9790 Jeremiah Walks <br/>
                Mullinsside, MI 20894
            </p>
</div>
</div>

In [28]:
peoples = soup.select('.grid.grid-cols-2.gap-x-3')

In [31]:
person = peoples[0]
print(person.prettify())

<div class="person border rounded px-3 py-5 grid grid-cols-2 gap-x-3 bg-purple-50 hover:shadow-lg transition duration-500">
 <h2 class="text-2xl text-purple-800 name col-span-full border-b">
  Kathy Martinez
 </h2>
 <p class="quote col-span-full px-5 py-5 text-center text-gray-500">
  "Programmable background workforce"
 </p>
 <div class="grid grid-cols-9">
  <i class="bi bi-envelope-fill text-purple-800">
  </i>
  <p class="email col-span-8">
   hward@sanders.com
  </p>
  <i class="bi bi-telephone-fill text-purple-800">
  </i>
  <p class="phone col-span-8">
   562.869.8732x549
  </p>
 </div>
 <div class="address grid grid-cols-9">
  <i class="bi bi-geo-fill text-purple-800">
  </i>
  <p class="col-span-8">
   9790 Jeremiah Walks
   <br/>
   Mullinsside, MI 20894
  </p>
 </div>
</div>



In [43]:
def process_people(person):
    name = person.find('h2').text
    quote = person.find(class_='quote').text.strip()
    email = person.find(class_='email').text
    phone_number = person.find(class_='phone').text
    address = person.find(class_='address').text.strip()
    
    return {
        "name": name,
        "quote": quote,
        "email": email,
        "phone_number": phone_number,
        "address": address
    }

#### 4. The result should be a pandas DataFrame.

In [44]:
import pandas as pd
pd.DataFrame([process_people(person) for person in peoples])


Unnamed: 0,name,quote,email,phone_number,address
0,Kathy Martinez,"""Programmable background workforce""",hward@sanders.com,562.869.8732x549,9790 Jeremiah Walks \n Mullinss...
1,Ralph Owen,"""Multi-lateral high-level policy""",hbrennan@thomas.com,001-462-801-4632x12055,"096 Edward Gateway \n Danport, ..."
2,Julie Smith,"""Proactive client-driven conglomeration""",mporter@brandt.info,727.736.2101,"77944 James Way \n East David, ..."
3,John Jordan,"""Compatible actuating software""",bellrobert@espinoza.biz,001-872-372-4391x111,50628 Jason Mountain Apt. 397 \n ...
4,Joshua Powell,"""Cross-platform static monitoring""",novakjason@yahoo.com,207-089-5010,9368 Clayton Mill \n Marcboroug...
5,Ashley Obrien,"""Horizontal content-based complexity""",jasonsalinas@hotmail.com,6887183591,492 Evans Walk Apt. 163 \n Nort...
6,Robert Hughes,"""Persistent analyzing throughput""",charlesanderson@yahoo.com,749-845-1819,121 Long Islands Apt. 698 \n So...
7,Gregory Holt,"""Monitored eco-centric implementation""",jenniferavila@gmail.com,073-098-4181x44916,940 Jessica Crossroad \n Lake K...
8,Amy Maxwell,"""Distributed fresh-thinking hardware""",chavezleonard@gmail.com,(369)355-6245,51115 Pace Streets Apt. 626 \n ...
9,Brooke White,"""Profit-focused analyzing paradigm""",emilywalker@hubbard-miller.info,001-318-456-6607,02063 Ferguson Inlet Suite 356 \n ...
