In [2]:
from bs4 import BeautifulSoup
import requests
import time

## Part 1: Scraping and Saving HTML Content

In [113]:
# 1. Identify the target
url = 'https://sfbay.craigslist.org/search/zip?sort=date#search=1~gallery~0~0'

2. Interact with the Page-Sorting
 - When changing the sorting from newest to oldest, a segment of "?sort=dateoldest" showed up in the URL.
 - Sorting change can also be triggered by changing the segment to "?sort=date" directly in the URL.
 - A GET request is made when changing the sort order, as we are requesting data from the web server.
 - In this case, the variable in the URL associated with sorting is "sort".

3. Interact with the Page-Pagination
 - The way to move betweeen pages from the URL is to change the number at this segment: "gallery~0".
 - In this case, the variable in the URL associated with page changes is "gallery". Changing the variable value can change the page and position of the page you see.

In [114]:
# 4. Fetch Listing URLs
headers = {'User-agent': 'Mozilla/5.0'}
page = requests.get(url, headers = headers)
soup = BeautifulSoup(page.content)
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=Edge" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width,initial-scale=1" name="viewport"/>
  <meta content="craigslist" property="og:site_name"/>
  <meta content="preview" name="twitter:card"/>
  <meta content="SF bay area free stuff - craigslist" property="og:title"/>
  <meta content="SF bay area free stuff - craigslist" name="description"/>
  <meta content="SF bay area free stuff - craigslist" property="og:description"/>
  <meta content="https://sfbay.craigslist.org/search/zip" property="og:url"/>
  <title>
   SF bay area free stuff - craigslist
  </title>
  <link href="https://sfbay.craigslist.org/search/zip" rel="canonical"/>
  <link href="https://sfbay.craigslist.org/search/zip" hreflang="x-default" rel="alternate"/>
  <link href="/favicon.ico" id="favicon" rel="icon"/>
  <script id="ld_searchpage_data" type="application/ld+json">
   {"description":"Free Stuff in SF Bay Area","@context":"

What selector to grab the links:
1. Looking at the results above, I would be selecting the list ('li') element with the class of 'cl-static-search-result', and then select the anchor ('a') element and get 'href'.
2. Another way is to select the 'div' element with the class 'title', and select their parents(which are the anchors), then finally get 'href'.

In [115]:
# find the listing url
listings = soup.select('li.cl-static-search-result > a')

# create an empty list
listing_url = []

# write a loop to extract 250 listings and save them to a list
for i in range(250):
    listing_url.append(listings[i].get('href'))
    
listing_url

['https://sfbay.craigslist.org/eby/zip/d/el-cerrito-bolster-pillow/7714551361.html',
 'https://sfbay.craigslist.org/eby/zip/d/ankle-foot-medical-cam-boots-unisex/7714551359.html',
 'https://sfbay.craigslist.org/nby/zip/d/santa-rosa-nice-sleeper-sofa/7710871185.html',
 'https://sfbay.craigslist.org/eby/zip/d/berkeley-unopened-baby-diaper-ointment/7713234275.html',
 'https://sfbay.craigslist.org/eby/zip/d/berkeley-clearlax-miralax-generic/7713208049.html',
 'https://sfbay.craigslist.org/sby/zip/d/sunnyvale-two-drawer-file-cabinet/7714549739.html',
 'https://sfbay.craigslist.org/sby/zip/d/san-jose-free-outdoor-tiles/7714549146.html',
 'https://sfbay.craigslist.org/sby/zip/d/sunnyvale-nightstand/7714549199.html',
 'https://sfbay.craigslist.org/eby/zip/d/san-lorenzo-free-futon-mattress-72x52/7706646621.html',
 'https://sfbay.craigslist.org/nby/zip/d/novato-four-kitchen-chairs/7713913055.html',
 'https://sfbay.craigslist.org/eby/zip/d/fremont-free-and-great-condition-cal/7714546351.html',
 '

Explanation: The URL automatically returns 360 listings, so I simply write a for loop to extract the first 250 links

In [116]:
# 5. Save HTML Pages
import re

time.sleep(5) # add delay of 5 seconds

for x in range(250):
    # get the new url
    url_new = listing_url[x]
    page_new = requests.get(url_new, headers = headers)
    soup_new = BeautifulSoup(page_new.content)
    
    # find the listing id
    temp = str(soup_new.find('div',class_='postinginfos'))
    pattern = r'\d{9,11}' # find the continuous digits within the returned piece of html
    listing_id = re.findall(pattern, temp)[0]
    
    # Name the file with listing id
    file_name = f"{listing_id}.html"
    
    # write in the html file
    f = open(file_name, "w", encoding='utf-8')
    f.write(str(soup_new))
    f.close()
    
    time.sleep(5) # add delay of 5 seconds


## Part 2: Parsing and Displaying Information from Saved HTML

In [375]:
# 1. Read Saved HTML Files
import os,sys

# Loop through each file in the directory

for filename in os.listdir('HTML Files'):

    # Check if the file ends with .html
    if filename.endswith(".html"):

        # Construct the full file path
        filepath = os.path.join('HTML Files', filename)

        # Read file to string
        file = open(filepath, 'r', encoding='utf-8')
        html = file.read()
        soup2 = BeautifulSoup(html, 'lxml')
        
        # Find title
        title = soup2.select('title')
        title_print = title[0].text[:-25]
        print('\033[1mTitle:\033[0m', title_print)
        
        # Find image URL
        img_url = str(soup2.select_one('img[src]'))
        #pattern_2 = r'[/:._a-zA-Z0-9]*.jpg'
        #img_url_print = re.findall(pattern_2, img_url)[0]
        
        if img_url != 'None':
            pattern_2 = r'[/:._a-zA-Z0-9]*.jpg'
            img_url_print = re.findall(pattern_2, img_url)[0]
            print(img_url_print)
            
        else:
            print("Image URL not found")
        
        # Find description
        description = soup2.select('section #postingbody')
        description_print = description[0].text[30:]
        print(description_print)
        
        # Find Post ID (similar to the listing id from above)
        post_id = str(soup2.select_one('div.postinginfos > p'))
        pattern = r'\d{9,11}' # find the continuous digits
        post_id_print = re.findall(pattern, post_id)[0]
        print('Post ID: ',post_id_print)
        
        # Find Posted Date
        post_date = soup2.find_all('p',class_ = 'postinginfo reveal')
        print(post_date[1].text)
        
        # Find Updated Date, if no update yet, return no update yet
        updated_date = soup2.find_all('p',class_ = 'postinginfo reveal')
        if len(updated_date) >= 3:
            print(updated_date[2].text)
        else:
            print("No update yet")
                
        print() #add a space between each listing for better readability
            
        

[1mTitle:[0m Rawlings baseball helmet 6 1/4-6 7/8 
https://images.craigslist.org/00p0p_hZSMbBjPoJT_0tE0CI_600x450.jpg
Used two seasons in little league

Rawlings baseball helmet 6 1/4-6 7/8

Post ID:  7704690348
posted: 2024-01-05 19:19
updated: 2024-02-04 17:35

[1mTitle:[0m Oak chair 
https://images.craigslist.org/00n0n_baDAS4segUc_0t20CI_600x450.jpg
Swivels, height adjusts, great condition

Post ID:  7705079606
posted: 2024-01-07 06:58
updated: 2024-02-04 17:51

[1mTitle:[0m Micro USB Car Charger 
https://images.craigslist.org/00v0v_coblI9aG3YU_0t20CI_600x450.jpg
Don't need this

Post ID:  7705881682
posted: 2024-01-09 12:45
updated: 2024-02-04 18:09

[1mTitle:[0m FREE Istanbul Metro Card 
https://images.craigslist.org/00h0h_lM7qn9s6Spm_0t20CI_600x450.jpg
Might be some money left on it, not sure.

Post ID:  7706359982
posted: 2024-01-10 22:00
updated: 2024-02-04 19:20

[1mTitle:[0m Free beautiful antique twin bed frame 
https://images.craigslist.org/00J0J_anTNI7LgRnq_0CI0t

    
Post ID:  7711920009
posted: 2024-01-27 16:12
updated: 2024-02-04 18:31

[1mTitle:[0m Desk hutch 
https://images.craigslist.org/01212_1saoQwBgc9e_0t20CI_600x450.jpg
Desk hutch
70x42x13
    
Post ID:  7712015602
posted: 2024-01-28 06:36
updated: 2024-02-04 14:26

[1mTitle:[0m Non working chest freezer 
https://images.craigslist.org/00D0D_bb0FOETletO_0CI0t2_600x450.jpg
Hisense 7.0 cu ft. It's about 5 years old. Just stopped. Yours for free, if you're handy, it's probably the relay or the thermostat that conked out. I don't have the time to troubleshoot.

Free pickup only. I can help you load. Will fit in minivan easily. Pickup truck obviously 

https://youtu.be/zEsj1y2Q_8o?si=7HKrmjjH3VQC8xXW
    
Post ID:  7712074474
posted: 2024-01-28 09:51
updated: 2024-02-04 19:45

[1mTitle:[0m Swap cooler / Evaportavie Cooler Pads - Free 
https://images.craigslist.org/00a0a_6NtCiN9OSIo_0cj0q0_600x450.jpg
Swap cooler / Evaportavie Cooler Pads Replacement - Free. Description on pic.

Post I

[1mTitle:[0m Free Dynex 40 Inch 1080p Full HD LCD TV With Remote 
https://images.craigslist.org/00101_TzNYidMlBt_0CI0t2_600x450.jpg
I am giving away the Free Dynex 40 Inch 1080p Full HD LCD TV With Remote in the images shown. The TV works and is functional. However, it does have a line of dead pixels down the right side as you can see. This does not affect usability but it is obviously noticeable.

I am located in Antioch.
    
Post ID:  7713100999
posted: 2024-01-31 11:31
updated: 2024-02-04 15:48

[1mTitle:[0m Costume Paint and Fake Blood 
https://images.craigslist.org/00p0p_9BTnFtuoqYU_0CI0t2_600x450.jpg
Free costume paint and fake blood including a tub of vaseline to help with the latex paint.

Checkout the other stuff I'm giving away for free here: https://sfbay.craigslist.org/search/sss?query=isaacwfreestuffsf

No holding unless I talked with you and you are on your way within the hour.

Text, call, or email me.

isaacwfreestuffsf
    
Post ID:  7713154629
posted: 2024-01-31 

[1mTitle:[0m Sonoma County vintage visitors' brochure 
https://images.craigslist.org/00b0b_1Q3BfXj7jPv_0mL0t2_600x450.jpg
Illustrated brochure for visitors to Sonoma County, published in 1972 by the Santa Rosa Chamber of Commerce. Sixteen pages of text and photos. Map and listing of the county's 28 wineries (425 now!).

Post ID:  7714428428
posted: 2024-02-04 12:48
No update yet

[1mTitle:[0m Free Heater Filters 
https://images.craigslist.org/00v0v_ftVSqY3e9GY_0t20CI_600x450.jpg
We replaced our heater and no longer need these. Please let me know when you can come by, and that you are a human, and I’ll leave them on my porch for you.

Post ID:  7714429176
posted: 2024-02-04 12:51
updated: 2024-02-04 12:51

[1mTitle:[0m FREE Mahjong / Poker / Chinese Chess / Game Table in Good Condition 
https://images.craigslist.org/00u0u_4y7RbGyXhK7_0CI0t2_600x450.jpg
FREE Mahjong / Poker / Chinese Chess / Game Table

Good Condition
36.25” x 36.25” x 29.5” high
Can be disassembled and assembled e

Post ID:  7714443784
posted: 2024-02-04 13:32
No update yet

[1mTitle:[0m KEEP YOUR STUFF DRY: plastic bags. Lg. sheets of plastic, ect. 
Image URL not found
come see, and pick up at my place on Petaluma Blvd, south 
across from warehouses just past stop light at I st.
show contact info
 Land Line 
NO e-mail NO Morning calls, even just to leave msg. 
But OK to call evenings til 11:00
    
Post ID:  7714444144
posted: 2024-02-04 13:33
No update yet

[1mTitle:[0m Free Drop-in Bathtub Available 
https://images.craigslist.org/00M0M_3QnUKvJ76O1_0oR0CI_600x450.jpg
Are you in the process of remodeling your bathroom, searching for that perfect centerpiece for your home spa, or looking for a project that inspires your creativity? We have an exciting offer for you: a free drop-in bathtub waiting to be the highlight of your next home improvement project.

What’s Available?
One Drop-in Bathtub: This bathtub is a generous size, perfect for soaking and relaxation, and ready to be integrated into

[1mTitle:[0m Desk Hutch or headboard 
https://images.craigslist.org/00D0D_gQaUdQ9iSlJ_0t20CI_600x450.jpg
Desk hutch or headboard

Post ID:  7714461656
posted: 2024-02-04 14:28
No update yet

[1mTitle:[0m bathroom cabinet 
https://images.craigslist.org/01313_bSUuNXFv5cs_0lM0t2_600x450.jpg
ikea 7 ' tall 1 'wide 1'deep cabinet from ikea it is on the sidewalk covers up from the rain come and get it

Post ID:  7714462115
posted: 2024-02-04 14:29
No update yet

[1mTitle:[0m Little Tykes Turtle Sandbox 
https://images.craigslist.org/00m0m_cNJX3mnJKmL_0CI0t2_600x450.jpg
Heavy plastic. Used but good condition.  Does not come with sand.
Center shows where old duct tape was used to cover drainage hole in bottom.
    
Post ID:  7714462153
posted: 2024-02-04 14:29
No update yet

[1mTitle:[0m Oversized books - history, geography 
https://images.craigslist.org/00t0t_kgxk6miu8MJ_0ak07K_600x450.jpg
Oversized books fun to peruse, but a "little" chewed on by our dog. 🙄

California by Dewitt Jones

[1mTitle:[0m Mirror 
https://images.craigslist.org/00w0w_grnQ1MU411G_0t20CI_600x450.jpg
This wood-framed mirror is from a chest of drawers or vanity. It’s already been painted brown, you can easily repaint it another color. 

Glasses case for scale.
    
Post ID:  7714479279
posted: 2024-02-04 15:26
No update yet

[1mTitle:[0m Set of Arabic books 
https://images.craigslist.org/00Y0Y_dJ6ixL7cz3W_0t20CI_600x450.jpg
Selection of books for Arabic learners - includes instructional books plus regular books in Arabic, a few CDs with accompanying audio lessons, a few maps of the Middle East, and even a Harry Potter book 1 and Little Prince in Arabic.

Post ID:  7714480230
posted: 2024-02-04 15:30
No update yet

[1mTitle:[0m 2 Garden Pots - Drainage holes both 
https://images.craigslist.org/00606_durA93luosb_0CI0t2_600x450.jpg
Polymer pots - lightweight- One is 20" tall the other 14" tall
Must take both with the dirt in the one.
Please send your phone # if interested.  Thanks.

Post ID:  

posted: 2024-02-04 16:34
No update yet

[1mTitle:[0m Navy Blue PBK Camp Bunk Bed RAILS and LADDER (No bed) 
Image URL not found
We have the PBK Camp Bunk Bed in Navy Blue. These are used to turn two of the Camp beds into one bunk bed.  

They are stored in our garage so I don't have photos right now but can pull them out if you are interested. 


Post ID:  7714498039
posted: 2024-02-04 16:35
updated: 2024-02-04 16:35

[1mTitle:[0m Free Olive Green Velvet Chair (PENDING) 
https://images.craigslist.org/00j0j_dSnbjeg4Ajv_07K0ak_600x450.jpg
Overall in good condition - one small discoloration on seat and a small rip in the front - but if you're OK with that, you're welcome to have it! We do have a dog in the house, in case of allergies.

Please let me know what day/time you could pickup and we can leave it out on the porch. Will confirm with our address. Thanks!

Post ID:  7714499488
posted: 2024-02-04 16:41
updated: 2024-02-04 18:32

[1mTitle:[0m Free O'Keefe & Merritt Stove 
https:/

https://images.craigslist.org/00z0z_7uSHxEj0mty_0gW0mO_600x450.jpg
Chicco Keyfit car seat + base are both expired

Can use them for Target's trade-in event to get a coupon off for new car seat
    
Post ID:  7714506798
posted: 2024-02-04 17:10
No update yet

[1mTitle:[0m FREE BLENDER Waring Pro 
https://images.craigslist.org/00R0R_ho6HBAbXhY0_0t20CI_600x450.jpg
Great “pro” blender just needs clear new plastic plug on top .
I traded up to A Vitamix and am getting rid of stuff I don’t use

Post ID:  7714506823
posted: 2024-02-04 17:10
updated: 2024-02-04 17:11

[1mTitle:[0m Little Bench (PENDING) 
https://images.craigslist.org/00t0t_hFdjSxRSF3a_0ww0oo_600x450.jpg
Little metal bench - we tried to reupholster and cut the faux leather fabric incorrectly so it does need to be reupholstered still, but free if you'd like it. 

Please let me know what day/time you could pickup and we can leave it out on the porch. Will confirm with our address. Thanks!

Post ID:  7714506962
posted: 2024-02-

[1mTitle:[0m 3 sinks * Free delivery Monday 2/5 10 am Berkeley, Oakland, Emeryville 
https://images.craigslist.org/00j0j_dU5CDQvwNNH_0CI0m3_600x450.jpg
To your place or to the dump. 

Delivery only Monday morning ~10 am February 5.

Take One * Take All! 

Are you in the midst of renovating your home, starting an upcycling project, or simply in need of a high-quality sink upgrade? We are offering three free bathroom sinks to a good home in the Bay Area - an opportunity not to be missed by enthusiasts who appreciate value and sustainability.

What’s Available?
* Two Single Sinks (Approximately 35" and 46" in length)
* One Double Sink (Approximately 66" in length)

These sinks are perfect for homeowners looking to enhance their bathroom aesthetics or for creative individuals interested in repurposing or upcycling. Each piece is a testament to quality and design, ready to complement your space or project.

Requirements:

Bring a Buddy: Due to their size and to ensure safe handling, we re

Post ID:  7714546351
posted: 2024-02-04 21:22
updated: 2024-02-04 21:24

[1mTitle:[0m Garage cabinets (2) 
https://images.craigslist.org/00m0m_10vdYMrpdQc_0CI0t2_600x450.jpg
Giving away two sturdy 4'x4'x1' (exterior dimensions) cabinets that came with the house.

Can deliver (also free) most anywhere on the Peninsula.
    
Post ID:  7714546515
posted: 2024-02-04 21:23
No update yet

[1mTitle:[0m FREE Outdoor Tiles 
https://images.craigslist.org/00n0n_eSRel07O8f9_0mr0eI_600x450.jpg
I have about 10 pieces of 16x16 outdoor tiles unused from a previous project and I hate to dump them in the trash. The tile is thick. If you can repurpose it, send me a message.

Pick up is in S. San Jose (Santa Teresa & Snell Ave)

Post ID:  7714549146
posted: 2024-02-04 21:51
updated: 2024-02-04 22:07

[1mTitle:[0m Nightstand 
https://images.craigslist.org/00v0v_9uIeXfAmKOH_0lM0t2_600x450.jpg
White, one drawer nightstand
Height: 24"
Depth (front to back):15-3/4"
Width: 17-3/4"

Post ID:  7714549199
po

## Part 3: Automating Login on The Old Reader

In [3]:
#2. Exploring the Login Mechanism
headers = {'User-agent': 'Mozilla/5.0'}
url_part3 = 'https://theoldreader.com/users/sign_in'
page_3 = requests.get(url_part3, headers = headers)
soup3 = BeautifulSoup(page_3.content, "html.parser")

print(soup3.prettify())

<!DOCTYPE html>
<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
  <link href="https://fonts.googleapis.com/css?family=Montserrat:400,600" rel="stylesheet" type="text/css"/>
  <!-- Latest compiled and minified JavaScript -->
  <script src="https://code.jquery.com/jquery.js">
  </script>
  <script src="//netdna.bootstrapcdn.com/bootstrap/3.0.1/js/bootstrap.min.js">
  </script>
  <link href="https://fonts.googleapis.com/css?family=Source+Code+Pro" rel="stylesheet" type="text/css"/>
  <link href="https://fonts.googleapis.com/css?family=Open+Sans:400,800" rel="stylesheet"/>
  <link href="//s.theoldreader.com/assets/reader/public-c7869a909c7b119a27fb646003828344.css" media="screen" rel="stylesheet" type="text/css">
   <link href="//s.theoldreader.com/assets/

In [4]:
# Document all `<input>` fields within the login form, paying special attention to their name attributes.
# These fields are crucial for submitting the login request programmatically.

inputs = soup3.select('input')

for i in range(len(inputs)):
    print(i+1,inputs[i])

# We can see the input fields for utf8, authenticity_token, user[login], user[password], and commit

1 <input name="utf8" type="hidden" value="✓"/>
2 <input name="authenticity_token" type="hidden" value="owGBoofmwMxiSQc/4isCRN3mIalZjeX8r8eVpRXjDMo="/>
3 <input autocapitalize="off" autocorrect="off" autofocus="autofocus" class="form-control" id="user_login" name="user[login]" placeholder="Username/Email" size="30" spellcheck="false" type="text"/>
4 <input class="form-control" id="user_password" name="user[password]" placeholder="Password" size="30" type="password"/>
5 <input class="btn btn-primary btn-block" name="commit" type="submit" value="Sign In"/>


3. Analyzing Network Traffic for Login Request
 - The network request I made during login is a POST request. This is because that I am sending my login information such as username and password to the web server. 

 - After manually logging in the website, I noticed that there are 5 variables passed, which are:
     - utf8
     - authenticity_token
     - user[login]
     - user[password]
     - commit
    
 - These variables are also the same as the names of the input elements under form. Therefore, when I intend to make the POST request here, I also need to submit all these variables with values.

In [11]:
# find the inputs beside username and password
input1 = soup3.select_one('input[name=utf8]')
utf8 = input1.get('value')

input2 = soup3.select_one('input[name=authenticity_token]')
auth_token = input2.get('value')

input3 = soup3.select_one('input[name=commit]')
commit = input3.get('value')

print(utf8)
print(auth_token)
print(commit)

✓
owGBoofmwMxiSQc/4isCRN3mIalZjeX8r8eVpRXjDMo=
Sign In


In [7]:
# 4. Automating the Login Process
time.sleep(5) # add delay of 5 seconds

# An open session carries the cookies and allows you to make post requests
session = requests.session()

# Send POST request with all 5 variables to the login form’s action URL to log in
res = session.post(url_part3, 
                        data = {'utf8': utf8,
                                'authenticity_token': auth_token,
                                'user[login]': 'leoliao@ucdavis.edu',
                                'user[password]': 'Password123',
                                'commit': commit
                               },
                        timeout = 20)

# get cookies and inspect
cookies = session.cookies.get_dict()
print(cookies)

{'_new_reader_session': 'BAh7CkkiD3Nlc3Npb25faWQGOgZFVEkiJWM3ZjI3MjVhYjdiZmYzNjMzODQyMjU4N2VhYzRhOTY5BjsAVEkiGXdhcmRlbi51c2VyLnVzZXIua2V5BjsAVFsHWwZVOhpNb3BlZDo6QlNPTjo6T2JqZWN0SWQiES64DJ%2B4Po9VrZBChkkiIiQyYSQwNSRyL3FZWTVhbi9SSmNpTnhzSWJoM2hPBjsAVEkiDWxhbmd1YWdlBjsARjoHZW5JIhByZWRpcmVjdF90bwY7AEZJIgYvBjsARkkiEF9jc3JmX3Rva2VuBjsARkkiMVl0dHR6UGpPZG5rbDROQldKZHJaV1RCbm9CRDh2UzFBU1pQLzVYV3M3YVk9BjsARg%3D%3D--bd91cecded92607844340bd32d79fa93e6507636', 'i_know_you': 'Leo', 'remember_user_token': 'BAhbB1sGVToaTW9wZWQ6OkJTT046Ok9iamVjdElkIhEuuAyfuD6PVa2QQoZJIiIkMmEkMDUkci9xWVk1YW4vUkpjaU54c0liaDNoTwY6BkVU--45e49958d371e56b1da04c43e04a0f49f4c7c680', 'signed_at': '1707346427'}


In [20]:
# 5. Verifying Successful Login
time.sleep(5) # add delay of 5 seconds

# Use cookies to log into the user edit page, which is only available when logged in.
page4 = requests.get('https://theoldreader.com/users/edit', cookies=cookies)
soup4 = BeautifulSoup(page4.content, "html.parser")

print(soup4.prettify())

<!DOCTYPE html>
<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
  <link href="//s.theoldreader.com/assets/application-befb06d5a14978388154b51422cef437.css" media="all" rel="stylesheet" type="text/css"/>
  <link href="//s.theoldreader.com/assets/apple-touch-icon-57x57-86fe1176e14af4907a6fecfe5ca7e3f1.png" rel="apple-touch-icon-precomposed" sizes="57x57"/>
  <link href="//s.theoldreader.com/assets/apple-touch-icon-114x114-bae89acc41c93261dd962ea6ade08d22.png" rel="apple-touch-icon-precomposed" sizes="114x114"/>
  <link href="//s.theoldreader.com/assets/apple-touch-icon-72x72-f248503edfa3676f8d58af531aff7e88.png" rel="apple-touch-icon-precomposed" sizes="72x72"/>
  <link href="//s.theoldreader.com/assets/apple-touch-icon-144x144-510415291cae9b46a9ca4ac398

In [41]:
# verify successful login: find email and print
import re

email = str(soup4.select('div.col-md-8 > #user_email'))
pattern4 = r'\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6}' # find email
email_print = re.findall(pattern4, email)[0]

print(email_print)

leoliao@ucdavis.edu
