# Web Scraping Recycling Guides

In [1]:
# Palm Desert (sparse): https://www.palmdesert.gov/our-city/departments/public-works/waste-management-and-recycling-services
# Cook County (descriptive): https://www.cookcountyil.gov/content/green-guide-library

In [2]:
%%capture
# Refresh the apt package index, then install the tesseract-ocr system package.
# %%capture (which must stay on the first line of the cell) keeps the apt output out of the notebook.
!apt-get update
!apt-get install -y tesseract-ocr

In [3]:
%%capture
# %pip (rather than !pip) installs into the environment of the running kernel,
# so the package is importable from this notebook afterwards.
%pip install pytesseract

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import csv
import os
import pytesseract
from io import BytesIO
import re
from PIL import Image

In [5]:
# double checking: print the Tesseract version reported through pytesseract
print(pytesseract.get_tesseract_version())

4.1.1


## Palm Desert

In [None]:
# palm desert url
url = 'https://www.palmdesert.gov/our-city/departments/public-works/waste-management-and-recycling-services'

# device-specific headers (copied from a desktop browser session)
# NOTE(review): the Cookie value below is a captured browser session (session id, request-verification
# token, ...). It is kept unchanged here, but it will presumably expire and should not live in a shared
# notebook -- confirm whether the request still succeeds without it.
referer = 'https://www.google.com/'
accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
accept_language = 'en-US,en;q=0.9'
cookie = 'nmstat=97064d92-b2c1-c7df-7042-e6e5108d3e47; _ga=GA1.1.1533015218.1727206955; ASP.NET_SessionId=xfih4yoegvstrscu1m5b5vgj; BIGipServer~AUTO-VISION~visionlive~www.palmdesert.gov_443=!DfseGiW3CTr0HSNedm1Xf9THDYxJhBz9ZWmCmQGMp0b7vS/AhhMgSedVI6MZJXRKxc1Hb9pPFDNwlZ4=; ShowTopTips=True; ShowTopTipsPublishDate=638570030605130000; __RequestVerificationToken=9EgpZ5ulsbYrjEQPCsbrx8H8YH_KlzUO9XOvtGEwllTX5WWPTXvVcBbNgWLph9eiBUBxJyioAiunHZ17YZz1WIpet6LpCbR4WT-day4ZqV81; _ga_G3HQ90G5HJ=GS1.1.1727457387.2.0.1727457387.0.0.0; TS01af151e=0106cf681b5a7e445fabcc5426e2418d53e2c95246798aa8d57ea8f7299d461ada66b55f0b5a81576c3b702950af224cfc8f85de6307f4f2b2340ede7b675c6d5cdf61357386a253481328ef6132d3b6bdfca715d96627c31bdaca21944103c6cf0c0727692d987e667ec1b0b59564eed671e71365960b516a85658bec0cf4d756a6040d3e; TS3b44c919027=08b9428c85ab20007d7d3b70e6b2ea37dbf198cb3d075c4ed383cdfa7978616103f4493b3162ac5f08bfd8f208113000b7de0132475b2f9a92b0ec50dbdbfb4900899b1c1f619c2ff34e4ffc842e1ceb168244300062f252e5ba704add2320d1; RT="z=1&dm=www.palmdesert.gov&si=af291a1d-2bf2-40e6-9918-e8c673199980&ss=m1kziu6j&sl=1&tt=80r&rl=1&ul=7vwb"'
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'

custom_headers = {
    "User-Agent": user_agent,
    "Accept": accept,
    "Accept-Language": accept_language,
    "Cookie": cookie,
    "Referer": referer
}

# getting website
# A timeout is set because requests otherwise waits indefinitely on an unresponsive server.
r = requests.get(url, headers=custom_headers, timeout=30)
# Fail here on a 4xx/5xx answer instead of silently parsing an error page further down.
r.raise_for_status()

In [None]:
# checking response (should be 200); the bare expression displays the Response repr
r

<Response [200]>

In [None]:
# Parse the raw response bytes with the html5lib tree builder.
soup = BeautifulSoup(r.content, features='html5lib')
# print(soup.prettify())  # uncomment to inspect the parsed document

In [None]:
# just get item types (Paper, Plastic, etc.)

#for tr in soup.findAll('tr'):
#    for td in tr.findAll('td'):
#      for st in td.findAll('strong'):
#        print(st)


In [None]:
# trying to get names with correct values attached
# Builds `d`: one entry per <ul> found inside a table cell, keyed by text taken from a cell
# collected in `items`, with the <ul> text as the value.
d = {}
e = 0 # counter: index into `items` of the cell whose text labels the next <ul>
items = []
for tr in soup.findAll('tr'):
    for td in tr.findAll('td'):
      # the cell is appended once per <strong> tag it contains (`st` itself is not used)
      for st in td.findAll('strong'):
        items.append(td)
      # presumably skips cells that have no child nodes -- TODO confirm
      if len(td) >= 1:
        # go into middle loops to get lists and titles separately
        for ul in td.findAll('ul'):
          # NOTE(review): items[e] raises IndexError if more <ul> are met than cells were collected
          it = items[e].text.strip()
          if len(it) > 50:
            # take out whatever has '\n' before and after
            # NOTE(review): 151:200 is a hard-coded character window into the cell text,
            # presumably tuned by hand to the current page layout -- confirm before reuse
            it_parse = it[151:200]
            # the key has newlines and spaces removed (e.g. 'Metal&Glass' in the recorded output)
            d[it_parse.replace("\n", "").replace(' ','')] = ul.text.replace("\n", ",").replace('   ','')
          else:
            # short cell text is used as the key unchanged
            d[it] = ul.text.replace("\n", ",").replace('   ','')
          e += 1


In [None]:
# show the scraped mapping (category -> comma-separated item list)
print(d)

{'Paper': ', White and colored paper, Phone books, Magazines, Junk Mail, Envelopes, File folders, Cardboard (flattened), Cartons (milk, juice, & broth), Books (remove hard covers or binding), Cardboard boxes, e.g. cereal, etc.,', 'Plastic': ', Styrofoam packing blocks, Plastic medicine bottles (empty), Plastic bottles numbered 1-7 (remove plastic film or wrap),', 'Metal&Glass': ', Aluminum, steel, and tin cans, Clean aluminum foil, Glass bottles and jars (remove lids),'}


In [None]:
# Save the scraped dictionary as text (the Python repr of `d`).
# The encoding is explicit so the file does not depend on the platform's default locale encoding.
with open('palm_desert.txt', 'w', encoding='utf-8') as data:
    data.write(str(d))

In [None]:
# One header row (the category names) followed by one row holding each category's item list.
# newline="" is what the csv module expects; the encoding is explicit so the output
# does not depend on the platform's default locale encoding.
with open("palm_desert_file.csv", "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=d.keys())
    w.writeheader()
    w.writerow(d)

## Cook County

### Text on Page

In [None]:
#<img loading="lazy" src="/sites/g/files/ywwepo161/files/images/2023-11-20/recycling.png" width="872" height="1032">

In [6]:
# cook county url
url = 'https://www.cookcountyil.gov/content/green-guide-library'

# device-specific headers (copied from a mobile-emulation browser session)
# NOTE(review): the Cookie value is a captured browser cookie; confirm whether it is still needed.
referer = 'https://www.google.com/'
accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
accept_language = 'en-US,en;q=0.9'
cookie = '_ga_LMEDTH47YF=GS1.1.1727581048.1.0.1727581048.0.0.0; _ga=GA1.1.2063294340.1727581048; nmstat=9c43f93d-7190-d5ed-2dc9-b8ef49416169'
user_agent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Mobile Safari/537.36'

custom_headers = {
    "User-Agent": user_agent,
    "Accept": accept,
    "Accept-Language": accept_language,
    "Cookie": cookie,
    "Referer": referer
}

# getting website
# A timeout is set because requests otherwise waits indefinitely on an unresponsive server.
r = requests.get(url, headers=custom_headers, timeout=30)
# Fail here on a 4xx/5xx answer instead of silently parsing an error page further down.
r.raise_for_status()

In [7]:
# checking response (should be 200)
r

<Response [200]>

In [8]:
# Parse the Cook County response bytes with the html5lib tree builder.
soup2 = BeautifulSoup(r.content, features='html5lib')
# print(soup2.prettify())  # uncomment to inspect the parsed document

In [9]:
#<div id="3408390695-3627088798-11" class="coh-accordion-tabs-content is-active" style="display: block;"> <div class="coh-wysiwyg">    <h4><span class="TextRun SCXW13227692 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US"><strong>FAQs: Recycling and Proper Disposal</strong></span><span class="EOP SCXW13227692 BCX0">&nbsp;</span></h4><h4><span class="TextRun SCXW13227692 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US"><strong>CAPS</strong></span><span class="EOP SCXW13227692 BCX0">&nbsp;</span></h4><article class="align-center"><div class="field field--media-image field--name-image field--type-image field--label-hidden field__item">  <img loading="lazy" src="/sites/g/files/ywwepo161/files/images/2023-11-21/Caps%20Recycling.png" width="376" height="376"></div>
#</article><p><span class="TextRun SCXW27379192 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US">Even though pizza boxes are cardboard, they can cause contamination due to grease and food particles. Try ripping off the clean top portion of the box and recycling that instead. Ideally, rip off the top of the box even if the pizza box has no food particles or grease. This helps the chances that your box will be recycled because workers at recycling facilities do not have time to check every pizza box that comes down their conveyor and may instinctually pull and throw away pizza boxes that they see.</span><span class="EOP SCXW27379192 BCX0">&nbsp;</span></p><p><em><span class="TextRun SCXW27379192 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US">The Department of Environment and Sustainability does not accept or pick-up recyclable goods or waste. If you have questions on how to recycle or dispose of an item after reviewing our library, contact us at </span></em><a class="Hyperlink SCXW27379192 BCX0" href="mailto:wasteandrecycling@cookcountyil.gov" target="_blank" rel="noreferrer noopener"><em><span class="TextRun Underlined SCXW27379192 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US">WasteandRecycling@CookCountyil.gov</span></em></a><span class="EOP SCXW27379192 BCX0">&nbsp;</span></p> </div> </div>

In [10]:
# The "Recycling and Disposal Tips" accordion panel is looked up by its element id.
instr_panel = soup2.find(id="3408390695-3627088798-11")
if instr_panel is None:
    # find() returns None when nothing matches; say so explicitly rather than failing
    # with an opaque AttributeError on `.text`.
    raise ValueError(
        "element id '3408390695-3627088798-11' not found on the Cook County page; "
        "the page markup may have changed"
    )
recycling_instr = instr_panel.text.strip()
#print(recycling_instr)

In [17]:
# Collapse every run of whitespace (newlines, repeated spaces, non-breaking spaces) to one space.
# Deleting the newlines / double spaces outright would glue the neighbouring words together.
# The operation is idempotent, so re-running the cell is harmless.
recycling_instr = " ".join(recycling_instr.split())

In [None]:
# Single-row, single-field CSV holding the whole instruction text.
# NOTE(review): the same path is rewritten by the DictWriter cell in the
# "Entire Recycling Guide" section, so this output does not survive a full run.
# The encoding is explicit so the file does not depend on the platform's default locale encoding.
with open('cook_county_file.csv', 'w', newline='', encoding='utf-8') as csvfile:
    my_writer = csv.writer(csvfile, delimiter='\n')
    my_writer.writerow([recycling_instr])

In [None]:
# write to txt file instead of csv -- Annie
# The context manager closes the file even if write() raises; the encoding is explicit so the
# output does not depend on the platform's default locale encoding.
# NOTE(review): mode "a" appends, so every re-run of this cell adds the text again --
# confirm whether "w" is intended.
with open("cook_county_recycling_instruc.txt", "a", encoding="utf-8") as file:
    file.write(recycling_instr)


### PDF on Page (main)

In [47]:
# %pip installs into the running kernel's environment; the version is pinned to the one
# recorded in this notebook's install output so that re-runs stay reproducible.
%pip install pdfplumber==0.11.4

Collecting pdfplumber
  Downloading pdfplumber-0.11.4-py3-none-any.whl.metadata (41 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pdfminer.six==20231228 (from pdfplumber)
  Downloading pdfminer.six-20231228-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.4-py3-none-any.whl (59 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.2/59.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [60]:
import pdfplumber

# URL of the PDF
pdf_url = "https://swancc.org/resources/educational-resources/handouts/499-curbside-recycling-guidelines-in-english/file"

# Step 1: Download the PDF
# A timeout avoids hanging forever on an unresponsive server; raise_for_status() prevents an
# HTTP error body from being saved to disk as if it were the PDF.
response = requests.get(pdf_url, timeout=30)
response.raise_for_status()

# Save the PDF to a local file
pdf_path = "curbside_recycling_guidelines.pdf"
with open(pdf_path, 'wb') as f:
    f.write(response.content)

# Step 2: Extract text from the PDF using pdfplumber
# Pages are joined with a newline so the last line of one page is not fused with the first
# line of the next; `or ""` guards against a page for which no text is returned.
with pdfplumber.open(pdf_path) as pdf:
    text = "\n".join(page.extract_text() or "" for page in pdf.pages)

# Print the extracted text
print(text)

Curbside Recycling Guidelines
Put in Recycling Cart LOOSE! - Empty & Clean
sparkling
cider
Glass Plastic Metal Paper
Bottles & Jars Bottles & Jugs - Caps On Steel & Aluminum Boxes, Magazines, Paper,
No Metal Caps Jars & Tubs Depressurize Aerosols Cups & Cartons
Metal Lids 3" plus
No Bags, Film, or Foam Flatten Boxes
DON’T Put in Recycling Cart!
No Batteries, No Plastic No Food, Liquids, No Clothing No Hoses,
Electronics, Bags or Wrap Diapers, or or Shoes Wires, Cords, or
or Sharps Shredded Paper Hangers


In [61]:
# Keep only the text that precedes the "DON..." (do-not-recycle) section.
# partition() leaves the string untouched when the marker is absent, so re-running this cell
# after the cut is harmless; slicing with find() would use its -1 "not found" result and
# silently drop the last character each time.
text = text.partition('DON')[0]

In [62]:
# show the text that remains after the cut
print(text)

Curbside Recycling Guidelines
Put in Recycling Cart LOOSE! - Empty & Clean
sparkling
cider
Glass Plastic Metal Paper
Bottles & Jars Bottles & Jugs - Caps On Steel & Aluminum Boxes, Magazines, Paper,
No Metal Caps Jars & Tubs Depressurize Aerosols Cups & Cartons
Metal Lids 3" plus
No Bags, Film, or Foam Flatten Boxes



In [63]:
# Only the PDF text is used for now; appending the web-page text (recycling_instr) is disabled.
recycling_guide = text  # + recycling_instr

In [65]:
# show the guide text that will be exported
print(recycling_guide)

Curbside Recycling Guidelines
Put in Recycling Cart LOOSE! - Empty & Clean
sparkling
cider
Glass Plastic Metal Paper
Bottles & Jars Bottles & Jugs - Caps On Steel & Aluminum Boxes, Magazines, Paper,
No Metal Caps Jars & Tubs Depressurize Aerosols Cups & Cartons
Metal Lids 3" plus
No Bags, Film, or Foam Flatten Boxes



In [66]:
# export !!!!! (combine?)
# combine with the rest of the web page and vectorize
# put in a vector store

# The context manager closes the file even if write() raises; the encoding is explicit so the
# output does not depend on the platform's default locale encoding.
# NOTE(review): mode "a" appends, so every re-run of this cell adds the guide again --
# confirm whether "w" is intended.
with open("cook_county_recycling_instruc_new.txt", "a", encoding="utf-8") as file:
    file.write(recycling_guide)

### Entire Recycling Guide

In [None]:
# entire guide: the accordion container, located by its full class attribute string
webpage = soup2.find(
    class_="coh-accordion-tabs ssa-component coh-component ssa-component-instance-eb12b400-56f8-4543-a9f1-9f3fb3d903de coh-component-instance-eb12b400-56f8-4543-a9f1-9f3fb3d903de"
)

# grabbing groups in the dropdowns: one fresh empty list per <h4> heading text
groups = {}
for heading in webpage.find_all('h4'):
    groups[heading.text.strip()] = []
print(groups)

{'Location': [], 'Hours': [], 'Materials Accepted': [], 'REDUCE': [], 'REUSE': [], 'CURBSIDE RECYCLING': [], 'Recycling Drop-off Centers': [], 'Permanent Household Hazardous Waste Drop-off Centers': [], 'Alternatives to Household Hazardous Waste': [], 'REUSE Your Construction & Demolition Materials': [], 'REDUCE Your Electronic Waste': [], 'REUSE Your Electronic Waste': [], 'Permanent Electronic Waste Collection Sites': [], 'One-Day Electronic Waste Collection Events': [], 'Battery Disposal': [], 'REDUCE Your Food Waste': [], 'Compost 101': [], 'Composting at Home': [], 'Municipal Composting': [], 'REDUCE Your Textile Waste': [], 'REUSE Your Textiles': [], 'RECYCLE Your Textiles': [], 'Sharps Disposal': [], 'FAQs: Recycling and Proper Disposal': [], 'CAPS': [], 'CLEAN, EMPTY AND DRY RECYCLABLES': [], 'SHREDDED PAPER': [], 'BAGGED RECYCLABLES': [], 'PLASTIC BAGS': [], 'PLASTIC STORAGE BAGS AND WRAP': [], 'PLASTIC PACKAGING MATERIALS': [], 'TIRES': [], 'WASTE COLLECTIONS FOR RESIDENTIAL 

In [None]:
# every <ul> inside the accordion, in document order (the first one is the tab-navigation list)
unfiltered_recycling = webpage.find_all('ul')
print(unfiltered_recycling)

[<ul class="coh-accordion-tabs-nav"></ul>, <ul><li>Every Tuesday: 8 a.m. - noon </li><li>Every Thursday: 1 p.m. – 5 p.m. </li><li>2nd and 4th Saturday every month: 9 a.m. – 1 p.m. (TVs accepted during Saturday hours only)</li></ul>, <ul><li>Household recyclables (paper, plastic bottles and containers, metal cans and glass bottles and jars)</li><li>Electronic waste and TVs</li><li>Clothing and textiles</li><li>PS Foam (Styrofoam)</li><li>Personal healthcare equipment</li><li>Small appliances and small furniture </li></ul>, <ul><li><span class="TextRun SCXW177614313 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US">Challenge yourself to find another purpose for a material before you throw it away or place in the recycling bin. Can your egg carton be used as a seed starter for your garden or can your old jars house pens on your desk?</span><span class="EOP SCXW177614313 BCX0"> </span></li><li><span class="TextRun SCXW177614313 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US">Check out th

In [None]:
# grabbing what is inside groups within dropdowns
# stripped text of every <ul> in the accordion, in document order
ul_texts = [ul.text.strip() for ul in webpage.find_all('ul')]

# Keep the original 40 pre-filled slots (unused ones stay []) so the mapping has the same keys
# as before on the current page, but grow past 40 instead of raising KeyError on a longer page.
chunk = {i: [] for i in range(max(40, len(ul_texts)))}
for e, ul_text in enumerate(ul_texts):
    chunk[e] = ul_text

In [None]:
# NOTE(review): exploratory lookup of the first <ul>; the result is only displayed,
# never assigned, in the cells shown here.
webpage.find('ul', )

In [None]:
# show the index -> <ul> text mapping
print(chunk)

{0: '', 1: 'Every Tuesday:\xa08 a.m. - noon\xa0Every Thursday:\xa01 p.m. – 5 p.m.\xa02nd and 4th Saturday every month:\xa09 a.m. – 1 p.m. (TVs accepted during\xa0Saturday hours only)', 2: 'Household recyclables (paper, plastic bottles and containers, metal cans and glass bottles and jars)Electronic waste and TVsClothing and textilesPS Foam (Styrofoam)Personal healthcare equipmentSmall appliances and small furniture', 3: 'Challenge yourself to find another purpose for a material before you throw it away or place in the recycling bin. Can your egg carton be used as a seed starter for your garden or can your old jars house pens on your desk?\xa0Check out the following common household items that can easily be transformed:\xa0Milk jugs/plastic bottles => planters\xa0Dryer sheets => dust cloths\xa0Newspapers => drop cloths for painting\xa0Food cans => change holder\xa0Old toothbrush => brush for cleaning grout in bathroom and kitchenEmpty paper towel rolls => holder for hair bands or form t

In [None]:
# One header row (the integer slot numbers) followed by one row with each slot's text.
# The recorded values contain non-ASCII characters (non-breaking spaces, en dashes), so the
# encoding is explicit instead of relying on the platform's default locale encoding.
with open("cook_county_file.csv", "w", newline="", encoding="utf-8") as f2:
    w = csv.DictWriter(f2, fieldnames=chunk.keys())
    w.writeheader()
    w.writerow(chunk)

In [None]:
#<div class="coh-accordion-tabs ssa-component coh-component ssa-component-instance-eb12b400-56f8-4543-a9f1-9f3fb3d903de coh-component-instance-eb12b400-56f8-4543-a9f1-9f3fb3d903de"> <div class="coh-accordion-tabs-inner coh-accordion-tabs-horizontal-left coh-accordion-tabs-display-accordion-xl coh-style-accordion coh-accordion-tabs-display-accordion" data-coh-accordion="{&quot;title&quot;:&quot;Accordion tabs container&quot;,&quot;styles&quot;:{&quot;xl&quot;:{&quot;accordionOrTab&quot;:&quot;accordion&quot;,&quot;collapsible&quot;:true,&quot;startCollapsed&quot;:true,&quot;animation&quot;:&quot;slide&quot;,&quot;offsetPositionAgainst&quot;:&quot;px&quot;,&quot;duration&quot;:100,&quot;scrollToAccordionOffset&quot;:0,&quot;accordionTabWidth&quot;:-2,&quot;accordionTabBleed&quot;:&quot;retain_gutters&quot;}},&quot;scrollToAccordion&quot;:false,&quot;setHash&quot;:false,&quot;horizontalVertical&quot;:&quot;horizontal_top&quot;,&quot;HorizontalPosition&quot;:&quot;left_aligned&quot;,&quot;VerticalPosition&quot;:&quot;left&quot;,&quot;settings&quot;:{&quot;styles&quot;:{&quot;xl&quot;:{&quot;accordionOrTab&quot;:&quot;accordion&quot;,&quot;collapsible&quot;:true,&quot;animation&quot;:&quot;slide&quot;,&quot;duration&quot;:700,&quot;startCollapsed&quot;:false,&quot;active&quot;:1,&quot;offsetPositionAgainst&quot;:&quot;px&quot;,&quot;scrollToAccordionOffset&quot;:0}},&quot;scrollToAccordion&quot;:false,&quot;setHash&quot;:false,&quot;horizontalVertical&quot;:&quot;horizontal_top&quot;,&quot;HorizontalPosition&quot;:&quot;left_aligned&quot;}}" data-once="cohAccordionTabs"> <ul class="coh-accordion-tabs-nav"><li><a href="#3408390695-3627088798-1" data-once="loadEvent">Center for Hard to Recycle Materials (CHaRM Center)</a></li><li><a href="#3408390695-3627088798-2" data-once="loadEvent">Reduce, Reuse and Recycle</a></li><li><a href="#3408390695-3627088798-3" data-once="loadEvent">Recycling Drop-Off Centers</a></li><li><a href="#3408390695-3627088798-4" 
data-once="loadEvent">Household Hazardous Waste (HHW)</a></li><li><a href="#3408390695-3627088798-5" data-once="loadEvent">Construction and Demolition Waste</a></li><li><a href="#3408390695-3627088798-6" data-once="loadEvent">Electronic Waste (E-Waste)</a></li><li><a href="#3408390695-3627088798-7" data-once="loadEvent">Food Waste</a></li><li><a href="#3408390695-3627088798-8" data-once="loadEvent">Composting</a></li><li><a href="#3408390695-3627088798-9" data-once="loadEvent">Clothing and Textile Waste</a></li><li><a href="#3408390695-3627088798-10" data-once="loadEvent">Medication/Prescription and Sharps Disposal</a></li><li><a href="#3408390695-3627088798-11" data-once="loadEvent">Recycling and Disposal Tips</a></li></ul> <div class="coh-accordion-tabs-content-wrapper">  <div class="coh-accordion-title" data-coh-tab-settings="[]" data-once="tab-init"><a href="#3408390695-3627088798-1" data-once="loadEvent" aria-expanded="false">Center for Hard to Recycle Materials (CHaRM Center)</a></div> <div id="3408390695-3627088798-1" class="coh-accordion-tabs-content" style="display: none;"> <div class="coh-wysiwyg">    <p>Cook County has partnered with South Suburban College to create the Center for Hard to Recycle Materials (CHaRM Center). This is a permanent recycling drop-off facility available to Cook County residents that helps divert millions of pounds of waste and other hard-to-recycle items from regional landfills and water systems. More information about materials accepted at the CHaRM Center can be found below.&nbsp;</p><p><span>This project is being supported, in whole or in part, by federal award number ALN 21.027 awarded to Cook County by the U.S. Department of the Treasury.</span></p><h4><strong>Location&nbsp;</strong></h4><p>15800 State St., South Holland, IL 60473. The drop-off site is located on north side of campus in the overflow parking lot. 
Enter at traffic signal on State Street.</p><h4><strong>Hours&nbsp;</strong></h4><ul><li>Every Tuesday:&nbsp;8 a.m. - noon&nbsp;</li><li>Every Thursday:&nbsp;1 p.m. – 5 p.m.&nbsp;</li><li>2nd and 4th Saturday every month:&nbsp;9 a.m. – 1 p.m. (TVs accepted during&nbsp;Saturday hours only)</li></ul><h4><strong>Materials Accepted</strong></h4><ul><li>Household recyclables (paper, plastic bottles and containers, metal cans and glass bottles and jars)</li><li>Electronic waste and TVs</li><li>Clothing and textiles</li><li>PS Foam (Styrofoam)</li><li>Personal healthcare equipment</li><li>Small appliances and small furniture&nbsp;</li></ul><p><a href="https://www.ssc.edu/charm/" target="_blank">See the full details</a>.&nbsp;</p> </div> </div>  <div class="coh-accordion-title" data-coh-tab-settings="[]" data-once="tab-init"><a href="#3408390695-3627088798-2" data-once="loadEvent" aria-expanded="false">Reduce, Reuse and Recycle</a></div> <div id="3408390695-3627088798-2" class="coh-accordion-tabs-content" style="display: none;"> <div class="coh-wysiwyg">    <h4><span class="TextRun SCXW141957897 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US"><strong>REDUCE</strong></span><span class="EOP SCXW141957897 BCX0">&nbsp;</span></h4><p><span class="TextRun SCXW141957897 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US">Interested in ways to reduce plastic consumption in your life? 
Take </span><span class="TextRun SCXW141957897 BCX0 ContextualSpellingAndGrammarError" lang="EN-US" xml:lang="EN-US">our</span><span class="TextRun SCXW141957897 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US"> </span><a class="Hyperlink SCXW141957897 BCX0" href="http://www.cookcountyil.gov/conservecook" target="_blank" rel="noreferrer noopener"><span class="TextRun Underlined SCXW141957897 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US">Conserve Cook County pledge</span></a><span class="TextRun SCXW141957897 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US">&nbsp;</span><span class="EOP SCXW141957897 BCX0">&nbsp;</span></p><p><span class="TextRun SCXW141957897 BCX0 NormalTextRun" lang="EN-US" xml:lang="EN-US">When you use single-use items, do you ever stop to think about where they go?&nbsp;</span><span class="EOP SCXW141957897 BCX0">&nbsp;</span></p><article class="align-center"><div class="field field--media-image field--name-image field--type-image field--label-hidden field__item">  <img loading="lazy" src="/sites/g/files/ywwepo161/files/images/2023-10-30/Journey%20of%20Single-Use%20Items%20Side%201.png" width="540" height="1350"></div>

#for tag in soup2.find_all(class_="coh-accordion-tabs ssa-component coh-component ssa-component-instance-eb12b400-56f8-4543-a9f1-9f3fb3d903de coh-component-instance-eb12b400-56f8-4543-a9f1-9f3fb3d903de"):
#    print(tag.text)

## Oregon

In [None]:
# oregon website
url = 'https://www.oregon.gov/deq/recycling/Pages/Recycle-Right.aspx'

# device-specific headers (copied from a desktop browser session)
# NOTE(review): the Cookie value is a captured browser cookie; confirm whether it is still needed.
referer = 'https://www.google.com/'
accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
accept_language = 'en-US,en;q=0.9'
cookie = 'BIGipServer~Oregon~OR-prd-SP-txdc.pool=rd1530o00000000000000000000ffffac1f2149o80; _ga=GA1.1.1630292430.1727735852; _ga_8VWBKP4KJ1=GS1.1.1727735851.1.0.1727735851.0.0.0; WSS_FullScreenMode=false'
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'

custom_headers = {
    "User-Agent": user_agent,
    "Accept": accept,
    "Accept-Language": accept_language,
    "Cookie": cookie,
    "Referer": referer
}

# getting website
# A timeout is set because requests otherwise waits indefinitely on an unresponsive server.
r = requests.get(url, headers=custom_headers, timeout=30)
# Fail here on a 4xx/5xx answer instead of silently parsing an error page further down.
r.raise_for_status()

In [None]:
# checking response (should be 200)
r

<Response [200]>

In [None]:
# Parse the Oregon response bytes with the html5lib tree builder.
soup1 = BeautifulSoup(r.content, features='html5lib')
# print(soup1.prettify())  # uncomment to inspect the parsed document

In [None]:
# Dump every <p> element (markup included) to see which paragraphs carry the guide text.
for paragraph in soup1.find_all('p'):
  print(paragraph)

<p>
                    <img alt="Oregon State Flag" src="https://images.oregon.gov/Portal/Images/Flags/oregon-flag.jpg"/>
                    <span class="hidden-phone hidden-xs">An official website of the State of Oregon
                         <span class="sr-only">Learn</span>
                        <a aria-haspopup="dialog" class="btn btn-link" data-toggle="modal" href="#or-official-modal" role="button">How you know »</a>
                        <span class="sr-only">(how to identify a Oregon.gov website)</span>
                    </span>
                    <a class="hidden-tablet hidden-desktop visible-xs-inline" data-toggle="modal" href="#or-official-modal" role="button">An official website of the State of Oregon »
                    </a>
                </p>
<p class="sr-only">You are here:</p>
<p>To protect the environment and reduce waste, it's important to learn how to recycle right. With so many items to sort, it can be confusing to know what is and isn't recyclable. I