diff --git a/scraper.py b/scraper.py index 18f9cdb..6bfbf45 100644 --- a/scraper.py +++ b/scraper.py @@ -6,22 +6,7 @@ import lxml.html import requests -#Some example URLs -schoolurl = "https://reports.ofsted.gov.uk/inspection-reports/find-inspection-report/provider/ELS/140407" -pdfurl = "https://reports.ofsted.gov.uk/provider/files/2631211/urn/103980.pdf" -searchurl = "https://reports.ofsted.gov.uk/inspection-reports/find-inspection-report/results/any/21/any/any/any/any/any/any/any/week/0/0#search4" -# Read in a page -html = requests.get(searchurl).content -#convert to lxml object -root = lxml.html.fromstring(html) -#grab any content that is within