Skip to content

Commit

Permalink
Seems like the server is trying to defend against scraper requests...
Browse files (browse the repository at this point in the history)
  • Loading branch information
psychemedia committed Aug 4, 2018
1 parent b818f5d commit 87fc3e0
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions scraper.py
Expand Up @@ -155,7 +155,12 @@ def OSGB36toWGS84(E,N):
def getCurrApplications():
#Get base page
url='https://www.iwight.com/planning/planAppSearch.aspx'
response =requests.get(url)
session = requests.Session()
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'}
session.headers.update(headers)


response =session.get(url)
soup=BeautifulSoup(response.content)
viewstate = soup.find('input' , id ='__VIEWSTATE')['value']
eventvalidation=soup.find('input' , id ='__EVENTVALIDATION')['value']
Expand All @@ -164,7 +169,10 @@ def getCurrApplications():
'__VIEWSTATEGENERATOR':viewstategenerator,
'__EVENTVALIDATION':eventvalidation,'q':'Search the site...'}
#Get all current applications
r=requests.post(url,data=params)
headers['Referer'] = response.request.url
headers['Origin']= 'https://www.iow.gov.uk'
headers['Host']= 'www.iow.gov.uk'
r=session.post(url,headers=headers,data=params)
soup=BeautifulSoup(r.content)
t=soup.find('table',id='dgResults')
data=[]
Expand Down

0 comments on commit 87fc3e0

Please sign in to comment.