Skip to content

Commit

Permalink
Merge pull request #69 from jtweeder/scrapperMod
Browse files Browse the repository at this point in the history
Add randomized User-Agent headers to outgoing requests to reduce scraping failures
  • Loading branch information
jtweeder committed May 13, 2023
2 parents fe7b1b6 + 8f84626 commit d3b4055
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion — meals/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from fractions import Fraction
import requests
import random
from bs4 import BeautifulSoup
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
Expand Down Expand Up @@ -67,7 +68,16 @@ def pull_mstr(self):
return False

def _make_soup(self):
    """Fetch ``self.rec_url`` and parse it into a BeautifulSoup tree.

    A User-Agent header is chosen at random from a small pool of common
    browser strings so scraped sites are less likely to reject the
    request as a bot.

    Returns:
        BeautifulSoup: the parsed HTML document.

    Raises:
        ValueError: if the HTTP response status is not OK (2xx).
        requests.exceptions.RequestException: on connection failure or
            timeout.
    """
    # Rotating through real browser UA strings reduces bot-blocking failures.
    user_agents = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
        'Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148',
        'Mozilla/5.0 (Linux; Android 11; SM-G960U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Mobile Safari/537.36',
    )
    headers = {'User-Agent': random.choice(user_agents)}
    # timeout: requests has NO default timeout — without it a stalled
    # server would hang this call (and the request cycle) indefinitely.
    page = requests.get(self.rec_url, headers=headers, timeout=10)
    if not page.ok:
        raise ValueError(f'URL did not resolve to OK: {page.status_code}')
    return BeautifulSoup(page.content, 'html.parser')
Expand Down

0 comments on commit d3b4055

Please sign in to comment.