Skip to content

Commit

Permalink
Update scraper.py
Browse files: browse the repository at this point in the history
  • Loading branch information
rdmaloney committed Aug 12, 2019
1 parent 9d8ad7d commit d75f383
Showing 1 changed file with 20 additions and 19 deletions.
39 changes: 20 additions & 19 deletions scraper.py
Expand Up @@ -18,35 +18,36 @@

def scrape_data():

data = requests.get("http://ufcstats.com/statistics/events/upcoming")
soup = BeautifulSoup(data.text, 'html.parser')
table = soup.find('table', {"class": "b-statistics__table-events"})
links = table.find_all('a', href=True)
for alpha in alphabets:
links.append("http://www.fightmetric.com/statistics/fighters?char=" + alpha + "&page=all")

# Now that we have a list of links, iterate over it and parse each page with BeautifulSoup
for link in links:
all_links.append(link.get('href'))
print(f"Now currently scraping link: {link}")
print(f"Currently on this link: {link}")

data = requests.get(link)
soup = BeautifulSoup(data.text, 'html.parser')
names = soup.find_all('a', href=True)

# list to store the URLs of the fighter pages
fighters = []

for name in names:
fighters.append(name['href'])

fighters = sorted(set(fighters))

data = requests.get(link)
soup = BeautifulSoup(data.text, 'html.parser')
time.sleep(1)
for fighter in fighters:


h2 = soup.find("h2")
e_name.append(h2.text.strip())

rows = []


rows = soup.find('table', {"class": "b-fight-details__table b-fight-details__table_style_margin-top b-fight-details__table_type_event-details js-fight-table"})



for row in rows:

data = requests.get(rows)
data = requests.get(fighter)
soup = BeautifulSoup(data.text, 'html.parser')

fighters = row.find_all('a', {"href": re.compile("http://ufcstats.com/fighter-details")})
fighters = fighter.find_all('a', {"href": re.compile("http://ufcstats.com/fighter-details")})

try:
f1.append(fighters[0].text.strip())
Expand Down

0 comments on commit d75f383

Please sign in to comment.