Skip to content

Commit

Permalink
Update scraper.py
Browse files Browse the repository at this point in the history
  • Loading branch information
rdmaloney committed Aug 30, 2019
1 parent 710c217 commit 25d3a3f
Showing 1 changed file with 33 additions and 34 deletions.
67 changes: 33 additions & 34 deletions scraper.py
Expand Up @@ -14,57 +14,56 @@
f2 = []


def scrape_data():
    """Scrape upcoming UFC events and record every listed bout.

    Fetches the upcoming-events index page and appends each link found in
    the events table to the module-level ``all_links`` list.  Every link in
    ``all_links`` is then fetched and, for each fight row on that page, one
    entry is appended to each of the module-level lists ``e_name`` (text of
    the page's first ``<h2>``), ``f1`` and ``f2`` (the two fighter names).

    A row that does not contain two fighter links gets the placeholder
    ``"null"`` in both ``f1`` and ``f2`` so the three lists always stay the
    same length.

    Returns:
        None. Results are delivered through the module-level lists.
    """
    # Compiled once; the pattern does not change between rows or pages.
    fighter_href = re.compile("http://ufcstats.com/fighter-details")
    fight_row_class = (
        "b-fight-details__table-row "
        "b-fight-details__table-row__hover "
        "js-fight-details-click"
    )

    data = requests.get("http://ufcstats.com/statistics/events/upcoming")
    soup = BeautifulSoup(data.text, 'html.parser')
    time.sleep(1)  # pause between consecutive requests to the site
    table = soup.find('table', {"class": "b-statistics__table-events"})
    links = table.find_all('a', href=True)

    for link in links:
        all_links.append(link.get('href'))

    for link in all_links:
        print(f"Now currently scraping link: {link}")

        data = requests.get(link)
        soup = BeautifulSoup(data.text, 'html.parser')
        time.sleep(1)  # pause between consecutive requests to the site

        # The heading is the same for every row on the page, so look it up
        # once per page instead of once per row.
        heading = soup.find("h2")
        event_name = heading.text.strip() if heading is not None else "null"

        rows = soup.find_all('tr', {"class": fight_row_class})

        for row in rows:
            e_name.append(event_name)

            fighters = row.find_all('a', {"href": fighter_href})

            # Append to f1 and f2 together or not at all: appending the
            # first name before discovering the second is missing would
            # leave f1 one entry longer than f2 and e_name.
            if len(fighters) >= 2:
                f1.append(fighters[0].text.strip())
                f2.append(fighters[1].text.strip())
            else:
                f1.append("null")
                f2.append("null")

def create_df():
    """Build a DataFrame from the module-level scrape result lists.

    Columns are added in this order: ``Event``, ``Fighter1``,
    ``Fighter1_Odds``, ``Fighter2``, ``Fighter2_Odds``.

    Returns:
        The populated ``pandas.DataFrame``.
    """
    # NOTE(review): f1_odds / f2_odds are defined outside the visible part
    # of the file -- confirm they are filled to the same length as e_name.
    column_values = (
        ("Event", e_name),
        ("Fighter1", f1),
        ("Fighter1_Odds", f1_odds),
        ("Fighter2", f2),
        ("Fighter2_Odds", f2_odds),
    )

    # Start from an empty frame and attach the columns one by one.
    frame = pd.DataFrame()
    for column, values in column_values:
        frame[column] = values

    return frame


def merge_data(df):


Expand Down

0 comments on commit 25d3a3f

Please sign in to comment.