Update scraper.py

rdmaloney · Aug 12, 2019 · d75f383
1 parent 9d8ad7d
commit d75f383
Showing 1 changed file with 20 additions and 19 deletions.
diff --git a/scraper.py b/scraper.py
@@ -18,35 +18,36 @@
 
 def scrape_data():
 
-        data = requests.get("http://ufcstats.com/statistics/events/upcoming")
-        soup = BeautifulSoup(data.text, 'html.parser')
-        table = soup.find('table', {"class": "b-statistics__table-events"})
-        links = table.find_all('a', href=True)
+        for alpha in alphabets:
+        links.append("http://www.fightmetric.com/statistics/fighters?char=" + alpha + "&page=all")
 
+        # now that we have a list of links we need to iterate it with BeautifulSoup
         for link in links:
-            all_links.append(link.get('href'))
-            print(f"Now currently scraping link: {link}")
+        print(f"Currently on this link: {link}")
+
+        data = requests.get(link)
+        soup = BeautifulSoup(data.text, 'html.parser')
+        names = soup.find_all('a', href=True)
+
+        # list to store url page of fighters
+        fighters = []
+
+        for name in names:
+            fighters.append(name['href'])
+
+        fighters = sorted(set(fighters))
 
-            data = requests.get(link)
-            soup = BeautifulSoup(data.text, 'html.parser')
-            time.sleep(1)
+        for fighter in fighters:
+
 
             h2 = soup.find("h2")
             e_name.append(h2.text.strip())
 
-            rows = []
-
-
-            rows = soup.find('table', {"class": "b-fight-details__table b-fight-details__table_style_margin-top b-fight-details__table_type_event-details js-fight-table"})
-
-
-
-            for row in rows:
 
-                    data = requests.get(rows)
+                    data = requests.get(fighter)
                     soup = BeautifulSoup(data.text, 'html.parser')
 
-                    fighters = row.find_all('a', {"href": re.compile("http://ufcstats.com/fighter-details")})
+                    fighters = fighter.find_all('a', {"href": re.compile("http://ufcstats.com/fighter-details")})
 
                     try:
                         f1.append(fighters[0].text.strip())