<a href="https://colab.research.google.com/github/lukas-weiss/webscrape_example/blob/main/bs_webscrape_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen as uReq

def get_raw_html_content(url):
  """Fetch `url` and return the raw response body.

  Args:
    url: Address to open; passed unchanged to `urlopen`.

  Returns:
    Whatever `read()` on the opened response yields (the undecoded body).

  Errors raised while opening or reading are not caught here and propagate
  to the caller.
  """
  # The context manager closes the connection even when read() raises, which
  # the previous explicit close() call did not guarantee.
  with uReq(url) as client:
    return client.read()

# Download the Class Central data-science subject page, then parse the raw
# response with BeautifulSoup using the 'html.parser' backend.
html_content = get_raw_html_content(
  'https://www.classcentral.com/subject/data-science'
)
page_soup = soup(markup=html_content, features='html.parser')

def rating_value(rating):
  """Convert a scraped star rating to a float.

  Args:
    rating: The rating as scraped, normally a string such as "4.5".

  Returns:
    The rating as a float, or the sentinel -1 when it cannot be converted.
  """
  try:
    return float(rating)
  except (TypeError, ValueError):
    # ValueError: text that is not a number (e.g. an empty string).
    # TypeError: a non-string, non-numeric input such as None.
    return -1

def get_text_from_elements(elements):
  """Return the whitespace-stripped `.text` of each element, in order.

  Args:
    elements: Iterable of objects exposing a string `.text` attribute.

  Returns:
    A new list with one stripped string per element.
  """
  return [node.text.strip() for node in elements]
    

# load courses: every <span> whose class attribute matches the title styling
# (find_all is the current name of the legacy findAll alias)
course_elements = page_soup.find_all("span", {'class' : 'text-1 weight-semi line-tight'})
courses = get_text_from_elements(course_elements)


# load provider: every <a> that carries an href and the provider styling
provider_elements = page_soup.find_all('a', href=True, attrs={'class':'color-charcoal italic'})
providers = get_text_from_elements(provider_elements)


# load ratings: one entry per matched rating cell
ratings = []
rating_elements = page_soup.find_all('div', attrs={'class':'col border-box text-center nowrap row large-up-text-right padding-horz-small push'})
for rating in rating_elements:
  value = rating.find('span', attrs={'class':'xlarge-up-hidden color-charcoal text-center'})
  if value is None:
    # find() returns None when the cell has no rating span; record the same
    # -1 sentinel rating_value uses instead of crashing on `.text`, so the
    # list keeps one entry per matched cell.
    ratings.append(-1)
  else:
    ratings.append(rating_value(value.text.strip()))


# Assemble the three scraped lists into a table (columns: course, ratings,
# provider), show it, and write it to CSV in the working directory.
df = pd.DataFrame(
  data={
    'course': courses,
    'ratings': ratings,
    'provider': providers,
  }
)
print(df)
df.to_csv(path_or_buf='classcentral_ds_courses.csv')

                                               course  ratings     provider
0                                       R Programming      2.8     Coursera
1                        The Data Scientist’s Toolbox      3.3     Coursera
2                        Computational Social Science      4.8     Coursera
3                           Getting and Cleaning Data      3.5     Coursera
4                                  The Analytics Edge      4.7          edX
5                           Exploratory Data Analysis      3.9     Coursera
6                               Become a Data Analyst      4.5      Udacity
7                            Introduction to Big Data      2.7     Coursera
8              Introduction to Data Science in Python      2.4     Coursera
9                             Python for Data Science      4.4          edX
10                   Mastering Data Analysis in Excel      1.8     Coursera
11                     A Crash Course in Data Science      3.5     Coursera
12          