In [1]:
import pandas as pd
import re
import datetime
import requests
import bs4

In [2]:
# Scrape coursereport.com
url = "https://www.coursereport.com/best-coding-bootcamps"
r = requests.get(url)
soup = bs4.BeautifulSoup(r.content, "html.parser")
school_list_items = soup.find("ul", id="schools").find_all("li")
school_list_item = school_list_items[0]

In [3]:
school_list_item

<li><div class="info-container"><a href="/schools/flatiron-school"><div class="school-image"><img alt="flatiron-school-logo" src="https://course_report_production.s3.amazonaws.com/rich/rich_files/rich_files/999/s100/flatironschool.png" title="Flatiron School Logo"/></div></a><h3><a href="/schools/flatiron-school">1. Flatiron School</a></h3><span class="banner-container"><img alt="Established school badge" class="banner" src="https://coursereport-production-herokuapp-com.global.ssl.fastly.net/assets/established_school_badge-d099e568a815b527a609dbea7ca07bb9.png"/><img alt="Large alumni network badge" class="banner" src="https://coursereport-production-herokuapp-com.global.ssl.fastly.net/assets/large_alumni_network_badge-00d96602124ae1fffd2e993b116ea803.png"/><img alt="Transparent outcomes badge" class="banner" src="https://coursereport-production-herokuapp-com.global.ssl.fastly.net/assets/transparent_outcomes_badge-d69b653a170c89c52079317b1b985664.png"/><div class="ratings title-rating">

## Define functions to parse each field

In [4]:
# rank 
def get_rank(school_li):
    rank_pattern = r"^(\d+)\."
    school_rank = str(school_li.h3.a.contents[0])
    return int(re.findall(rank_pattern, school_rank)[0])

# school name
def get_name(school_li):
    name_pattern = r"^\d+\.\s(.*)$"
    school_rank = str(school_li.h3.a.contents[0])
    return re.findall(name_pattern, school_rank)[0]

In [5]:
print(get_rank(school_list_item))
print(get_name(school_list_item))

1
Flatiron School


In [7]:
# rating
def get_rating(school_li):
    rating_pattern = r"\((.*)\)"
    rating_string = (school_li
                     .find("span", class_="longform-rating-text")
                     .contents)[0]
    return float(re.findall(rating_pattern, rating_string)[0])

In [8]:
get_rating(school_list_item)

4.73

In [41]:
# stars
def get_stars(school_li):
    # translate class names to number of stars
    stars_dict = {"icon-full_star": 1,
                  "icon-half_star": .5,
                  "icon-empty_star": 0}
    
    # get a list of all star span tags
    star_tag_list = (school_li
                     .find("div", class_="ratings title-rating")
                     .find_all("span"))[1:]
    return sum([stars_dict[star_tag["class"][0]] for star_tag in star_tag_list])

In [13]:
get_stars(school_list_item)

4.5

In [14]:
# reviews
def get_reviews(school_li):
    reviews_pattern = r"^(\d+)\s"
    reviews_string = (school_li
                      .find_all("span", class_="longform-rating-text")[1] # get 2nd element
                      .a
                      .contents)[0]
    return int(re.findall(reviews_pattern, reviews_string)[0])

In [15]:
get_reviews(school_list_item)

426

In [24]:
# locations
def get_locations(school_li):
    return [location.contents[0] for location in (school_li
                                                  .find("span", class_="location")
                                                  .find_all("a"))]

In [17]:
get_locations(school_list_item)

['London',
 'Washington',
 'Austin',
 'Houston',
 'Atlanta',
 'New York City',
 'Chicago',
 'San Francisco',
 'Online',
 'Seattle',
 'Denver']

In [21]:
",".join(["bye","adios"])

'bye,adios'

In [25]:
# locations
def get_locations(school_li):
    return "|".join([location.contents[0] for location in (school_li
                                                           .find("span", class_="location")
                                                           .find_all("a"))])

In [23]:
get_locations(school_list_item)

'London|Washington|Austin|Houston|Atlanta|New York City|Chicago|San Francisco|Online|Seattle|Denver'

In [28]:
# description
def get_description(school_li):
    # TODO: check encoding
    return (school_li
            .find("div", class_="desc-container")
            .p
            .p
            .contents)[0]

In [27]:
get_description(school_list_item)

'Flatiron School offers immersive on-campus and online programs in software engineering, data science, and UX/UI Design in NYC, Brooklyn, Washington DC, London, Houston, Atlanta, Austin, Seattle, Chicago, Denver, and Online. Flatiron School’s immersive courses aim to launch students into fulfilling careers as software engineers, data scientists, and UX/UI designers through rigorous, market-aligned curricula, and the support of seasoned instructors and personal career coaches. Through test-driven labs and portfolio projects, Flatiron teaches students\xa0to think\xa0and build\xa0like software engineers and data scientists. Flatiron School’s UX/UI Design Immersive includes a client project to give students client-facing experience and an industry-vetted portfolio.'

## Stitch the pieces together

In [29]:
# create a row:

row1 = {"rank": get_rank(school_list_item),
        "name": get_name(school_list_item),
        "rating": get_rating(school_list_item),
        "stars": get_stars(school_list_item),
        "reviews": get_reviews(school_list_item),
        "locations": get_locations(school_list_item),
        "description": get_description(school_list_item)}

In [30]:
row1

{'rank': 1,
 'name': 'Flatiron School',
 'rating': 4.73,
 'stars': 4.5,
 'reviews': 426,
 'locations': 'London|Washington|Austin|Houston|Atlanta|New York City|Chicago|San Francisco|Online|Seattle|Denver',
 'description': 'Flatiron School offers immersive on-campus and online programs in software engineering, data science, and UX/UI Design in NYC, Brooklyn, Washington DC, London, Houston, Atlanta, Austin, Seattle, Chicago, Denver, and Online. Flatiron School’s immersive courses aim to launch students into fulfilling careers as software engineers, data scientists, and UX/UI designers through rigorous, market-aligned curricula, and the support of seasoned instructors and personal career coaches. Through test-driven labs and portfolio projects, Flatiron teaches students\xa0to think\xa0and build\xa0like software engineers and data scientists. Flatiron School’s UX/UI Design Immersive includes a client project to give students client-facing experience and an industry-vetted portfolio.'}

In [32]:
pd.DataFrame([row1])

Unnamed: 0,rank,name,rating,stars,reviews,locations,description
0,1,Flatiron School,4.73,4.5,426,London|Washington|Austin|Houston|Atlanta|New Y...,Flatiron School offers immersive on-campus and...


In [39]:
def get_row(school_li):
     return {"rank": get_rank(school_li),
             "name": get_name(school_li),
             "rating": get_rating(school_li),
             "stars": get_stars(school_li),
             "reviews": get_reviews(school_li),
             "locations": get_locations(school_li),
             "description": get_description(school_li)}

In [36]:
get_row(school_list_items[2])

{'rank': 3,
 'name': 'Codesmith',
 'rating': 4.89,
 'stars': 5,
 'reviews': 328,
 'locations': 'Los Angeles|New York City|Online',
 'description': 'Codesmith offers a full-time, 12-week full stack software engineering bootcamp in Los Angeles\xa0and New York City. Codesmith is a selective, need-blind program focusing largely on computer science and full-stack JavaScript, with an emphasis on\xa0technologies like React, Redux, Node, build tools, Dev Ops and machine learning. This program enables Codesmith students (known as Residents) to build\xa0open-source projects, with the aim of moving into positions as skilled software engineers. Codesmith Residents become engineers, not technicians, through a deep understanding of advanced JavaScript practices, fundamental computer science concepts (such as algorithms and data structures),\xa0and object-oriented and functional programming.\xa0Codesmith helps residents develop strong problem-solving abilities and technical communication skills – val

In [42]:
[get_row(school_list_item) for school_list_item in school_list_items]

[{'rank': 1,
  'name': 'Flatiron School',
  'rating': 4.73,
  'stars': 4.5,
  'reviews': 426,
  'locations': 'London|Washington|Austin|Houston|Atlanta|New York City|Chicago|San Francisco|Online|Seattle|Denver',
  'description': 'Flatiron School offers immersive on-campus and online programs in software engineering, data science, and UX/UI Design in NYC, Brooklyn, Washington DC, London, Houston, Atlanta, Austin, Seattle, Chicago, Denver, and Online. Flatiron School’s immersive courses aim to launch students into fulfilling careers as software engineers, data scientists, and UX/UI designers through rigorous, market-aligned curricula, and the support of seasoned instructors and personal career coaches. Through test-driven labs and portfolio projects, Flatiron teaches students\xa0to think\xa0and build\xa0like software engineers and data scientists. Flatiron School’s UX/UI Design Immersive includes a client project to give students client-facing experience and an industry-vetted portfolio.'

In [43]:
df = pd.DataFrame([get_row(school_list_item) for school_list_item in school_list_items])

In [44]:
df

Unnamed: 0,rank,name,rating,stars,reviews,locations,description
0,1,Flatiron School,4.73,4.5,426,London|Washington|Austin|Houston|Atlanta|New Y...,Flatiron School offers immersive on-campus and...
1,2,Hack Reactor,4.68,4.5,261,Austin|Boulder|Los Angeles|New York City|Phoen...,"[[Founded in 2012, Hack Reactor is a 12-week i..."
2,3,Codesmith,4.89,5.0,328,Los Angeles|New York City|Online,"Codesmith offers a full-time, 12-week full sta..."
3,4,App Academy,4.7,4.5,700,New York City|San Francisco|Online,App Academy is an immersive 12-week Ruby-focus...
4,5,Turing,4.77,4.5,156,Denver,Turing School of Software & Design is a 7-mont...
5,6,Fullstack Academy,4.91,5.0,254,New York City|Chicago|Online,Fullstack Academy offers full-time and part-ti...
6,7,General Assembly,4.32,4.5,429,Washington|San Diego|Stamford|Orlando|Detroit|...,General Assembly offers short and long courses...
7,8,Tech Elevator,4.95,5.0,194,Cincinnati|Pittsburgh|Philadelphia|Cleveland|C...,Tech Elevator is an immersive 14-week coding b...
8,9,DigitalCrafts,4.88,5.0,194,Houston|Atlanta|Online,DigitalCrafts offers a 16-week full-time and a...
9,10,Software Guild,4.68,4.5,142,Minneapolis|Louisville|Atlanta|Online,"The Software Guild offers immersive full-time,..."


Assume, we now pull the most up-to-date ranking every day, so that we can track movements.

--> We'd need to add a date to it

In [47]:
print(datetime.date.today())

2020-01-20


In [50]:
def get_row(school_li):
     return {"date_id": str(datetime.date.today()),
             "rank": get_rank(school_li),
             "name": get_name(school_li),
             "rating": get_rating(school_li),
             "stars": get_stars(school_li),
             "reviews": get_reviews(school_li),
             "locations": get_locations(school_li),
             "description": get_description(school_li)}

In [51]:
get_row(school_list_items[10])

{'date_id': '2020-01-20',
 'rank': 11,
 'name': 'Holberton School',
 'rating': 4.66,
 'stars': 4.5,
 'reviews': 80,
 'locations': 'Bogotá|Medellín|New Haven|Tunis|Cali|Tulsa|San Francisco',
 'description': "Holberton School is a two-year software engineering school with campuses in San Francisco, New Haven, Tulsa, Bogotá, Medellin, Cali, and Tunisia that trains individuals to become Full Stack Software Engineers. The school's mission is to train the next generation of software developers through\xa0100% hands-on learning."}

In [52]:
df = pd.DataFrame([get_row(school_list_item) for school_list_item in school_list_items])

In [53]:
df

Unnamed: 0,date_id,rank,name,rating,stars,reviews,locations,description
0,2020-01-20,1,Flatiron School,4.73,4.5,426,London|Washington|Austin|Houston|Atlanta|New Y...,Flatiron School offers immersive on-campus and...
1,2020-01-20,2,Hack Reactor,4.68,4.5,261,Austin|Boulder|Los Angeles|New York City|Phoen...,"[[Founded in 2012, Hack Reactor is a 12-week i..."
2,2020-01-20,3,Codesmith,4.89,5.0,328,Los Angeles|New York City|Online,"Codesmith offers a full-time, 12-week full sta..."
3,2020-01-20,4,App Academy,4.7,4.5,700,New York City|San Francisco|Online,App Academy is an immersive 12-week Ruby-focus...
4,2020-01-20,5,Turing,4.77,4.5,156,Denver,Turing School of Software & Design is a 7-mont...
5,2020-01-20,6,Fullstack Academy,4.91,5.0,254,New York City|Chicago|Online,Fullstack Academy offers full-time and part-ti...
6,2020-01-20,7,General Assembly,4.32,4.5,429,Washington|San Diego|Stamford|Orlando|Detroit|...,General Assembly offers short and long courses...
7,2020-01-20,8,Tech Elevator,4.95,5.0,194,Cincinnati|Pittsburgh|Philadelphia|Cleveland|C...,Tech Elevator is an immersive 14-week coding b...
8,2020-01-20,9,DigitalCrafts,4.88,5.0,194,Houston|Atlanta|Online,DigitalCrafts offers a 16-week full-time and a...
9,2020-01-20,10,Software Guild,4.68,4.5,142,Minneapolis|Louisville|Atlanta|Online,"The Software Guild offers immersive full-time,..."


# Can we do the same with data science bootcamps?

In [None]:
url = "https://www.coursereport.com/best-coding-bootcamps"
r = requests.get(url)
soup = bs4.BeautifulSoup(r.content, "html.parser")
school_list_items = soup.find("ul", id="schools").find_all("li")

In [57]:
def get_school_list(ranking="coding"):
    url = f"https://www.coursereport.com/best-{ranking}-bootcamps"
    r = requests.get(url)
    soup = bs4.BeautifulSoup(r.content, "html.parser")
    return soup.find("ul", id="schools").find_all("li")

In [58]:
pd.DataFrame([get_row(school_list_item) for school_list_item in get_school_list("data-science")])

Unnamed: 0,date_id,rank,name,rating,stars,reviews,locations,description
0,2020-01-20,1,BrainStation,4.66,4.5,315,Vancouver|Toronto|New York City|Online,BrainStation offers full-time and part-time co...
1,2020-01-20,2,Coding Temple,4.89,5.0,97,Dallas|Online|Washington|Boston|Chicago,"Coding Temple offers 10-week, full-stack codin..."
2,2020-01-20,3,Divergence Academy,5.0,5.0,20,Dallas,Divergence Academy is a 12-week full-time data...
3,2020-01-20,4,Flatiron School,4.73,4.5,426,London|Austin|Atlanta|Houston|Seattle|New York...,Flatiron School offers immersive on-campus and...
4,2020-01-20,5,Galvanize,4.47,4.5,177,Boulder|Austin|Seattle|New York City|Los Angel...,Galvanize offers a 13-week full-time and a 26-...
5,2020-01-20,6,General Assembly,4.32,4.5,429,Sydney|Austin|Atlanta|Singapore|Melbourne|Dall...,General Assembly offers short and long courses...
6,2020-01-20,7,Metis,4.89,5.0,92,Seattle|New York City|Online|Chicago|San Franc...,Metis offers data science training via 12-week...
7,2020-01-20,8,NYC Data Science Academy,4.83,5.0,281,New York City|Online,NYC Data Science Academy offers 12-week data s...
8,2020-01-20,9,Principal Analytics Prep,4.88,5.0,16,New York City,"Principal Analytics Prep offers a 12-week, ful..."
9,2020-01-20,10,Propulsion Academy,5.0,5.0,42,Zurich,Propulsion Academy offers 12-week full-time bo...


In [59]:
pd.DataFrame([get_row(school_list_item) for school_list_item in get_school_list("online")])

Unnamed: 0,date_id,rank,name,rating,stars,reviews,locations,description
0,2020-01-20,1,AcadGild,4.3,4.0,172,Bangalore|Online,AcadGild is an online coding bootcamp offering...
1,2020-01-20,2,Actualize,4.87,5.0,216,Online|Chicago,Actualize is a 12-week software development bo...
2,2020-01-20,3,Altcademy,5.0,5.0,42,Hong Kong|Online,Altcademy (formerly Hack Pacific) is an educat...
3,2020-01-20,4,Bloc,4.66,4.5,444,Online,Bloc is an online coding bootcamp that incorpo...
4,2020-01-20,5,Bottega,4.49,4.5,53,Salt Lake City|Online,Bottega offers part-time and full-time tech bo...
5,2020-01-20,6,CareerFoundry,4.52,4.5,336,Berlin|Online,CareerFoundry is an online coding school that ...
6,2020-01-20,7,Coding Dojo,4.41,4.5,396,Tulsa|Boise|Orange County|Silicon Valley|Arlin...,Coding Dojo is a unique coding bootcamp that t...
7,2020-01-20,8,Covalence,4.65,4.5,49,Online,"Covalence provides career-changing 12-week, fu..."
8,2020-01-20,9,Designlab,4.67,4.5,98,Online,[Designlab]
9,2020-01-20,10,Flatiron School,4.73,4.5,426,Houston|Austin|New York City|London|Atlanta|Se...,Flatiron School offers immersive on-campus and...


Great! Works with all 'similar' rankings (coding, data-science, online).
But we currently no means to differentiate the dataframes other than the name we assign to them, which would cause problems if we concatenate them.

In [60]:
def get_row(school_li, ranking):
     return {"date_id": str(datetime.date.today()),
             "ranking": ranking,
             "rank": get_rank(school_li),
             "name": get_name(school_li),
             "rating": get_rating(school_li),
             "stars": get_stars(school_li),
             "reviews": get_reviews(school_li),
             "locations": get_locations(school_li),
             "description": get_description(school_li)}

In [61]:
get_row(school_list_item, "coding")

{'date_id': '2020-01-20',
 'ranking': 'coding',
 'rank': 1,
 'name': 'Flatiron School',
 'rating': 4.73,
 'stars': 4.5,
 'reviews': 426,
 'locations': 'London|Washington|Austin|Houston|Atlanta|New York City|Chicago|San Francisco|Online|Seattle|Denver',
 'description': 'Flatiron School offers immersive on-campus and online programs in software engineering, data science, and UX/UI Design in NYC, Brooklyn, Washington DC, London, Houston, Atlanta, Austin, Seattle, Chicago, Denver, and Online. Flatiron School’s immersive courses aim to launch students into fulfilling careers as software engineers, data scientists, and UX/UI designers through rigorous, market-aligned curricula, and the support of seasoned instructors and personal career coaches. Through test-driven labs and portfolio projects, Flatiron teaches students\xa0to think\xa0and build\xa0like software engineers and data scientists. Flatiron School’s UX/UI Design Immersive includes a client project to give students client-facing expe

In [62]:
def get_ranking(ranking):
    return pd.DataFrame([get_row(school_list_item, ranking) for school_list_item in get_school_list(ranking)])

In [63]:
rankings = ["coding", "data-science", "online"]

get_ranking(rankings[0])

Unnamed: 0,date_id,ranking,rank,name,rating,stars,reviews,locations,description
0,2020-01-20,coding,1,Flatiron School,4.73,4.5,426,London|Washington|Austin|Houston|Atlanta|New Y...,Flatiron School offers immersive on-campus and...
1,2020-01-20,coding,2,Hack Reactor,4.68,4.5,261,Austin|Boulder|Los Angeles|New York City|Phoen...,"[[Founded in 2012, Hack Reactor is a 12-week i..."
2,2020-01-20,coding,3,Codesmith,4.89,5.0,328,Los Angeles|New York City|Online,"Codesmith offers a full-time, 12-week full sta..."
3,2020-01-20,coding,4,App Academy,4.7,4.5,700,New York City|San Francisco|Online,App Academy is an immersive 12-week Ruby-focus...
4,2020-01-20,coding,5,Turing,4.77,4.5,156,Denver,Turing School of Software & Design is a 7-mont...
5,2020-01-20,coding,6,Fullstack Academy,4.91,5.0,254,New York City|Chicago|Online,Fullstack Academy offers full-time and part-ti...
6,2020-01-20,coding,7,General Assembly,4.32,4.5,429,Washington|San Diego|Stamford|Orlando|Detroit|...,General Assembly offers short and long courses...
7,2020-01-20,coding,8,Tech Elevator,4.95,5.0,194,Cincinnati|Pittsburgh|Philadelphia|Cleveland|C...,Tech Elevator is an immersive 14-week coding b...
8,2020-01-20,coding,9,DigitalCrafts,4.88,5.0,194,Houston|Atlanta|Online,DigitalCrafts offers a 16-week full-time and a...
9,2020-01-20,coding,10,Software Guild,4.68,4.5,142,Minneapolis|Louisville|Atlanta|Online,"The Software Guild offers immersive full-time,..."


In [64]:
get_ranking(rankings[1])

Unnamed: 0,date_id,ranking,rank,name,rating,stars,reviews,locations,description
0,2020-01-20,data-science,1,BrainStation,4.66,4.5,315,Vancouver|Toronto|New York City|Online,BrainStation offers full-time and part-time co...
1,2020-01-20,data-science,2,Coding Temple,4.89,5.0,97,Dallas|Online|Washington|Boston|Chicago,"Coding Temple offers 10-week, full-stack codin..."
2,2020-01-20,data-science,3,Divergence Academy,5.0,5.0,20,Dallas,Divergence Academy is a 12-week full-time data...
3,2020-01-20,data-science,4,Flatiron School,4.73,4.5,426,London|Austin|Atlanta|Houston|Seattle|New York...,Flatiron School offers immersive on-campus and...
4,2020-01-20,data-science,5,Galvanize,4.47,4.5,177,Boulder|Austin|Seattle|New York City|Los Angel...,Galvanize offers a 13-week full-time and a 26-...
5,2020-01-20,data-science,6,General Assembly,4.32,4.5,429,Sydney|Austin|Atlanta|Singapore|Melbourne|Dall...,General Assembly offers short and long courses...
6,2020-01-20,data-science,7,Metis,4.89,5.0,92,Seattle|New York City|Online|Chicago|San Franc...,Metis offers data science training via 12-week...
7,2020-01-20,data-science,8,NYC Data Science Academy,4.83,5.0,281,New York City|Online,NYC Data Science Academy offers 12-week data s...
8,2020-01-20,data-science,9,Principal Analytics Prep,4.88,5.0,16,New York City,"Principal Analytics Prep offers a 12-week, ful..."
9,2020-01-20,data-science,10,Propulsion Academy,5.0,5.0,42,Zurich,Propulsion Academy offers 12-week full-time bo...
