<a href="https://colab.research.google.com/github/pbeens/python/blob/master/OISE_AQ_ABQ_Courses.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This program scans the OISE program pages listed in `urls` to compile a list of all AQ/ABQ courses offered by OISE, then scans each of those course pages to see whether the course is offered in the prescribed term (see `term` in the `# global variable(s)` cell). The name and URL of each course offered that term are then stored locally in an HTML file (`courses.html`).

GitHub URL: https://github.com/pbeens/python/blob/master/OISE_AQ_ABQ_Courses.ipynb

Colab URL: https://colab.research.google.com/drive/18DxRzxTiDYHEOQ6ZlE4qqQO8C0_2r0t-

In [None]:
# imports
import html
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup

In [None]:
# global variable(s)
# `urls`: the program pages that are scanned for links to individual courses.
# All of them live under the same site section, so only the last path
# component is spelled out for each one.
program_root = 'https://cpl.oise.utoronto.ca/program_certificate/'
program_slugs = (
    'abq-primary-junior',
    'abq-intermediate',
    'abq-senior',
    'one-session-additional-qualifications',
    'three-session-additional-qualifications',
    'honour-specialist',
    'technological-education',
)
urls = [f'{program_root}{slug}/' for slug in program_slugs]

# `term`: the term text that each course page is searched for.
term = '2021 Late Summer'

In [None]:
# grab each URL
# Downloads every program page listed in `urls` and collects the target of
# each link that contains '/course/'.
# Result: `all_courses`, a sorted list of unique, absolute course URLs.
found_courses = set()  # a set, so a course linked from several pages is kept once
for url in urls:
  print(f'Grabbing courses from {url}... ')
  # the with-block closes the HTTP response as soon as the page has been parsed
  with urllib.request.urlopen(url) as html_page:
    soup = BeautifulSoup(html_page, "html.parser")
  # find all the links with /course/ in the link
  for link in soup.find_all('a'):
    href = link.get('href')  # None for anchors that have no href attribute
    # a plain membership test also accepts hrefs that *start* with '/course/'
    # (the old `find(...) > 1` check skipped matches at index 0 and 1)
    if href and '/course/' in href:
      # resolve relative links against the page they were found on, so every
      # stored entry is a full URL that can be opened directly later
      found_courses.add(urllib.parse.urljoin(url, href))
all_courses = sorted(found_courses)
print('Done.')

In [None]:
# warm fuzzy feeling that it grabbed all the courses
# Prints every collected course URL on its own line (nothing at all when the
# list is empty).
if all_courses:
  print(*all_courses, sep='\n')

In [None]:
# inspect each page for course date (Late Summer 2021 as an example)
# Downloads every page in `all_courses` and checks whether one of its
# 'grid--auto' divs contains exactly the text in `term`.
# Result: `term_courses`, a dict mapping course title -> course URL for the
# pages where the term was found.
term_courses = {}
wanted_term = term.strip()
# trailing part of every page title, including the closing tag
title_suffix = ' - OISE Continuing and Professional Learning</title>'
for course in all_courses:
  print(f'Processing {course}...')
  # the with-block closes the HTTP response as soon as the page has been parsed
  with urllib.request.urlopen(course) as html_page:
    soup = BeautifulSoup(html_page, "html.parser")
  # need to do some magic to find the term text
  divs = soup.find_all('div', {'class':'grid--auto'}) # where the term is stored
  # Compare with surrounding whitespace removed so padding or line breaks
  # around the term text do not hide a match; empty divs simply never match.
  # any() stops at the first matching div, like the former `break`.
  if any(div.text.strip() == wanted_term for div in divs):
    # clean up the title (course name) for use in the HTML file:
    # str(soup.title) is the serialized <title> element, so drop the opening
    # tag and the site-wide suffix
    title = str(soup.title).replace('<title>','').replace(title_suffix,'')
    print(f'{term}: {title}') # tell us which courses were found
    term_courses[title] = course # add to dict of term_courses
print('Done.')

In [None]:
# test section to test the term_courses dict
# Shows each entry as "<title>: <url>", one per line (nothing when the dict
# is empty).
summary_lines = [f'{course_title}: {course_url}'
                 for course_title, course_url in term_courses.items()]
if summary_lines:
  print('\n'.join(summary_lines))

In [None]:
# create html file with desired course listings
# Writes a minimal HTML page to `file`: the page title is `term`, and the body
# holds one link per entry of `term_courses` (course title -> course URL).
file = './courses.html'
# explicit UTF-8 (also declared in the page) so non-ASCII course titles are
# written and displayed the same way on every platform
with open(file, 'w', encoding='utf-8') as f:
  page_title = html.escape(term)
  f.write(f'<HTML>\n<HEAD>\n\t<META charset="utf-8">\n\t<TITLE>{page_title}</TITLE>\n</HEAD>\n<BODY>\n')
  for k, v in term_courses.items():
    # escape the URL so characters such as & or " cannot break the attribute
    link_target = html.escape(v, quote=True)
    # NOTE(review): k was cut out of the serialized <title> markup, so it is
    # presumably already entity-escaped; it is written unchanged to avoid
    # double-escaping -- confirm if the title extraction ever changes
    f.write(f'\t<a href="{link_target}">{k}</a><br>\n')
  # close the document properly (this used to emit a second opening <HTML> tag)
  f.write('</BODY>\n</HTML>')
# no explicit f.close(): leaving the with-block has already closed the file
print(f'{file} created.')