In [8]:
from typing import Optional

import requests
from bs4 import BeautifulSoup
from sectiontally import SectionTally
from tqdm.autonotebook import tqdm

In [4]:
# Load the section tally for the target term and keep a handle on its table.
# NOTE(review): `st.df` is presumably a pandas DataFrame with 'Subj' and 'Crse'
# columns -- confirm against SectionTally.
st = SectionTally(term='Spring 2025')
df = st.df

# Distinct subject codes present in the tally.
subjs = df['Subj'].unique()

# Map every subject code to the distinct course numbers listed under it.
courses = {}
for subj in subjs:
    in_subject = df['Subj'] == subj
    courses[subj] = df.loc[in_subject, 'Crse'].unique()

In [6]:
# Download the raw course-detail page for every (subject, course number) pair
# collected in `courses`. One HTTP request is issued per course, so this cell
# needs network access and takes a while to run.

# Seconds to wait on the server before giving up; without a timeout a single
# stalled connection would block the whole loop indefinitely.
REQUEST_TIMEOUT_S = 30

# Maps "<subj> <crse>" to the raw response body (bytes) of the detail page.
catalog = {}

# A single Session lets the many requests to the same host share connections
# instead of opening a new one for every course.
with requests.Session() as session:
    for subj in tqdm(courses):
        for crse in courses[subj]:
            response = session.post(
                url='https://banner9.rowan.edu/ords/ssb/bwckctlg.p_disp_course_detail',
                data={'cat_term_in': st.term,
                      'subj_code_in': subj,
                      'crse_numb_in': crse},
                timeout=REQUEST_TIMEOUT_S,
            )

            # Explicit raise rather than `assert`: assertions are removed when
            # Python runs with -O, which would silently store error pages.
            if response.status_code != 200:
                raise RuntimeError(
                    f"error: {response.status_code}, unable to find course {subj} {crse}"
                )

            catalog[f'{subj} {crse}'] = response.content

100%|██████████| 145/145 [04:59<00:00,  2.07s/it]


In [9]:
import re

class CourseExtractor():
    """Pulls individual fields out of one parsed Rowan course-detail page.

    Each ``extract_*`` method returns ``None`` when the element it anchors on
    cannot be found in the page.
    """

    # A decimal number directly followed by " Credit". ``\d+`` keeps values
    # with several integer digits (e.g. "12.000") whole instead of matching
    # only their last digit.
    _CREDITS_RE = re.compile(r'(\d+\.\d{0,4}) Credit')

    # The annotation is quoted because it is informational only: any object
    # that offers ``find`` and ``text`` the way BeautifulSoup does will work.
    def __init__(self, soup: "BeautifulSoup") -> None:
        """Store the parsed page.

        Args:
            soup: BeautifulSoup of Rowan course information HTML.
        """
        self.soup = soup

    def extract_preqs(self) -> Optional[str]:
        """Extract the prerequisite text that follows the 'Prerequisites' label.

        Finds the ``span`` with class ``fieldlabeltext`` whose text matches
        "Prerequisites" (case-insensitive) and joins the strings of the nodes
        that come after it, stopping at the first two consecutive ``<br>``
        tags.

        Returns:
            The concatenated prerequisite text (possibly empty), or ``None``
            when the page has no "Prerequisites" label.
        """
        preq_head = self.soup.find(
            'span', 'fieldlabeltext',
            string=re.compile('Prerequisites', re.IGNORECASE),
        )
        if preq_head is None:
            return None

        res = []
        prev_tag = ''
        for sibling in preq_head.next_siblings:
            # Bare newlines are skipped before `prev_tag` is updated, so a
            # "<br>\n<br>" sequence still counts as two consecutive breaks.
            if sibling == '\n':
                continue

            # Two <br> tags in a row end the prerequisites section.
            if sibling.name == 'br' and prev_tag == 'br':
                break

            prev_tag = sibling.name

            # `.string` is None for nodes without a single string (e.g. <br>).
            s = sibling.string
            if s is not None:
                res.append(s)

        return ''.join(res)

    def extract_desc(self) -> Optional[str]:
        """Extract the course description.

        Returns:
            The first string node found after the start of the first ``td``
            with class ``ntdefault``, or ``None`` when there is no such cell,
            no following string, or the string is just a newline.
        """
        cell = self.soup.find('td', 'ntdefault')
        if not cell:
            return None
        # `find_next` is the current name of the deprecated `findNext` alias.
        text = cell.find_next(string=True)
        if not text or text == '\n':
            return None
        return text

    def extract_title(self) -> Optional[str]:
        """Extract the course title.

        Returns:
            The ``.string`` of the first ``td`` with class ``nttitle``, or
            ``None`` when the page has no such cell.
        """
        cell = self.soup.find('td', 'nttitle')
        if not cell:
            return None
        return cell.string

    def extract_credits(self) -> Optional[str]:
        """Extract the credit value from the page text.

        Returns:
            The first decimal number in the page text that is directly
            followed by " Credit" (e.g. ``"3.000"``), as a string, or ``None``
            when no such number exists.
        """
        # Requiring the trailing " Credit" ensures the number is a credit value.
        match = self._CREDITS_RE.search(self.soup.text)
        if match:
            return match.group(1)
        return None

In [14]:
# Parse every downloaded page into a flat record.
# Despite its name, `catalog_dict` is a list holding one dict per course.
catalog_dict = []

for course_key, html in tqdm(catalog.items()):
    soup = BeautifulSoup(html, features="html.parser")
    course = CourseExtractor(soup)

    # Keys of `catalog` have the form "<subj> <crse>".
    subj, crse = course_key.split()

    record = {
        "subj": subj,
        "crse": crse,
        "title": course.extract_title(),
        "desc": course.extract_desc(),
        "preqs": course.extract_preqs(),
        "creds": course.extract_credits(),
    }
    catalog_dict.append(record)

100%|██████████| 2089/2089 [00:09<00:00, 225.42it/s]


In [16]:
import json

# Persist the extracted course records to catalog.json in the working directory.
with open('catalog.json', mode='w') as out_file:
    json.dump(catalog_dict, fp=out_file)