In [1]:
import re

In [8]:
test =  [
    "Mon COMPSCI 2XC3\n- C01\nLecture\n10:30AM -\n11:20AM\nHealth Science\nCentre 1A1\n",
    "Mon COMPSCI 2DB3\n- C01\nLecture\n11:30AM -\n12:20PM\nJohn Hodgins\nEngineer Bldg\n376\n",
    "Mon COMPSCI 2XC3\n- LO3\nLaboratory\n2:30PM - 5:20PM\nBurke Science\nBldg. 249\n",
    "Mon INNOVATE 2C03\n- C01\nLecture\n7:00PM -\n10:00PM\nKenneth Taylor\nHall 109\n",
    "Wed COMPSCI 2DB3\n- C01\nLecture\n11:30AM -\n12:20PM\nJohn Hodgins\nEngineer Bldg\n376\n",
    "Thu COMPSCI 2DB3\n- T02\nTutorial\n9:30AM -\n10:20AM\nA.N. Bourns Bldg\n270\n",
    "Thu COMPSCI 2XC3\n- C01\nLecture\n10:30AM -\n11:20AM\nHealth Science\nCentre 1A1\n",
    "Thu COMPSCI 2SD3\n- C01\nLecture\n11:30AM -\n12:20PM\nHealth Science\nCentre 1A1\n",
    "Thu COMPSCI 2AC3\n-T01\nTutorial\n12:30PM -\n2:20PM\nChester New Hall\nB107\n",
    "Thu COMPSCI 2AC3\n- C01\nLecture\n2:30PM - 3:20PM\nBurke Science\nBldg. 147\n",
    "Fri COMPSCI 2SD3\n- C01\nLecture\n11:30AM -\n12:20PM\nHealth Science\nCentre 1A1\n",
    "Fri COMPSCI 2DB3\n- C01\nLecture\n1:30PM - 2:20PM\nJohn Hodgins\nEngineer Bldg\n376\n",
    "Fri COMPSCI 2AC3\n- C01\nLecture\n2:30PM - 3:20PM\nBurke Science\nBldg. 147\n"
]

In [16]:
def parse_class(text):
    # Clean and normalize
    text = re.sub(r'\s+', ' ', text.strip())
    
    result = {key: '' for key in ['day', 'course', 'class_code', 'class_type', 'start_time', 'end_time', 'location']}
    
    # 1. DAY: First 3-letter at start
    day_match = re.search(r'^(Mon|Tue|Wed|Thu|Fri|Sat|Sun)', text)
    if day_match:
        result['day'] = day_match.group(1)
    
    # 2. COURSE: Letters+digits pattern
    course_match = re.search(r'([A-Z]{4,}\s*[0-9A-Z]{4})', text)
    if course_match:
        result['course'] = course_match.group(1)
    
    # 3. CLASS_CODE: -C01, -T02, TO2
    code_match = re.search(r'-?\s*(C|T|L|c)\d{2}|-?\s*TO\d|-?\s*(C|T|L|c)*(O|o|0)\d{1}|-?\s*(C|T|L|c)*(O|0|o)*(t|a)', text)
    # code_match = re.search(r'-?\s*(C|T|L|c)\d{2}|-?\s*TO\d|-?\s*(C|T|L|c)O\d{1}|-?\s*(C|T|L|c)Ot|-?\s*(C|T|L|c)0t', text)
    if code_match:
        result['class_code'] = code_match.group(0).strip('- ').replace('t', '1').replace('a', '4').upper().replace('O', '0')
    
    # 4. CLASS_TYPE: Known types
    for typ in ['Lecture', 'Tutorial', 'Laboratory', 'Lab']:
        if typ in text and result['class_type'] == '':
            result['class_type'] = typ
            break
    
    # 5. TIMES: Extract start and end times, even if split across lines
    time_matches = re.findall(r'(\d{1,2}:\d{2}\s*(?:AM|PM)?)', text)
    if len(time_matches) >= 2:
        result['start_time'], result['end_time'] = time_matches[:2]
    elif len(time_matches) == 1:
        result['start_time'] = time_matches[0]
    
    # 6. LOCATION: Everything AFTER last time that looks like a room/building
    # Find last time position
    last_time_pos = 0
    for time_match in re.finditer(r'\d{1,2}:\d{2}\s*(?:AM|PM)?', text):
        last_time_pos = time_match.end()
    
    # Take text after last time + has building/room indicators
    if last_time_pos > 0:
        after_times = text[last_time_pos:].strip()
        # Room indicators: 3-digit numbers, Bldg, Centre, Hall, etc.
        if re.search(r'\d{3}|[A-Z]{4,}|\bBldg?\b|\bCentre?\b|\bHall?\b|\bRm?\b', after_times):
            result['location'] = after_times.strip()[:50]

    return result


In [12]:
def is_class_data_complete(class_data):
    """
    Check if a class has all the important data before adding it.

    Args:
        class_data (dict): A dictionary containing class information.

    Returns:
        bool: True if the class has all required fields, False otherwise.
    """
    required_fields = ['day', 'course', 'class_type', 'start_time', 'end_time', 'location']
    
    for field in required_fields:
        if not class_data.get(field):
            return False

    return True

In [17]:
for text in test:
    print(is_class_data_complete(parse_class(text)))
    print(parse_class(text))

True
{'day': 'Mon', 'course': 'COMPSCI 2XC3', 'class_code': 'C01', 'class_type': 'Lecture', 'start_time': '10:30AM', 'end_time': '11:20AM', 'location': 'Health Science Centre 1A1'}
True
{'day': 'Mon', 'course': 'COMPSCI 2DB3', 'class_code': 'C01', 'class_type': 'Lecture', 'start_time': '11:30AM', 'end_time': '12:20PM', 'location': 'John Hodgins Engineer Bldg 376'}
True
{'day': 'Mon', 'course': 'COMPSCI 2XC3', 'class_code': 'L03', 'class_type': 'Laboratory', 'start_time': '2:30PM', 'end_time': '5:20PM', 'location': 'Burke Science Bldg. 249'}
True
{'day': 'Mon', 'course': 'INNOVATE 2C03', 'class_code': 'C03', 'class_type': 'Lecture', 'start_time': '7:00PM', 'end_time': '10:00PM', 'location': 'Kenneth Taylor Hall 109'}
True
{'day': 'Wed', 'course': 'COMPSCI 2DB3', 'class_code': 'C01', 'class_type': 'Lecture', 'start_time': '11:30AM', 'end_time': '12:20PM', 'location': 'John Hodgins Engineer Bldg 376'}
True
{'day': 'Thu', 'course': 'COMPSCI 2DB3', 'class_code': 'T02', 'class_type': 'Tutori