In [662]:
"""
extracting course data from an image of courses timetable
"""

import cv2
import pytesseract

class Course:
    def __init__(self, title, type, start_time, end_time, location, day):
        self.title = title
        self.type = type
        self.start_time = start_time
        self.end_time = end_time
        self.location = location
        self.day = day

    def __repr__(self):
        return f"""
                Title: {self.title}
                Type: {self.type}
                Time: {self.start_time} - {self.end_time}
                Location: {self.location}
                Day: {self.day}\n"""

def get_image(path):
    """
    (string) -> numpy.ndarray
    returns image for given path
    """
    return cv2.imread(path)

def extract_information(image):
    """
    (numpy.ndarray) -> list
    returns list of text extracted from given image
    """
    extracted_info = pytesseract.image_to_string(image).strip().replace("\n\n", "\n")
    extracted_info_list = list(extracted_info.split("\n"))

    for item in extracted_info_list:
        if item == " ":
            extracted_info_list.remove(item)

    return extracted_info_list

def helper_capitalize(text):
    """
    (string) -> string
    proprely capitalize course titles
    """
    capitalized = ''
    for char in text:
        if char.isalpha():
            capitalized += char.upper()
        else:
            capitalized += char

    return capitalized

def get_classes(info_list):
    """
    (list) -> list
    returns list of classes of type Class from given list of raw extracted text
    """
    WEEKDAYS = set(["monday", "tuesday", "wednesday", "thursday", "friday"])
    classes = []
    day = 'Monday'
    i = 0

    while i < len(info_list):
        current = info_list[i]
        if current.lower() in WEEKDAYS:
            day = current

            class_title = info_list[i+2].strip()
            class_type = info_list[i+3]
            time = info_list[i+4]
            class_start_time = time[:6]
            class_end_time = time[8:]
            class_location = info_list[i+5].replace("Location: ", "")

            this_class = Course(helper_capitalize(class_title), class_type, class_start_time, class_end_time, class_location, day)
            classes.append(this_class)
            i += 6

        else:
            class_title = info_list[i].strip()
            class_type = info_list[i+1]
            time = info_list[i+2]
            class_start_time = time[:6]
            class_end_time = time[8:]
            class_location = info_list[i+3].replace("Location: ", "")

            this_class = Course(helper_capitalize(class_title), class_type, class_start_time, class_end_time, class_location, day)
            classes.append(this_class)
            i += 4

    return classes

def get_schedule(path):
    img = get_image(path)
    info_list = extract_information(img)
    classes = get_classes(info_list)

    return classes

In [744]:
from icalendar import Calendar, Event
import datetime
from extract_course_data import get_schedule

PATH = 'data/schedule1.png'

ACADEMIC_TERMS = {"summer": "01/05 - 31/08", "fall": "01/09 - 31/12", "winter": "01/01 - 30/04"}
WEEKDAYS = {"monday": 0, "tuesday": 1, "wednesday": 2, "thursday": 3, "friday": 4}

In [745]:
def get_academic_term(academic_term):
    """
    (string) -> tuple
    returns the start and end dates of the given academic term
    """
    term = ACADEMIC_TERMS[academic_term]
    start = term[0:5]
    end = term[8:]

    return start, end

In [746]:
def next_weekday(d, weekday):
    days_ahead = weekday - d.weekday()
    if days_ahead <= 0: # Target day already happened this week
        days_ahead += 7
    return d + datetime.timedelta(days_ahead)

In [747]:
# self.title = title
# self.type = type
# self.start_time = start_time
# self.end_time = end_time
# self.location = location
# self.day = day
academic_term = 'Summer'
year = 2019
classes = []

In [748]:
classes = get_schedule(PATH)

In [749]:
print(classes)

[
                Title: ELG 2136 - AOO
                Type: Lecture
                Time: 11:30  - 12:50
                Location: N/A
                Day: Monday
, 
                Title: ELG 2911 - DOZ
                Type: Tutorial
                Time: 13:00  - 14:20
                Location: N/A
                Day: Monday
, 
                Title: ELG 2136 -A03
                Type: Tutorial
                Time: 14:30  - 15:50
                Location: N/A
                Day: Monday
, 
                Title: MAT 2377 - ADO
                Type: Lecture
                Time: 10:00  - 11:20
                Location: N/A
                Day: Tuesday
, 
                Title: PHY 2323 - ADO
                Type: Lecture
                Time: 11:30  - 12:50
                Location: N/A
                Day: Tuesday
, 
                Title: SEG 2106 - A00
                Type: Lecture
                Time: 13:00  - 14:20
                Location: N/A
                Day: Tuesday
,

In [750]:
cal = Calendar()
academic_term = academic_term.lower()

In [751]:
course = classes[0]
course


                Title: ELG 2136 - AOO
                Type: Lecture
                Time: 11:30  - 12:50
                Location: N/A
                Day: Monday

In [752]:
term_start, term_end = get_academic_term(academic_term)
print(term_start, term_end)

01/05 31/08


In [753]:
event = Event()

In [754]:
f"{course.title} - {course.type}".strip().replace("  ", " ")

'ELG 2136 - AOO - Lecture'

In [755]:
event.add('summary', f"{course.title} - {course.type}".strip().replace("  ", " "))

In [756]:
term_start_date = datetime.date(year, int(term_start[3:]), int(term_start[0:2]))
print(term_start_date)

2019-05-01


In [757]:
first_start_date = next_weekday(term_start_date, WEEKDAYS[course.day.lower()])
first_start_date

datetime.date(2019, 5, 6)

In [758]:
course.start_time, course.end_time

('11:30 ', '12:50')

In [759]:
first_start_date

datetime.date(2019, 5, 6)

In [760]:
hour, minute = int(course.start_time[:2]), int(course.start_time[3:])

In [761]:
from datetime import time
start_time = time(hour, minute, 0)

In [762]:
start_date = datetime.datetime.combine(first_start_date, start_time)

In [763]:
start_date

datetime.datetime(2019, 5, 6, 11, 30)

In [764]:
hour, minute = int(course.end_time[:2]), int(course.end_time[3:])
hour, minute

(12, 50)

In [765]:
end_time = time(hour, minute, 0)

In [766]:
event.add('dtstart', start_date)

In [767]:
end_date = datetime.datetime.combine(first_start_date, end_time)

In [768]:
event.add('dtend', end_date)

In [769]:
start_date, end_date

(datetime.datetime(2019, 5, 6, 11, 30), datetime.datetime(2019, 5, 6, 12, 50))

In [770]:
term_end

'31/08'

In [771]:
last_date = datetime.date(year, int(term_end[3:]), int(term_end[0:2]))
last_date

datetime.date(2019, 8, 31)

In [772]:
f"FREQ=WEEKLY;BYDAY={course.day};INTERVAL=1;UNTIL={last_date.isoformat()}"
 tzs.add('rrule', {'freq': 'yearly', 'bymonth': 10, 'byday': 'MO'})
 # event.add('rrule', u'FREQ=YEARLY;INTERVAL=1;COUNT=10'



IndentationError: unexpected indent (<ipython-input-772-3fc96c761f66>, line 2)

In [773]:
term_end[3:]

'08'

In [774]:
last_date

datetime.date(2019, 8, 31)

In [775]:
term_end_date

datetime.date(2021, 8, 31)

In [776]:
course_end = datetime.datetime.combine(last_date, end_time)
type(course_end)

datetime.datetime

In [777]:
event.add('rrule', {'freq': 'weekly', 'interval': 1, 'byday': f'{course.day[:2]}', 'until': course_end})

In [778]:
event

VEVENT({'SUMMARY': vText('b'ELG 2136 - AOO - Lecture''), 'DTSTART': <icalendar.prop.vDDDTypes object at 0x12709f950>, 'DTEND': <icalendar.prop.vDDDTypes object at 0x12709c850>, 'RRULE': vRecur({'FREQ': 'weekly', 'INTERVAL': 1, 'BYDAY': 'Mo', 'UNTIL': datetime.datetime(2019, 8, 31, 12, 50)})})

In [779]:
from icalendar import vCalAddress, vText

In [780]:
event['location'] = vText(course.location)

In [781]:
cal.add_component(event)

In [782]:
with open(f'ex.ics', 'wb') as ics:
        ics.write(cal.to_ical()) 