# **App Run**

In [None]:
import json
from google.colab import files
from bs4 import BeautifulSoup

In [None]:
class CourseScheduler:
    """Build a per-course weekly timetable from an HTML schedule and export it as JSON.

    ``database`` maps every course code given at construction time to a list
    of entries shaped like::

        {"name_course": <name>, "week_day": {<day>: {<slot>: <details>}}}

    where ``<details>`` is the dict produced by :meth:`fill_info_course`.
    """

    def __init__(self, courses, week_days, time_slots):
        """Store the configuration and start with an empty database.

        Args:
            courses: Course codes; each becomes a top-level key of ``database``.
            week_days: Day names; table column ``i`` (1-based) is mapped to
                ``week_days[i - 1]``.
            time_slots: Time-slot labels. Stored as given; no method of this
                class reads them.
        """
        self.courses = courses
        self.week_days = week_days
        self.time_slots = time_slots
        self.database = self.create_db()

    def create_db(self):
        """Return a fresh mapping of every course code to an empty list."""
        return {course: [] for course in self.courses}

    def find_text(self, data, element_type):
        """Return the text of every ``element_type`` element found under ``data``.

        Elements whose text is exactly the empty string are dropped;
        whitespace-only text is kept.

        Args:
            data: Any object exposing ``find_all(element_type)`` whose results
                have a ``text`` attribute.
            element_type: Tag name to search for (e.g. ``'span'``).
        """
        return [
            element.text
            for element in data.find_all(element_type)
            if element.text != ''
        ]

    def create_course(self, name):
        """Return a one-element list holding an empty entry for course ``name``."""
        return [{"name_course": name, "week_day": {}}]

    def fill_info_course(self, week_day, schedule, data):
        """Return ``{week_day: {schedule: details}}`` for one timetable cell.

        Args:
            week_day: Day name used as the outer key.
            schedule: Time-slot label used as the inner key.
            data: Three values, in order: unit, room, teacher.
        """
        return {week_day: {schedule: {"Unidade": data[0], "Sala": data[1], "Prof(a)": data[2]}}}

    def process_table_row(self, course_type, name, timetable, info, index):
        """Record one timetable cell, creating the course entry on first sight.

        The cell's details are read from ``info[7]`` (unit), ``info[1]``
        (room) and ``info[2]`` (teacher). When ``info`` is too short for that
        layout the error is printed and positions 6, 0 and 1 are used
        instead. A cell that cannot be stored (unknown course code, column
        index outside ``week_days``, or ``info`` too short for either layout)
        is reported on stdout and skipped; nothing is raised.

        Args:
            course_type: Course code; must be a key of ``database``.
            name: Course name used to find or create the entry.
            timetable: Time-slot label of the cell.
            info: Indexable sequence holding the unit/room/teacher values.
            index: 1-based column index of the cell within its table row.
        """
        # Validate up front: index 0 would otherwise wrap to the last weekday,
        # and an unknown course code cannot be stored anywhere.
        if course_type not in self.database or not 1 <= index <= len(self.week_days):
            print(f"Erro ao processar uma coluna: {course_type} | {name} | {timetable} | {index}")
            return
        week_day = self.week_days[index - 1]

        try:
            details = [info[7], info[1], info[2]]
        except IndexError as e:
            print(f"Erro ao processar uma coluna: {e}")
            if len(info) < 7:
                # Not even the shorter layout fits; nothing usable to store.
                return
            print(f'{course_type} | {name} | {timetable} | {info[6]} | {info[0]} | {info[1]} \n')
            details = [info[6], info[0], info[1]]

        entries = self.database[course_type]
        course = next((c for c in entries if c["name_course"] == name), None)
        if course is None:
            course = self.create_course(name)[0]
            entries.append(course)

        # Merge slot by slot: a plain dict.update() on "week_day" would replace
        # the slots already stored for that day.
        for day, slots in self.fill_info_course(week_day, timetable, details).items():
            course["week_day"].setdefault(day, {}).update(slots)

    def process_html_file(self, html_file):
        """Parse ``html_file`` and feed every non-empty timetable cell to the database.

        For each ``<table>``, only ``<tr class="c11">`` rows are read. The
        first ``<td>`` of a row is the time-slot label; every following
        ``<td>`` is one cell whose first span text supplies the course code
        (its first two characters) and whose second span text is the course
        name. Unit/room/teacher values are taken from the span texts of the
        whole table.

        Args:
            html_file: HTML markup accepted by ``BeautifulSoup``.
        """
        soup = BeautifulSoup(html_file, 'html.parser')

        for table in soup.find_all('table'):
            rows = table.find_all('tr', class_='c11')
            if not rows:
                continue

            # Table-wide and therefore identical for every cell: compute once.
            info = self.find_text(table, 'span')

            for row in rows:
                columns = row.find_all('td')
                if not columns:
                    continue
                timetable = columns[0].text

                for index, column in enumerate(columns[1:], start=1):
                    span = self.find_text(column, 'span')
                    if not span:
                        continue
                    if len(span) < 2:
                        # Only a course code, no name: nothing to file it under.
                        print(f"Erro ao processar uma coluna: célula sem nome de disciplina ({span[0]})")
                        continue

                    self.process_table_row(span[0][:2], span[1], timetable, info, index)

    def save_to_json(self, filename='data.json'):
        """Write ``database`` to ``filename`` as JSON.

        ``json.dump`` runs with its defaults, so non-ASCII characters are
        written as ``\\uXXXX`` escapes.
        """
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(self.database, f)

In [None]:
# Course codes; passed to CourseScheduler as the database's top-level keys.
COURSE = [
    'CC',
    'ES',
    'EC',
    'EP',
    'EM',
]

# Day names in table-column order (first data column -> first entry).
WEEK_DAY = [
    'Segunda',
    'Terça',
    'Quarta',
    'Quinta',
    'Sexta',
]

# Time-slot labels handed to CourseScheduler as ``time_slots``.
SCHEDULES = [
    '8:00-10:00h',
    '10:00-12:00h',
    '13:30-15:30h',
    '15:30-17:30h',
]

In [None]:
# Ask for the HTML schedule through Colab's upload widget; the result is a
# mapping keyed by uploaded file name.
uploaded = files.upload()
if not uploaded:
    # A cancelled upload yields an empty mapping; fail with a clear message
    # instead of an IndexError on the first-key lookup.
    raise ValueError("Nenhum arquivo foi enviado.")

# Only the first uploaded file is used.
name_file = next(iter(uploaded))

# Read with an explicit encoding so the result does not depend on the
# platform default.
with open(name_file, encoding="utf-8") as archive:
    html_file = archive.read()

In [None]:
# Build the timetable database from the uploaded HTML, then export it to the
# default output file ('data.json').
scheduler = CourseScheduler(
    courses=COURSE,
    week_days=WEEK_DAY,
    time_slots=SCHEDULES,
)
scheduler.process_html_file(html_file)
scheduler.save_to_json()