remove multiprocessing-based schedule generation

rosshamish · Mar 10, 2016 · 3d6aaa0
1 parent d48a66f
commit 3d6aaa0
Showing 1 changed file with 17 additions and 202 deletions.
diff --git a/classtime/brain/scheduling/schedule_generator.py b/classtime/brain/scheduling/schedule_generator.py
@@ -1,5 +1,4 @@
 import collections
-import multiprocessing
 import pycosat
 import itertools
 
@@ -8,18 +7,8 @@
 
 import classtime
 
-import heapq
 from classtime.brain.scheduling.schedule import Schedule
 
-CANDIDATE_POOL_SIZE = 64
-"""Number of schedules to keep in consideration at any one time"""
-
-WORKERS = 16
-"""Maximum number of worker processes to spawn"""
-
-WORKLOAD_SIZE = CANDIDATE_POOL_SIZE / WORKERS
-"""Number of candidate schedules to give to each worker process"""
-
 
 def find_schedules(schedule_params, num_requested):
     """
@@ -46,8 +35,11 @@ def find_schedules(schedule_params, num_requested):
             logging.warning('"courses" not found for electives. q={}'.format(
                 schedule_params))
 
-    schedules = _generate_schedules_sat(cal,
-        term, course_ids, busy_times, electives_groups, preferences)
+    schedules = _generate_schedules_sat(cal, term, course_ids, busy_times, electives_groups, preferences)
+    schedules = _condense_schedules(cal, schedules)
+    schedules = sorted(schedules,
+                       reverse=True,
+                       key=lambda s: s.overall_score())
     if not schedules:
         logging.error('No schedules found for q={}'.format(
             schedule_params))
@@ -87,7 +79,7 @@ def _generate_schedules_sat(cal, term, course_ids, busy_times, electives_groups,
             for component in course
             for section in component
         ]
-        schedules += (_generate_schedules_sat_from_sections(sections, busy_times, preferences))
+        schedules += _generate_schedules_sat_from_sections(sections, busy_times, preferences)
     return schedules
 
 
@@ -127,10 +119,7 @@ def _generate_schedules_sat_from_sections(sections, busy_times, preferences):
                                   preferences=preferences))
         if len(schedules) > 100:
             break
-
-    return sorted(schedules,
-                  reverse=True,
-                  key=lambda s: s.overall_score())
+    return schedules
 
 
 def _build_section_index(components):
@@ -160,202 +149,28 @@ def _conflicts(section_a, section_b, busy_times):
     if section_a.get('course') == section_b.get('course') and \
        section_a.get('component') == section_b.get('component'):
         return True
-    sched = Schedule(busy_times=busy_times)
-    if sched.conflicts(section_a):
+    schedule = Schedule(busy_times=busy_times)
+    if schedule.conflicts(section_a):
         return True
-    sched.add_section(section_a)
-    if sched.conflicts(section_b):
+    schedule.add_section(section_a)
+    if schedule.conflicts(section_b):
         return True
     return False
 
 
-def _generate_schedules(cal, term, course_ids, busy_times, electives_groups, preferences):
-    """Generate a finite number of schedules
-
-    :param int num_requested: maximum number of schedules to return.
-        Upper limit is CANDIDATE_POOL_SIZE.
-        Will only return valid schedules, even if that means returning
-        less than the requested number.
-
-    :returns: the best possible schedules, sorted by ScheduleScorer
-        scoring functions
-    :rtype: list of :ref:`schedule objects <api-schedule-object>`
-    """
-    def _log_scheduling_component(num, component, pace):
-        logging.debug('({symbol}/{num}) Scheduling {name}'.format(
-            symbol=Schedule.SYMBOLS[pace],
-            num=num,
-            name=' '.join(component[0].get('asString').split()[:-1])))
-
-    candidates = [Schedule(busy_times=busy_times, preferences=preferences)]
-
-    current_status = preferences.get('current-status', False)
-    obey_status = preferences.get('obey-status', False)
-
-    candidates = _schedule_mandatory_courses(candidates, cal,
-        term, course_ids, current_status, obey_status,
-        _log_scheduling_component)
-
-    candidates = _schedule_electives(candidates, cal,
-        term, electives_groups, current_status, obey_status,
-        _log_scheduling_component)
-
-    candidates = _condense_schedules(cal, candidates)
-
-    return sorted(candidates,
-        reverse=True,
-        key=lambda sched: sched.overall_score())
-
-def _schedule_mandatory_courses(candidates, cal, term, course_ids, current_status, obey_status, _log):
-    courses_components = cal.course_components(term, course_ids,
-        current_status=current_status)
-    courses_components = sorted(courses_components, key=len)
-    total_components = sum([len(components)
-                            for components in courses_components])
-
-    pace = 0
-    for course in courses_components:
-        for component in course:
-            _log(total_components, component, pace)
-            candidates = _add_component(candidates, component, pace, obey_status)
-            pace += 1
-    return [candidate for candidate in candidates
-            if len(candidate.sections) == pace]
-
-def _schedule_electives(base_candidates, cal, term, electives_groups, current_status, obey_status, _log):
-    if base_candidates:
-        base_pace = len(base_candidates[0].sections)
-    else:
-        base_pace = 0
-
-    electives_course_lists = [electives.get('courses', list())
-                              for electives in electives_groups]
-    if not electives_course_lists:
-        return base_candidates
-
-    completed_schedules = list()
-    for course_list in electives_course_lists:
-        for course in course_list:
-            candidates = base_candidates[:]
-            pace = base_pace
-            for component in cal.course_components(term, course,
-                                 single=True, current_status=current_status):
-                _log(base_pace, component, pace)
-                candidates = _add_component(candidates, component, pace, obey_status)
-                pace += 1
-            candidates = [candidate for candidate in candidates
-                          if len(candidate.sections) == pace]
-            completed_schedules += candidates
-    return completed_schedules
-
-def _add_component(candidates, component, pace, obey_status):
-    """
-    Schedule generation algorithm
-    1. Pick a schedule candidate from the list.
-    2. Pick a section ("A2") for a component ("LAB") of a course ("CHEM")
-      2b. If the section conflicts with the schedule, throw it out
-      2c. Otherwise, add it to the schedule.
-    3. Do 2 for all section options ("A3") for the component ("LAB").
-    4. Do 3 for all components ("LAB") of a course ("CHEM")
-    5. Do 4 for all schedule candidates
-    6. Do battle royale with the schedules. Only keep the best.
-
-    7. Add the next component using (1->6).
-    8. Repeat until all courses are scheduled.
-
-    :param boolean obey_status: if True, do not schedule closed or cancelled
-        sections
-    """
-    def _candidate_battle_royale(candidates, component, pace, obey_status, heap_size, out_q):
-        """Put the `heap_size` best candidates onto the `out_q`
-
-        :param list candidates: candidate schedules
-        :param list component: sections to consider. Exactly one is added to any
-            given schedule.
-        :param int pace: the number of components which should already have been
-            added to a schedule. If a schedule has less than this, it can never
-            be a complete schedule. Therefore, time should not be wasted on it.
-        :param int heap_size: number of candidate schedules which should escape
-            alive
-        :param multiprocessing.Queue out_q: a multiprocessing Queue to put 
-            results onto.
-
-        :returns: the best schedules
-        :rtype: list of schedules
-        """
-        candidates_to_return = list()
-        for candidate in candidates[:]:
-            if _is_hopeless(candidate, pace):
-                continue
-            for section in component:
-                if obey_status and section.get('classStatus', 'X') == 'X':
-                    continue
-                if obey_status and section.get('enrollStatus', 'C') == 'C':
-                    continue
-                if candidate.conflicts(section):
-                    continue
-                _add_candidate(candidates_to_return,
-                    candidate.clone().add_section(section),
-                    heap_size)
-        out_q.put(candidates_to_return)
-        return
-
-    out_q = multiprocessing.Queue()
-    procs = list()
-    for chunk in _chunks(candidates):
-        proc = multiprocessing.Process(
-            target=_candidate_battle_royale,
-            args=(chunk, component, pace, obey_status,
-                  WORKLOAD_SIZE+1, out_q))
-        procs.append(proc)
-        proc.start()
-
-    candidates = list()
-    for _ in range(len(procs)):
-        for candidate in out_q.get():
-            _add_candidate(candidates, candidate, CANDIDATE_POOL_SIZE)
-
-    for proc in procs:
-        proc.join()
-
-    return candidates
-
-def _add_candidate(candidates, candidate, heap_size):
-    if heap_size == float('inf'):
-        heapq.heappush(candidates, candidate)
-    elif len(candidates):
-        discard = heapq.heapreplace(candidates, candidate)
-        if len(candidates) < heap_size:
-            heapq.heappush(candidates, discard)
-    else:
-        heapq.heappush(candidates, candidate)
-
-def _is_hopeless(candidate, sections_chosen):
-    return len(candidate.sections) < sections_chosen
-
 def _condense_schedules(cal, schedules):
     schedules = sorted(schedules,
-        key=lambda sched: (sched.overall_score(), sched.timetable_bitmap))
-
+                       key=lambda s: (s.overall_score(), s.timetable_bitmap))
     lag, lead = 0, 1
     condensed_indices = list()
     while lead < len(schedules):
-        sched, lead_sched = schedules[lag], schedules[lead]
-        if sched.is_similar(lead_sched):
-            more_like_this_id = cal.get_schedule_identifier(lead_sched)
-            sched.more_like_this.append(more_like_this_id)
+        schedule, lead_schedule = schedules[lag], schedules[lead]
+        if schedule.is_similar(lead_schedule):
+            more_like_this_id = cal.get_schedule_identifier(lead_schedule)
+            schedule.more_like_this.append(more_like_this_id)
             condensed_indices.append(lead)
         else:
             lag = lead
         lead += 1
-    return [sched for i, sched in enumerate(schedules)
+    return [s for i, s in enumerate(schedules)
             if i not in condensed_indices]
-
-# http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks-in-python
-def _chunks(full_list, chunk_size=None):
-    """ Yield successive n-sized chunks from l.
-    """
-    if chunk_size is None:
-        chunk_size = WORKLOAD_SIZE
-    for i in xrange(0, len(full_list), chunk_size):
-        yield full_list[i:i+chunk_size]