## Sproj Board Scheduler, Fall 2020

A script that reads faculty and student availability for the boards week and schedules all boards, accounting for their target composition and for student availability.

All of the data used by this script is sensitive, and so has to be stored outside of git.

In this version:
* Student availability comes from BIP (schedules for all majors, downloaded as one report)
* Faculty availability comes from an Excel-like table, weirdly transformed

In [None]:
import pandas as pd
import numpy as np
import time
import datetime
import copy
import random
from IPython.display import HTML, display

In [None]:
# Name-hiding switch: keep 0 while troubleshooting, set to 1 before pushing outputs to GitHub
hideNames = 0

### Phase 1: Load and parse the data

In [None]:
# Board composition table (each row is later turned into one Board object)
boards_csv = '../../data/boards2020f/boards.csv'
composition = pd.read_csv(boards_csv)
print(len(composition))      # number of rows in the table
list(composition.columns)    # last expression of the cell: shows the column names

In [None]:
# Everyone who sits on at least one board, in order of first appearance
# (advisors first, then second members, then third members).
# pd.concat replaces the chained Series.append calls, which pandas 2.0 removed.
board_roles = ['Advisor', 'Board2', 'Board3']
faculty_names = pd.concat([composition[role] for role in board_roles]).unique().tolist()
# Empty cells are read as NaN (a float), so keep only real strings
faculty_names = [s for s in faculty_names if isinstance(s, str)]
print(faculty_names)

In [None]:
# Faculty availability
# The table is wide: a 'Time' column plus one column per person holding YES / NO.
# It is melted into long form: one row per (time, faculty) pair.
temp = pd.read_csv('../../data/boards2020f/faculty.csv')
# Column header used in the availability table -> name used in the boards table
# (presumably last names, to match boards.csv; verify when adding people)
translation = {'Mike':'Tibbetts', 'Arseny':'Khakhalin', 'Gabriel':'Perron', 'Brooke Jude':'Jude',
              'Katie Tabb':'Tabb', 'Eli':'Dueker', 'Heather':'Bennett', 'Frank':'Scalzo',
              'Kerri-Ann Norton':'Norton', 'Bruce Robertson':'Robertson', 'Cathy':'Collins'}
avail_faculty = pd.melt(temp, id_vars=['Time'], var_name='faculty', value_name='can_do')
# Headers that are not in the translation table are kept as they are
avail_faculty['faculty'] = avail_faculty['faculty'].map(lambda n: translation.get(n, n))
print(avail_faculty['faculty'].unique())

avail_faculty = avail_faculty.rename({'Time':'time'}, axis=1)
avail_faculty['time'] = pd.to_datetime(avail_faculty.time)
dayletters = ['m','t','w','r','f','s','s']   # indexed by weekday number, Monday = 0
avail_faculty['day'] = avail_faculty['time'].dt.weekday.map(lambda i: dayletters[i])
avail_faculty['minute'] = avail_faculty['time'].dt.minute
avail_faculty['hour'] = avail_faculty['time'].dt.hour
avail_faculty.loc[avail_faculty['can_do']=='YES', 'can_do'] = True
avail_faculty.loc[avail_faculty['can_do']=='NO', 'can_do'] = False

# Executive decisions <----------------------------- may be a stretch
do_drop_30 = True # Drop all 30-min slots
if do_drop_30:
    # If 2nd half of the hour is taken, the entire hour is taken.
    # The previous loop copied the :00 answer INTO the :30 row, which is then dropped,
    # so a busy second half never blocked the hour. Here the busy :30 rows are
    # collected per faculty member and written into the matching :00 row instead.
    second_half_taken = avail_faculty[(avail_faculty['minute'] == 30)
                                      & ~avail_faculty['can_do'].astype(bool)]
    taken_hours = set(zip(second_half_taken['faculty'],
                          second_half_taken['time'] - pd.Timedelta(minutes=30)))
    in_taken_hour = np.array([key in taken_hours
                              for key in zip(avail_faculty['faculty'], avail_faculty['time'])],
                             dtype=bool)
    on_the_hour = (avail_faculty['minute'] == 0).to_numpy()
    avail_faculty.loc[on_the_hour & in_taken_hour, 'can_do'] = False
    avail_faculty = avail_faculty[avail_faculty['minute']==0].drop(columns=['minute'])

# Only look at the first week (7 days counted from the first row of the table)
first_time = avail_faculty['time'].iloc[0]
avail_faculty = avail_faculty[avail_faculty['time'] < first_time + pd.DateOffset(days=7)]

avail_faculty

In [None]:
# Student schedule
# Parses the BIP text report. A line starting with "Student:" opens a student's section
# (the name runs up to the '--' token); every other line is treated as a course line
# holding day letters and a start / finish time.
altnames = pd.read_csv('../../data/boards2020f/altnames.csv')
dayletters = {'M','T','W','Th','F'}
board_students = set(composition.Student.tolist())   # built once, for the progress counter
schedule_rows = []     # one dict per (student, course, meeting day); turned into a frame at the end
n_skipped = 0          # course lines that name days but lack a full time range
current_student = ''
istudent = 0
# Lines that start with a day letter inherit program / course number from the line above;
# None covers the case when there is no such line yet.
program = course_n = None
with open('../../data/boards2020f/student_schedules.txt', 'r') as file:
    for line in file:
        line = line.rstrip('\r\n')   # otherwise the newline stays glued to the last token
        s = line.split(' ')
        if s[0]=='Student:':
            lim = s.index('--') # Find where the name ends
            name = ' '.join(s[1:lim])
            if name in altnames.oldname.values:
                name = altnames.loc[altnames.oldname==name, 'name'].values[0]
            current_student = name
            if current_student in board_students:
                istudent += 1
                print(istudent, end=' ')
            continue

        # Course line
        days = []
        left_time = right_time = None   # reset per line, so times never leak from a previous course
        s = line.replace('-',' ').split(' ') # To avoid problems with pm/am format they use
        if s[0] not in dayletters and len(s) > 1:
            program = s[0] # Like BIO or CHEM
            course_n = s[1] # Number in the course name, like 101 or whatever (kept as a string)
        for ci, c in enumerate(s):
            if c in dayletters: # Day of the week
                days.append(c)
            elif c in {'am', 'pm', 'am-', 'pm-'}:
                # The first am/pm token closes the start time, any later one the finish time
                if left_time is None:
                    left_time = s[ci-1]+' '+c
                else:
                    right_time = s[ci-1]+' '+c
        if not days:
            continue
        if left_time is None or right_time is None:
            n_skipped += 1
            continue
        for day in days:
            schedule_rows.append({'name':current_student, 'day':day,
                                  'program':program, 'course_n':course_n,
                                  'start':left_time, 'finish':right_time})

# Building the frame once replaces DataFrame.append in the loop
# (removed in pandas 2.0, and quadratic in the number of rows).
schedule = pd.DataFrame(schedule_rows,
                        columns=['name', 'day', 'start', 'finish', 'program', 'course_n'])
if n_skipped:
    print('\n%d course line(s) with days but without a full time range were skipped' % n_skipped)

In [None]:
# Column names of the parsed schedule (uncomment the last line to see the whole table)
list(schedule.columns)
# schedule

In [None]:
# Test individual students
#schedule[schedule.name.str[:3]=="Sac"]

In [None]:
# Missed students: listed in the board composition, but absent from the parsed schedule
names_with_schedule = schedule.name.values
student_list = []
for student in composition.Student.values:
    if student not in names_with_schedule:
        student_list.append(student)
# student_list

### Phase 2: Create schedules

In [None]:
# Create time grid for one week

days = ['m','t','w','r','f']
firstDate = datetime.datetime.strptime('Dec 7 2020', "%b %d %Y")   # date of the first grid day

# Printable label for every day letter, e.g. 'Mon, Dec 07'.
# Reminder: timedelta sits directly in the datetime module, while strptime / strftime
# belong to datetime.datetime (and the time module has its own, differently ordered, strftime).
dayName = {}
for offset, (letter, short) in enumerate(zip(days, ['Mon','Tue','Wed','Thr','Fri'])):
    date = firstDate + datetime.timedelta(days=offset)
    dayName[letter] = short + ', ' + date.strftime("%b %d")

startTime = 9
endTime = 18
# List of tuples: day (as a letter), hour (as a 24h int number); endTime itself is not included
grid = [(d, t) for d in days for t in range(startTime, endTime)]

print(dayName)
print(grid)

In [None]:
class Faculty:
    """A faculty member together with a 0/1 availability flag for every slot of the grid."""

    def __init__(self,name):
        self.name = name
        self.avail = []   # filled by initAvail(): 1 = free, 0 = busy, same order as the grid

    def __str__(self):
        flags = ''.join('%d' % flag for flag in self.avail)
        return "%12s \t" % (self.name) + flags

    def initAvail(self, grid, data):
        """
        Fill self.avail from the availability table.

        grid: list of (day letter, hour) tuples.
        data: availability dataframe with columns [time,day,hour,faculty,can_do] where time is really
        a datetime stamp; day is the weekday; hour is the hour; faculty is a name, can_do is boolean.

        A slot with no row for this person counts as busy; when several rows match,
        the first one decides.
        """
        own_rows = data.loc[data['faculty']==self.name]
        flags = []
        for slot in grid:
            answers = own_rows.loc[(own_rows['day']==slot[0]) &
                                   (own_rows['hour']==slot[1]), 'can_do']
            is_free = len(answers)>0 and answers.values[0]
            flags.append(1 if is_free else 0)
        self.avail = flags

    def updateAvail(self,grid,g,newVal=0):
        """Set the flag of every slot equal to g (a (day, hour) tuple) to newVal."""
        self.avail = [newVal if grid[pos]==g else current
                      for pos, current in enumerate(self.avail)]

    def book(self,ig):
        """Mark the slot with grid index ig as busy."""
        self.avail[ig] = 0

In [None]:
# For every faculty, create a faculty object and fill in their availability

faculty = []
for fn in faculty_names:
    member = Faculty(fn)
    member.initAvail(grid, avail_faculty)
    for d in days:
        member.updateAvail(grid, (d,12))             # Book lunch breaks
    if 'w' in days:                                  # Wed is an advising day, so take it out completely
        for ihou in range(1,25):
            member.updateAvail(grid, ('w',ihou), 0)
    faculty.append(member)

for member in faculty:
    print(member)

In [None]:
# Read boards that are to be hard-set manually (or were already settled)
# Each line: the student name in the first student_name_length characters, then
# whitespace-separated tokens of which the 3rd is the day of the month and the 4th
# the time, like '2p' or '10a' (presumably a copy of the printed output table).

student_name_length = 21    # Needs to be set manually, to cover only the name, but not the rest
first_date = 7              # Even more silly - the day that corresponds to Monday.
                            # This entire thing needs to be refactored; this is getting ridiculous!
day_letters = ['m','t','w','r','f'] # Was defined before, but just in case

settled_rows = []
with open('../../data/boards2020f/settled.txt', 'r') as file:
    for line in file:
        if not line.strip():
            continue                       # tolerate blank lines, e.g. at the end of the file
        name = line[:student_name_length].strip()
        s = line[student_name_length:].split() # Only non-empty tokens
        day_index = int(s[2]) - first_date     # uses first_date instead of a second hard-coded 7
        if not 0 <= day_index < len(day_letters):
            # A negative index would silently wrap around to the end of the week
            raise ValueError('Date %s is outside of the boards week: %r' % (s[2], line))
        day = day_letters[day_index]
        # 12-hour label -> 24h hour. Both '12a' and '12p' are read as noon: boards never
        # meet at midnight, and older output tables spelled noon as '12a'.
        # (Adding 12 to '12p' used to give hour 24, which is not on the grid.)
        hour = int(s[3][:-1])
        if s[3][-1]=='p' and hour != 12:
            hour += 12
        # 'hour' rather than 'time', so that the imported time module is not overwritten
        settled_rows.append({'name':name, 'day':day, 'time':hour})

# One DataFrame built at the end replaces DataFrame.append, which pandas 2.0 removed
settled = pd.DataFrame(settled_rows, columns=['name', 'day', 'time'])

#settled

In [None]:
# Calculate student availability, and right after that - distribute boards

class Board:
    '''One student's board: its members, and a 0/1 flag for every grid slot it could meet in.'''

    # Day tokens of the schedule table (lower-cased) that differ from the grid's day letters.
    # Without it 'Th' never matches 'r', and Thursday classes are silently ignored.
    gridDay = {'th': 'r'}

    def __init__(self, data): # Creator
        """
        data is one row of the "composition" dataframe, with fields
        ['Student', 'Advisor', 'Board2', 'Board3', 'Size']
        """
        # Real name, used only to look the student up in the schedule: the displayed
        # name below may be scrambled, and a scrambled name matches no schedule row.
        self.scheduleKey = data.Student
        self.student = data.Student
        if hideNames:
            self.student = ''.join(random.sample(self.student.lower(),len(self.student)))
        # Ignore 3d member if 2-board; int() also accepts a Size that was read as a float
        self.members = list(data[['Advisor','Board2','Board3']])[:int(data.Size)]
        #self.email = stuff['email'] # email is a better id, but this time it's not in the report
        # Board type is not in the report this time either; the empty placeholder keeps
        # narrowBoardTime() and the printing code from failing on a missing attribute.
        self.type = ''
        self.avail = []  # Placeholder: availability grid
        self.time = []   # Placeholder: the (day, hour) slot, once the board is scheduled

    def __str__(self):
        s = "%20s" % (self.student) + '\t'
        s += ' '.join([m[:4] for m in self.members]) + '\t' # Shortened version
        s += ''.join(['%d' % i for i in self.avail])
        return s

    def initTimes(self, grid, schedule):
        """
        Reset self.avail to "free everywhere", then block every slot that touches a class.

        grid is a list of (day letter, hour) tuples; a slot covers [hour, hour+1).
        schedule is a schedule table with columns
        ['name', 'day', 'start', 'finish', 'program', 'course_n']
        start and finish are time strings, like '8:30 am' for example.
        """
        lookup_name = getattr(self, 'scheduleKey', self.student)
        data = schedule.loc[schedule['name']==lookup_name]
        self.avail = [1]*len(grid)
        for i in range(data.shape[0]): # For every class
            h = data.iloc[i] # One row (series)
            # course_n comes out of the text parser as a string, so compare both ways
            is_biosem = (h['course_n'] == 208) or (str(h['course_n']).strip() == '208')
            if h['program'] == 'BIO' and not is_biosem:
                continue # We cancel bio classes, EXCEPT BIOSEM this time
            day = h['day'].lower()
            day = self.gridDay.get(day, day)
            if not any(g[0]==day for g in grid):
                continue # Day is not on the grid; its times are never looked at
            start = pd.to_datetime(h['start'])
            finish = pd.to_datetime(h['finish'])
            start_hour = start.hour + start.minute/60     # fractional hours, e.g. 8.5
            finish_hour = finish.hour + finish.minute/60
            for ig, g in enumerate(grid):
                # Blocked if the class overlaps the slot, or starts exactly when the slot
                # ends (as before). Written as one interval test, so that a class lying
                # entirely inside the hour is caught as well.
                if g[0]==day and start_hour <= g[1]+1 and finish_hour > g[1]:
                    self.avail[ig] = 0

    def narrowBoardTime(self):
        '''Additional requirements on some types of boards.

        Reads the module-level grid (it is not passed in). Not called anywhere at the moment.
        '''
        if self.type=="final": # For final boards, make Monday unavailable, as sprojes are due that day
            for ig in range(len(grid)):
                if grid[ig][0]=='m':
                    self.avail[ig] = 0

    def refreshFac(self,faculty):
        '''Filters boards based on current faculty availability.

        faculty is a list of objects with .name and .avail; a slot stays open
        only if it is open for the board and for every listed member found in that list.
        '''
        for f in faculty:
            for facname in self.members:
                if f.name==facname:
                    self.avail = [self.avail[i]*f.avail[i] for i in range(len(self.avail))]

# ---- Actually populating boards
boards = []
print('Student availability:')
for ib in range(composition.shape[0]):
    board = Board(composition.loc[ib,])
    board.initTimes(grid, schedule)
    print(board)
    boards.append(board)

for board in boards: # Do the board calculation
    # board.narrowBoardTime() # Not implemented!
    board.refreshFac(faculty)

# Rearrange from those that are harder to schedule (fewest open slots) to those that are easier.
# sorted() is stable, so boards with the same number of open slots keep their table order.
boards = sorted(boards, key=lambda board: sum(board.avail))

print('\nFull Board availability:')
for board in boards:
    print(board)

In [None]:
# Snapshot of boards and faculty, so that the solver cell below can be rerun from a clean state
bb, bf = copy.deepcopy(boards), copy.deepcopy(faculty)

In [None]:
# Solve boards

# Always start from the snapshot, so this cell can be rerun
boards = copy.deepcopy(bb)
faculty = copy.deepcopy(bf)

# Pre-assigned students grab their places first: they go to the front of the queue
# (each one is put before the previous one, hence the reversal), the rest keep their order.
settled_names = settled.name.values
pinned = [b for b in boards if b.student in settled_names]
unpinned = [b for b in boards if b.student not in settled_names]
newboards = pinned[::-1] + unpinned
boards = newboards

for b in boards:
    b.refreshFac(faculty)   # account for the slots taken by the boards placed so far
    if b.student in settled_names:
        # Hard-set board: take the slot from the settled table, whatever the availability says
        row = settled.name.eq(b.student).idxmax()
        stamp = (settled.loc[row,'day'], settled.loc[row,'time'])
        ig = [k for k, slot in enumerate(grid) if slot==stamp][0]
        print('O', end=' ')
    else:
        # Greedy choice: the earliest slot that is still open for everybody
        open_slots = [k for k, flag in enumerate(b.avail) if flag==1]
        if not open_slots:
            print('Cannot solve the puzzle for this board:')
            print(b)
            break
        ig = open_slots[0]
        print('>', end=' ')
    b.time = grid[ig]
    b.avail[ig] = 0
    for f in faculty:
        if f.name in b.members:
            f.book(ig)
    day, hour = grid[ig]
    print('%s\t%s\t' % (dayName[day],hour), end='')
    print(b)

#for b in boards:
#    print(b)

In [None]:
# Output table

# Chronological order: sort by the position of each board's slot in the grid.
# The sort is stable, so boards that share a slot keep their relative order.
boards = sorted(boards, key=lambda b: grid.index(b.time))

def usTime(time):
    """Turn a 24h hour number into a short US-style label, e.g. 9 -> '9a', 14 -> '2p'.

    Noon is '12p' and midnight is '12a' (they used to come out as '12a' and '0a').
    """
    suffix = 'p' if time >= 12 else 'a'
    hour = time % 12
    if hour == 0:
        hour = 12   # there is no hour 0 on a 12-hour clock
    return "%d%s" % (hour, suffix)

def printboard(b,mode="none"):
    """Print one scheduled board.

    b: a board object with .student, .members and .time, a (day letter, hour) tuple.
    mode: "none" - tab-separated row with the raw 24h hour;
          "tabs" - aligned text row (or a tab-separated row with a dummy email
                   when hideNames is set);
          "html" - a table row, rendered through IPython's display().
          Any other value prints nothing.

    The board type is read with a fallback to '', as board objects do not
    necessarily carry a .type attribute.
    """
    btype = getattr(b, 'type', '')
    if mode=="none":
        print("%22s\t%12s\t%s\t%d\t" % (b.student,btype,dayName[b.time[0]],b.time[1]),end='')
        for fn in b.members:
            print("%10s\t" % (fn),end='')
        print()
    elif mode=="tabs":
        if not hideNames:
            #print("%s\t%s\t%s\t%s\t%s\t" % (b.student,b.email,b.type,dayName[b.time[0]],usTime(b.time[1])),end='')
            #print("%s\t%s\t%s\t" % (b.student,dayName[b.time[0]],usTime(b.time[1])),end='')
            print(f"{b.student:21} {dayName[b.time[0]]}  {usTime(b.time[1]):3}  ",end='')
        else:
            print("%s\t%s\t%s\t%s\t%s\t" % (b.student,"nope@nope.edu",btype,dayName[b.time[0]],usTime(b.time[1])),end='')
        for fn in b.members:
            print(f"{fn:12}",end='')
        print()
    elif mode=="html":
        s = ''
        s += "<tr><td>%s</td><td>%s</td><td>%s %d</td><td>" % (b.student,btype,dayName[b.time[0]],b.time[1])
        for fn in b.members:
            s += "%s " % (fn)
        s += "</td></tr>"
        display(HTML(s))

# One aligned text row per board
for board in boards:
    printboard(board, mode="tabs")

In [None]:
#Sort by faculty:

# One section per faculty member: the name, then every board they sit on.
# The name is used directly; the earlier search through the faculty objects fell
# through to the last object when nothing matched, and would then print the wrong header.
for faculty_name in sorted(faculty_names): # Alphabetize
    print(faculty_name)
    for b in boards:
        if faculty_name in b.members:
            printboard(b,mode="tabs")
    print()

In [None]:
# Generate Letters

if False:   # Switched off; change to True to print one notification letter per board
    for b in boards:
        # List of professors in plain English: 'A and B' for two, 'A, B, and C' for more
        names = b.members
        if not names:
            profs = ''
        elif len(names)==2:
            profs = ' and '.join(names)
        else:
            profs = ', '.join(names[:-1]) + ', and ' + names[-1]
        print(f"""Dear {b.student.split(' ')[0]},

    Your Biology board with professors {profs} is scheduled next week, on {dayName[b.time[0]]} at {usTime(b.time[1]):3}.

    If this time doesn't work for you please let me know!

    I picked this time taking into account your profs availability, and your course on BIP.
    All BIO courses except Biosem will be cancelled next week, but I'm assuming that 
    other courses could be taking place, so I tried to avoid them). 
    At some point your advisor will send you a Zoom link for the meeting.

    Thanks! And good luck finishing the semester!!""")    
        print('------------------')