## Sproj Board Scheduler, Spring 2021

New version, Pandas-based

A script that reads faculty and student availability for the boards week and schedules all boards, accounting for their target composition and student availability.

All of the data used by this script is sensitive, and so has to be stored outside of git.

Data sources:
* Student availability comes from BIP (schedules for all majors, downloaded as one report, saved to pdf by Craig, then from pdf to txt by me)
* Faculty availability comes from a csv file, indicating the times when their availability switches

### TODOs

* Make sure sprojes are only starting Friday

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import random
from IPython.display import HTML, display

# Set constants and prepare time slots

In [None]:
# Switch read by the "tabs" output of printboard: 0 prints the compact troubleshooting row,
# anything truthy prints the tab-separated row with a placeholder email instead.
hideNames = 0 # set 0 for troubleshooting, set 1 before githubbing

# Folder with the input files (the data is sensitive, so it is kept outside of git)
folder_name = '../../data/boards2021s/' # Update as necessary
# Dates of the boards week; every date gets its own run of hourly slots in the availability table
days = ['May-5-2021', 'May-6-2021', 'May-7-2021', 'May-10-2021', 'May-11-2021']

In [None]:
# Time axis of the availability matrix: one row per hourly slot,
# from 8:00 through 18:00 on each of the dates listed in `days`.
slot_labels = [f"{day} {hour}:00" for day in days for hour in range(8, 19)]
df_avail = pd.DataFrame({'slot': [pd.to_datetime(label) for label in slot_labels]})
#df_avail.head(20)

# Load data

In [None]:
# Senior boards composition
composition = pd.read_csv(folder_name + 'senior_boards.csv') # Board composition
composition['kind'] = 'sproj'

# The three faculty seats on every board
member_columns = ['advisor', 'member2', 'member3']

# All faculty sitting on senior boards (moderation boards are not loaded yet at this point).
# Series.append was removed in pandas 2.0; pd.concat stacks the columns on any pandas version.
faculty_names = (pd.concat([composition[col] for col in member_columns])
                 .dropna()
                 .unique()
                 .tolist()
                )

# Remove comma and put first name first, and last name - last ("Smith, John" -> "John Smith")
composition['student'] = composition['student'].map(
    lambda full_name: ' '.join(full_name.split(', ')[::-1]))

# print(' '.join(faculty_names))

# Moderation boards composition
moderations = pd.read_csv(folder_name + 'moderation_boards.csv') # Board composition
moderations = moderations.drop(['advisors', 'paper'], axis=1)
moderations['kind'] = 'moderation'

# reset_index() keeps the old per-file row labels in a new 'index' column
composition = pd.concat((composition,moderations), axis=0).reset_index()

# Number of boards every faculty member sits on (shown as the cell output)
pd.concat([composition[col] for col in member_columns]).value_counts()

In [None]:
# Give every faculty member from the official list a column in the availability table,
# pre-filled with True except for the time windows that are closed for everyone.

# Windows [start, end) blocked for everyone:
block_times = np.array([['May-05-2021 12p', 'May-05-2021 2p'],
                        ['May-06-2021 12p', 'May-06-2021 1p'],
                        ['May-07-2021 12p', 'May-07-2021 1p'],
                        ['May-10-2021 12p', 'May-10-2021 1p'],
                        ['May-11-2021 12p', 'May-11-2021 1p']
                       ])
# Don't plan sprojes before this date:
first_day_of_sprojes = 'May-07-2021'

df_fac = pd.read_csv(folder_name + 'faculty.csv')

# The blocked windows are the same for every person, so mark them once
blocked_for_all = pd.Series(False, index=df_avail.index)
for window_start, window_end in block_times:
    blocked_for_all |= ((df_avail.slot >= pd.to_datetime(window_start)) &
                        (df_avail.slot < pd.to_datetime(window_end)))

for first_name in df_fac['first_name']:
    df_avail[first_name] = True
    df_avail.loc[blocked_for_all, first_name] = False
        
def plot_avail(df):
    """Draw every column of df except the first as one row of a grayscale image."""
    # Transposed so that table rows (time slots) run along the x axis; *1 turns booleans into numbers
    grid = df.iloc[:, 1:].values.T * 1
    plt.imshow(grid, cmap='gray')

# Visualize
plot_avail(df_avail)

In [None]:
# Read and process faculty availability.
# Every row that is not flagged 'in' opens an unavailable stretch; it lasts until the next row
# for the same person on the same day, or until 11p if there is no such row.
# NOTE(review): a name that has no column in df_avail yet gets a brand new column here
# (pandas enlarges the frame on .loc assignment) - confirm names match faculty.csv.
table = pd.read_csv(folder_name + 'faculty_availability.csv')
last_row = table.shape[0] - 1
for i, row in table.iterrows():
    if row['flag'] == 'in':
        continue
    has_follow_up = (i != last_row and
                     table.loc[i+1, 'name'] == row['name'] and
                     table.loc[i+1, 'day'] == row['day'])
    closing_time = table.loc[i+1, 'time'] if has_follow_up else '11p'
    busy_from = pd.to_datetime(row['day'] + ' ' + row['time'])
    busy_until = pd.to_datetime(row['day'] + ' ' + closing_time)
    busy = (df_avail.slot >= busy_from) & (df_avail.slot < busy_until)
    df_avail.loc[busy, row['name']] = False

# Show
plot_avail(df_avail)

# Faculty by their availability, most available first
print('Scheduling slots offered by each faculty:')
slot_counts = sorted(((sum(df_avail[person]), person) for person in df_avail.columns[1:].tolist()),
                     reverse=True)
print("\n".join(f"{person}: {count}" for count, person in slot_counts))

In [None]:
# Read and process student availability
altnames = pd.read_csv(folder_name + 'altnames.csv')
dayletters = {'M','T','W','Th','F'}
meridiems = {'am', 'pm'}

# Rows are collected in a plain list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row is quadratic.
schedule_rows = []
current_student = ''
program = course_n = None # Stay None until the first course row has been read
with open(folder_name + 'student_schedules.txt', 'r') as file:
    for line in file:
        if len(line)<3: # Empty line
            continue
        # Drop the newline first: otherwise it stays glued to the last token ('pm\n'),
        # and a time or a day letter at the very end of a line is never recognised.
        line = line.rstrip('\n')
        s = line.split(' ')
        if s[0]=='Student:':
            # The name ends at the first '--'; without one, take the rest of the line
            lim = s.index('--') if '--' in s else len(s)
            name = ' '.join(s[1:lim])
            if name in altnames.oldname.values:
                name = altnames.loc[altnames.oldname==name, 'name'].values[0]
            current_student = name
            print('.', end='')
            #print(name, end=" | ")
            continue

        # Course
        s = line.replace('-',' ').split(' ') # To avoid problems with pm/am format they use
        if s[0] not in dayletters: # If it's a course row, and not just a time row
            program = s[0] # Program code, like BIO or CHEM
            course_n = s[1] if len(s)>1 else '' # Number in the course name, like 101 or something

        # Named course_days (not days) so the notebook-level list of board dates is left alone
        course_days = [c for c in s if c in dayletters]
        # Every am/pm marker closes a time; the token right before it is the clock reading
        times = [s[ci-1]+' '+c for ci, c in enumerate(s) if ci>0 and c in meridiems]
        if not course_days or len(times)<2:
            # Not a meeting row. Skipping it also keeps the times of the previous
            # row from leaking into this one.
            continue
        for day in course_days:
            schedule_rows.append({'name':current_student, 'day':day,
                                  'program':program, 'course_n':course_n,
                                  'start':times[0], 'finish':times[-1]})

schedule = pd.DataFrame(schedule_rows,
                        columns=['name', 'day', 'start', 'finish', 'program', 'course_n'])

# Go from literals (like "12 pm") to datetime.
# The date doesn't make sense here (it defaults to current date), only time.
schedule['start'] = pd.to_datetime(schedule['start'])
schedule['finish'] = pd.to_datetime(schedule['finish'])

In [None]:
# Students that sit a board but never showed up in the parsed schedules
scheduled_names = schedule.name.values
student_list = [student for student in composition.student.values
                if student not in scheduled_names]
print('Missing students:', student_list)

In [None]:
# Add students to availability table: one column per student, True where the student is free

day_dict = {'M':0, 'T':1, 'W':2, 'Th':3, 'F':4, 'S':5} # That's how pandas.weekday works

# These do not change while students are being added, so compute them once
scheduled_students = set(schedule.name.unique())
slot_weekday = df_avail.slot.dt.weekday
slot_hour = df_avail.slot.dt.hour

for name in composition.student:
    df_avail[name] = True
    if name not in scheduled_students:
        print(f"No schedule for student {name}; assuming full availability.")
        continue
    classes = schedule[schedule.name == name]
    for program, day, start, finish in zip(classes['program'], classes['day'],
                                           classes['start'], classes['finish']):
        if program == 'BIO':
            # BIO classes do not block a slot (presumably they do not meet during boards week)
            continue
        # Round the finish time up to a whole hour by hand. This avoids the 'H' frequency
        # alias (deprecated since pandas 2.2) and, unlike ceil(), does not wrap around to
        # hour 0 for a class that ends after 23:00.
        end_hour = finish.hour + any((finish.minute, finish.second,
                                      finish.microsecond, finish.nanosecond))
        busy = ((slot_weekday == day_dict[day]) &
                (slot_hour >= start.hour) &
                (slot_hour < end_hour))
        df_avail.loc[busy, name] = False

plot_avail(df_avail)
plt.xlabel('Time slots');
plt.ylabel('Students and faculty');

# Calculate board availability

In [None]:
# One column per board (named after the student): True for the slots in which the student
# and every faculty member on the board are all available.
sproj_start = pd.to_datetime(first_day_of_sprojes) # Parsed once instead of once per board
df_boards = pd.DataFrame({'slot': df_avail.slot})
for _, board in composition.iterrows():
    student = board['student']
    available = df_avail[student].copy()
    for seat in ['advisor', 'member2', 'member3']:
        member = board[seat]
        if pd.isna(member):
            # An empty seat puts no constraint on the board (looking up NaN would raise KeyError)
            continue
        available = available & df_avail[member]
    if board['kind'] == 'sproj':
        # Senior project boards may not start before first_day_of_sprojes
        available = available & ~(df_boards.slot < sproj_start)
    df_boards[student] = available

plot_avail(df_boards)
plt.xlabel('Possible 1-hour time slots');
plt.ylabel('Boards');

In [None]:
def reorder(df):
    """Sort boards by toughness.

    The first column stays where it is; all other columns are rearranged by
    ascending column sum, so the columns with the fewest True entries come first.
    A new frame is returned, the argument is not modified.
    """
    open_slot_counts = df.iloc[:, 1:].values.astype(int).sum(axis=0)
    column_order = [0] + [position + 1 for position in np.argsort(open_slot_counts)]
    return df.iloc[:, column_order]

# Put the boards with the fewest possible slots first, then redraw the picture
df_boards = reorder(df_boards)    
plot_avail(df_boards)
plt.xlabel('Possible 1-hour time slots');
plt.ylabel('Boards');

In [None]:
# A helping routine for troubleshooting of individual tricky boards
def show_composition(name):
    """Print who sits on one board and the availability of each of them.

    name: the student's name, or any part of it. It is matched literally (not as
    a regular expression), so names with characters like '.' or '(' are safe.
    Only the first matching board is shown. Nothing is returned.
    """
    is_match = composition.student.str.contains(name, regex=False, na=False)
    view = composition[is_match]
    #print(view)
    if view.empty:
        print(f"No board found for a student matching {name!r}")
        return
    list_of_participants = view.loc[:, ['student', 'advisor', 'member2', 'member3']].values.tolist()[0]
    print(list_of_participants)
    # Empty seats (NaN) have no column in the availability table
    present = [person for person in list_of_participants if pd.notna(person)]
    print(df_avail.loc[:, ['slot'] + present])
    
# show_composition('John Smith')

# Actual scheduling

In [None]:
def book(df_boards, dfc, name, slot, value=0):
    """Mark one time slot for every board that shares a faculty member with a given board.

    df_boards: table with a 'slot' column and one column per board, named after the student.
               It is modified in place.
    dfc:       board composition with 'student', 'advisor', 'member2' and 'member3' columns.
    name:      student whose board is being booked.
    slot:      value of the 'slot' column to mark.
    value:     what to write into the affected cells; it is stored as a boolean, so the
               default 0 closes the slot while keeping the board columns boolean.

    Returns df_boards (the same object that was passed in).
    Raises IndexError if dfc has no board for this student.
    """
    seats = ['advisor', 'member2', 'member3']
    board = dfc.loc[dfc.student == name, seats].values[0]
    # Empty seats (NaN) must not make two boards look like they share a person
    members = [person for person in board if pd.notna(person)]
    # A board is affected when any of its seats is taken by any member of the booked board
    shares_member = dfc[seats].isin(members).any(axis=1)
    for affected in dfc.loc[shares_member, 'student']:
        df_boards.loc[df_boards.slot == slot, affected] = bool(value)
    return df_boards

# Greedy scheduling: take the toughest remaining board, give it its first open slot,
# close that slot for every board sharing a faculty member, re-rank the rest, repeat.
df = df_boards.copy() # Make a copy
booked_rows = [] # Collected here and turned into a DataFrame once (DataFrame.append is gone in pandas 2.0)
for i in range(1,df.shape[1]): # For every board, sorted by toughness
    name = df.columns[i]
    open_slots = np.where(df.iloc[:,i].values)[0]
    if len(open_slots)==0:
        print(f"Cannot book a board for {name}")
        # Keep the board in the result with an empty slot, and do not try to book anything
        booked_rows.append({'slot':pd.NaT, 'name':name})
        continue
    slot = df.slot.iloc[open_slots[0]] # Take first
    booked_rows.append({'slot':slot, 'name':name})
    # print(f"Booking {name} for {slot}")
    df = book(df, composition, name, slot)
    if i<df.shape[1]-1: # Next exists
        # reorder() always leaves its first column in place, so the slot column has to be
        # passed along with the remaining boards; otherwise the next board would be pinned
        # and never re-ranked.
        remaining = reorder(df.iloc[:, [0] + list(range(i+1, df.shape[1]))])
        df = pd.concat((df.iloc[:, :(i+1)], remaining.iloc[:, 1:]), axis=1)

df_result = pd.DataFrame(booked_rows, columns=['slot', 'name'])
# df_result

In [None]:
# Finalize dataset

# Chronological list of bookings, with the board details attached to every row
chronological = df_result.sort_values('slot')
with_board_details = chronological.merge(composition, how='left',
                                         left_on='name', right_on='student')
df_result = with_board_details.drop(columns=['index'])
df_result

# Old code

In [None]:
# Output table

# Order the boards by the position of their time in the grid; ties keep their current order.
# NOTE(review): `grid` and `boards` are not defined anywhere in the visible notebook.
first_position = [min(pos for pos, moment in enumerate(grid) if moment==b.time) for b in boards]
ind = sorted(range(len(first_position)), key=first_position.__getitem__)
boards = [boards[pos] for pos in ind]

def usTime(time):
    """Turn an hour on the 24-hour clock into a short 12-hour label, like '9a' or '1p'.

    Noon is '12p' and midnight is '12a'. A fractional hour is truncated to the whole hour.
    """
    hour = int(time)
    suffix = "p" if hour >= 12 else "a"
    if hour > 12:
        hour -= 12
    elif hour == 0:
        hour = 12
    return f"{hour}{suffix}"

def printboard(b,mode="none"):
    """Output one board in the requested layout.

    b:    a board; its student, type, time (day key, hour) and members are read.
    mode: "none" - fixed-width columns separated by tabs;
          "tabs" - compact row, or a tab-separated row with a placeholder email when hideNames is set;
          "html" - a single table row, displayed through IPython.
          Any other value outputs nothing.
    """
    if mode=="html":
        row = "<tr><td>%s</td><td>%s</td><td>%s %d</td><td>" % (b.student,b.type,dayName[b.time[0]],b.time[1])
        row += "".join(["%s " % (fn) for fn in b.members])
        display(HTML(row + "</td></tr>"))
        return

    if mode=="none":
        print("%22s\t%12s\t%s\t%d\t" % (b.student,b.type,dayName[b.time[0]],b.time[1]),end='')
        print("".join(["%10s\t" % (fn) for fn in b.members]))
        return

    if mode=="tabs":
        if hideNames:
            lead = "%s\t%s\t%s\t%s\t%s\t" % (b.student,"nope@nope.edu",b.type,dayName[b.time[0]],usTime(b.time[1]))
        else:
            lead = f"{b.student:21} {dayName[b.time[0]]}  {usTime(b.time[1]):3}  "
        print(lead,end='')
        print("".join([f"{fn:12}" for fn in b.members]))

# Print every board, one row each, in the "tabs" layout
for b in boards:
    printboard(b,mode="tabs")

In [None]:
#Sort by faculty:

# For every faculty member (alphabetically): the name, then all boards they sit on.
# The name from faculty_names is used directly. Looking it up in the faculty list first
# printed the last entry of that list whenever the name was not found there.
for faculty_name in sorted(faculty_names): # Alphabetize
    print(faculty_name)
    for b in boards:
        if faculty_name in b.members:
            printboard(b,mode="tabs")
    print()

In [None]:
# Generate Letters

# Switched off on purpose: change the condition to print one notification letter per board
if False:
    for b in boards:
        # List the professors as "A", "A and B" or "A, B, and C"
        members = list(b.members)
        if len(members) <= 1:
            profs = ''.join(members)
        elif len(members) == 2:
            profs = ' and '.join(members)
        else:
            profs = ', '.join(members[:-1]) + ', and ' + members[-1]
        print(f"""Dear {b.student.split(' ')[0]},

    Your Biology board with professors {profs} is scheduled next week, on {dayName[b.time[0]]} at {usTime(b.time[1]):3}.

    If this time doesn't work for you please let me know!

    I picked this time taking into account your profs availability, and your course on BIP.
    All BIO courses except Biosem will be cancelled next week, but I'm assuming that 
    other courses could be taking place, so I tried to avoid them). 
    At some point your advisor will send you a Zoom link for the meeting.

    Thanks! And good luck finishing the semester!!""")    
        print('------------------')