## Sproj Board Scheduler, Spring 2021

New version, Pandas-based

A script that reads faculty and student availability for the boards week and schedules all boards, accounting for their target composition and student availability.

All of the data used by this script is sensitive, and so has to be stored outside of git.

Data sources:
* Student availability comes from BIP (schedules for all majors, downloaded as one report, saved to pdf by Craig, then from pdf to txt by me)
* Faculty availability comes from a CSV file, indicating times when their availability switches

### TODOs

* Make sure sprojes are only starting Friday

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import random
from IPython.display import HTML, display

# Set constants and prepare time slots

In [None]:
# Privacy toggle, see the inline note for the meaning of 0 and 1.
# NOTE(review): nothing in the visible part of this notebook reads hideNames - confirm it is still needed.
hideNames = 0 # set 0 for troubleshooting, set 1 before githubbing

# Folder with all input files; the data is sensitive, so it lives outside of git
folder_name = '../../data/boards2021s/' # Update as necessary
# Dates of the boards week; each date is later expanded into hourly time slots
days = ['May-5-2021', 'May-6-2021', 'May-7-2021', 'May-10-2021', 'May-11-2021']

In [None]:
# Build the time axis of the availability matrix:
# one row per hourly slot, from 8:00 through 17:00, for every day of the boards week
temp = [pd.to_datetime(f"{day} {hour}:00")
        for day in days
        for hour in range(8, 18)]
df_avail = pd.DataFrame({'slot': temp})
# df_avail.head(20)

# Load data

In [None]:
# Senior boards composition
composition = pd.read_csv(folder_name + 'senior_boards.csv') # Board composition
composition['kind'] = 'sproj'

# Every faculty member that sits on at least one senior board.
# pd.concat replaces Series.append, which was removed in pandas 2.0.
faculty_names = (pd.concat([composition['advisor'],
                            composition['member2'],
                            composition['member3']])
                 .dropna()
                 .unique()
                 .tolist()
                )

# Students are listed as "Last, First": remove the comma and put the first name first
composition['student'] = [' '.join(full_name.split(', ')[::-1])
                          for full_name in composition['student']]

# print(' '.join(faculty_names))

# Moderation boards composition
moderations = pd.read_csv(folder_name + 'moderation_boards.csv') # Board composition
moderations = moderations.drop(['advisors', 'paper'], axis=1)
moderations['kind'] = 'moderation'

# reset_index() (without drop=True) keeps the old row labels in an 'index' column;
# the finalization step of the scheduler drops that column by name, so keep it here.
composition = pd.concat((composition,moderations), axis=0).reset_index()
# Number of boards every faculty member sits on (displayed as the cell output)
(pd.concat([composition.advisor, composition.member2, composition.member3])
 .value_counts()
)

In [None]:
# Read the official faculty list and give everyone an availability column,
# with the universally blocked times already marked off

# Times to be blocked for everyone, as [start, end) pairs:
block_times = np.array([['May-05-2021 12p', 'May-05-2021 2p'],
                        ['May-06-2021 12p', 'May-06-2021 1p'],
                        ['May-07-2021 12p', 'May-07-2021 1p'],
                        ['May-10-2021 12p', 'May-10-2021 1p'],
                        ['May-11-2021 12p', 'May-11-2021 1p']
                       ])
# Don't plan sprojes before this date:
first_day_of_sprojes = 'May-07-2021'

df_fac = pd.read_csv(folder_name + 'faculty.csv')

# The blocked windows are identical for everybody, so the mask is built only once
blocked = pd.Series(False, index=df_avail.index)
for window_start, window_end in block_times:
    blocked |= ((df_avail.slot >= pd.to_datetime(window_start)) &
                (df_avail.slot < pd.to_datetime(window_end)))

# One boolean column per faculty member (keyed by first name): True = available
for first_name in df_fac['first_name']:
    df_avail[first_name] = ~blocked
        
def plot_avail(df):
    """Draw an availability table as a grayscale image.

    The first column of df (the time slots) is skipped; every other column
    becomes one image row, with the booleans converted to 0/1.
    """
    grid = df.iloc[:, 1:].values.T * 1
    plt.imshow(grid, cmap='gray')
    
# Visualize: one image row per faculty column, one image column per time slot
# (plot_avail transposes the table and draws it with the gray colormap)
plot_avail(df_avail)

In [None]:
# Read and process faculty availability.
# Every row that is not flagged 'in' starts an unavailable stretch for that person;
# the stretch lasts until the next row of the same person on the same day,
# or until 11pm when there is no such row.
table = pd.read_csv(folder_name + 'faculty_availability.csv')
rows = list(table.itertuples(index=False))
for current, following in zip(rows, rows[1:] + [None]):
    if current.flag == 'in':
        continue
    start = pd.to_datetime(current.day + ' ' + current.time)
    same_stretch = (following is not None and
                    following.name == current.name and
                    following.day == current.day)
    closing_time = following.time if same_stretch else '11p'
    end = pd.to_datetime(current.day + ' ' + closing_time)
    name = current.name
    ind = (df_avail.slot >= start) & (df_avail.slot < end)
    df_avail.loc[ind, name] = False

# Show
plot_avail(df_avail)

# Faculty by their availability: most open slots first
print('Scheduling slots offered by each faculty:')
open_counts = [(sum(df_avail[person]), person) for person in df_avail.columns[1:].tolist()]
open_counts.sort(reverse=True)
print("\n".join(f"{person}: {n_open}" for n_open, person in open_counts))

In [None]:
# Availability of one faculty
# print('\n'.join([str(v) for v in df_avail.loc[df_avail['Mike'], 'slot']]))

In [None]:
# Read and process student availability.
# The text file is a sequence of "Student: <name> -- ..." header lines, each followed
# by the course rows of that student (weekday letters plus a start and a finish time).
altnames = pd.read_csv(folder_name + 'altnames.csv')
dayletters = {'M','T','W','Th','F'}
# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole table on every call.
class_rows = []
with open(folder_name + 'student_schedules.txt', 'r') as file:
    current_student = ''
    for line in file:
        if len(line)<3: # Empty line
            continue
        # Tabs are treated as spaces. Repeated spaces are NOT collapsed
        # (the join/split that used to follow this line had no effect).
        s = line.replace('\t',' ').split(' ')
        if s[0]=='Student:':
            lim = s.index('--') # Find where the name ends
            name = ' '.join(s[1:lim])
            # Translate names that are spelled differently in the boards tables
            if name in altnames.oldname.values:
                name = altnames.loc[altnames.oldname==name, 'name'].values[0]
            current_student = name
            print('.', end='')
            #print(name, end=" | ")
        else: # Course
            # Named class_days (not days) so that the notebook-level list of
            # board dates, also called days, is not overwritten by this cell.
            class_days = []
            is_left_time = True
            s = line.replace('-',' ').split(' ') # To avoid problems with pm/am format they use
            if s[0] not in dayletters: # If it's a course row, and not just a time row
                program = s[0] # Program code, like BIO or CHEM
                course_n = s[1] # Number in the course name, like 101 or something
            # NOTE(review): program, course_n, left_time and right_time keep the value
            # from the previous row whenever the current row does not set them.
            for ci, c in enumerate(s):
                if c in dayletters: # Day of the week
                    class_days.append(c)
                elif c in {'am', 'pm', 'am-', 'pm-'}:
                    # The token right before am/pm is the clock time;
                    # the first one on a row is the start, any later one the finish
                    if is_left_time:
                        left_time = s[ci-1]+' '+c
                        is_left_time = False
                    else:
                        right_time = s[ci-1]+' '+c
            for class_day in class_days:
                class_rows.append({'name':current_student, 'day':class_day,
                                   'program':program, 'course_n':course_n,
                                   'start':left_time, 'finish':right_time})

schedule = pd.DataFrame(class_rows,
                        columns=['name', 'day', 'start', 'finish', 'program', 'course_n'])

# Go from literals (like "12 pm") to datetime.
# The date doesn't make sense here (it defaults to current date), only time.
schedule.start = pd.to_datetime(schedule.start)
schedule.finish = pd.to_datetime(schedule.finish)

In [None]:
# Check one student's classes
# schedule.query("name.str.contains('arx')", engine='python')

In [None]:
# Students that have a board but no entry in the parsed class schedules
scheduled_names = schedule.name.values
student_list = []
for student in composition.student.values:
    if student in scheduled_names:
        continue
    student_list.append(student)
print('Missing students:', student_list)

In [None]:
# Add one availability column per student and block the hours of their classes

day_dict = {'M':0, 'T':1, 'W':2, 'Th':3, 'F':4, 'S':5} # That's how pandas.weekday works
students_with_schedule = schedule.name.unique()
slot_weekday = df_avail.slot.dt.weekday
slot_hour = df_avail.slot.dt.hour
for name in composition.student:
    df_avail[name] = True
    if name not in students_with_schedule:
        print(f"No schedule for student {name}; assuming full availability.")
        continue
    classes = schedule[schedule.name == name]
    for program, day_letter, start, finish in zip(classes.program, classes.day,
                                                   classes.start, classes.finish):
        if program == 'BIO': # BIO classes do not block the student
            continue
        # Block every slot of that weekday from the starting hour of the class
        # up to its finishing time rounded up to a full hour
        busy = ((slot_weekday == day_dict[day_letter]) &
                (slot_hour >= start.hour) &
                (slot_hour < finish.ceil('H').hour))
        df_avail.loc[busy, name] = False

plot_avail(df_avail)
plt.xlabel('Time slots');
plt.ylabel('Students and faculty');

# Calculate board availability

In [None]:
# One column per board (named after the student):
# True where the student and all three board members are available
df_boards = pd.DataFrame({'slot': df_avail.slot})
# Slots that come too early for senior project boards
before_sprojes = df_boards.slot < pd.to_datetime(first_day_of_sprojes)
for student, name1, name2, name3, kind in zip(composition['student'],
                                              composition['advisor'],
                                              composition['member2'],
                                              composition['member3'],
                                              composition['kind']):
    df_boards[student] = (df_avail[student] &
                          df_avail[name1] &
                          df_avail[name2] &
                          df_avail[name3])
    if kind == 'sproj':
        df_boards.loc[before_sprojes, student] = False

plot_avail(df_boards)
plt.xlabel('Possible 1-hour time slots');
plt.ylabel('Boards');

In [None]:
def reorder(df):
    """Return df with its board columns sorted by toughness.

    The first column is kept in place; the remaining columns are ordered
    by how many True values they contain, fewest first.
    """
    open_counts = df.iloc[:, 1:].values.astype(int).sum(axis=0)
    ranked = (np.argsort(open_counts) + 1).tolist()  # +1 skips the leading column
    return df.iloc[:, [0] + ranked]

# Put the boards with the fewest possible slots first, then redraw the table
df_boards = reorder(df_boards)    
plot_avail(df_boards)
plt.xlabel('Possible 1-hour time slots');
plt.ylabel('Boards');

In [None]:
# A helping routine for troubleshooting of individual tricky boards
def show_composition(name):
    """Print the participants of one board and their slot-by-slot availability.

    name is matched against the student names with str.contains (which
    interprets it as a regular expression by default); the first matching
    board is shown. Reads the notebook-level composition and df_avail tables.
    """
    matches = composition.query("student.str.contains(@name)", engine='python')
    roles = ['student', 'advisor', 'member2', 'member3']
    list_of_participants = matches[roles].values[0].tolist()
    print(list_of_participants)
    print(df_avail[['slot'] + list_of_participants])
    
# show_composition('John Smith')

# Actual scheduling

In [None]:
def book(df_boards, dfc, name, slot, value=False):
    """Mark one time slot for every board that shares a member with a given board.

    Parameters:
        df_boards: board availability table with a 'slot' column and one
            column per board, named after the student. Modified in place.
        dfc: board composition table with 'student', 'advisor', 'member2'
            and 'member3' columns.
        name: student whose board is being booked.
        slot: the time slot, compared against df_boards.slot with ==.
        value: what to write into the affected cells (False = no longer available).

    Returns:
        df_boards itself (the same object that was passed in).

    Raises:
        IndexError: if name is not among dfc.student.
    """
    roles = ['advisor', 'member2', 'member3']
    board = dfc.loc[dfc.student == name, roles].values[0]
    # Missing members are left out: isin() would otherwise match NaN against NaN
    members = [member for member in board if pd.notna(member)]
    # Find all boards that involve at least one of these people (in any role)
    shares_member = dfc[roles].isin(members).any(axis=1)
    for affected in dfc.loc[shares_member, 'student']:
        # Boards that are not in the table have nothing to update; writing to
        # them through .loc would silently add a new, mostly-NaN column.
        if affected in df_boards.columns:
            df_boards.loc[df_boards.slot == slot, affected] = value
    # Return the table that was passed in (this used to return the global df)
    return df_boards

In [None]:
df = df_boards.copy() # Work on a copy, so that df_boards keeps the original availability
# Booked boards are collected as {'slot', 'name'} dicts and turned into a table at the
# end (DataFrame.append was removed in pandas 2.0).
booked = []

# Read boards that were already scheduled, and add them to the list
df_already = pd.read_csv(folder_name + 'settled.txt', sep='\t').drop('index', axis=1)
print(f"Pre-scheduled: {df_already.shape[0]} boards")
for name, raw_slot in zip(df_already['student'], df_already['slot']):
    slot = pd.to_datetime(raw_slot)
    df = book(df, composition, name, slot) # We need to go through this to update other boards availability
    booked.append({'slot':slot, 'name':name})

plot_avail(df)
# Find boards that remain to be scheduled
df = df.loc[:, [name for name in df.columns if name not in df_already.student.values]]
print(f"Remains to schedule: {df.shape[1]-1} boards")
# Schedule them greedily: toughest board first, earliest possible slot
for i in range(1,df.shape[1]): # For every board, sorted by toughness
    name = df.columns[i]
    ind = np.where(df.iloc[:,i].values)[0]
    if len(ind)==0:
        print(f"Cannot book a board for {name}")
        continue
    slot = df.slot[ind[0]] # Take first
    booked.append({'slot':slot, 'name':name})
    # print(f"Booking {name} for {slot}")
    df = book(df, composition, name, slot)
    if i<df.shape[1]-1: # If not over, reorder the rest
        # reorder() keeps its first column in place, treating it as the slot column.
        # The remaining boards are therefore passed together with the real 'slot'
        # column, which is stripped again afterwards; passing the bare slice pinned
        # the next board in place no matter how few options it had left.
        rest = pd.concat((df[['slot']], df.iloc[:, (i+1):]), axis=1)
        df = pd.concat((df.iloc[:, :(i+1)], reorder(rest).iloc[:, 1:]), axis=1)

# Finalize:
df_result = pd.DataFrame(booked, columns=['slot', 'name'])
# Keep 'slot' a datetime column even if nothing was booked, because of the .dt accessors used later
df_result['slot'] = pd.to_datetime(df_result['slot'])
df_result = (df_result
             .sort_values('slot')
             .merge(composition, how='left', left_on='name', right_on='student')
             .drop(['index','name'], axis=1)
            )
df_result

In [None]:
# Comma-separated 'email' values of all moderation boards
','.join(df_result.loc[df_result['kind'] == 'moderation', 'email'])

In [None]:
# The final schedule as a table: one row per student, sorted by student name
(df_result
 .assign(weekday=df_result.slot.dt.strftime('%a'),
         date=df_result.slot.dt.strftime('%b-%d'),
         time=df_result.slot.dt.strftime('%I%p').str.lower())
 [['student', 'weekday', 'date', 'time', 'advisor', 'member2', 'member3', 'kind']]
 .sort_values('student')
 .set_index('student')
)

In [None]:
# Printing it by faculty

def print_row(ser):
    """Print one scheduled board as a single fixed-width line.

    ser is one row of the results table; its slot, student, kind, advisor,
    member2 and member3 entries are used. The line shows the weekday, the
    date, the hour, the student, the first three letters of the board kind
    and the three board members.
    """
    weekday_labels = {0:'Mon', 1:'Tue', 2:'Wed', 3:'Thr', 4:'Fri'}
    when = ser.slot
    print(weekday_labels[when.weekday()], end=' ')
    print(when.strftime('%b-%d'), when.strftime('%I%p').lower(), end=' ')
    line = f"{ser.student:22s} {ser.kind[:3]}  {ser.advisor:8} {ser.member2:8} {ser.member3:8}"
    print(line)
    
def print_df(df):
    """Print every row of df with print_row, in the order the rows appear."""
    for position in range(len(df)):
        print_row(df.iloc[position])
    
# For every listed faculty member, print all boards they sit on (in any role)
for name in ['Brooke', 'Bruce', 'Cathy', 'Eli', 'Gabriel', 'Heather', 'Mike', 'Arseny', 'Kerri-Ann']:
    print(name)
    sits_on_board = df_result[['advisor', 'member2', 'member3']].eq(name).any(axis=1)
    print_df(df_result[sits_on_board])
    print('')

In [None]:
# For a given set of faculty, show their cross-availability
def show_cross_availability(names):
    """Show, slot by slot, who of the given people is open or already booked.

    names: list of df_avail column names.

    Only slots in which at least one of the listed people is available are
    included. Returns a table with a 'slot' column and one column per name:
    the student of the board booked for that person in that slot (taken from
    df_result), otherwise 'OPEN' if df_avail marks them available, else '-'.
    reset_index() additionally exposes the original df_avail row labels as
    an 'index' column. Reads the notebook-level df_avail and df_result.
    """
    # Keep the slots where any of the people is available
    slots = df_avail.loc[np.logical_or.reduce(df_avail.loc[:,names].values, axis=1), 'slot']
    out = pd.DataFrame({'slot': slots})
    for name in names:
        # Availability of this person in the selected slots
        slot_status = out.merge(df_avail.loc[:,['slot', name]], how='left', on='slot')[name]
        slot_status = ['OPEN' if a else '-' for a in slot_status ]
        # Boards on which this person is advisor, member2 or member3
        # (the student column is not checked, so a student's own board never matches)
        ind = (df_result.advisor==name) | (df_result.member2==name) | (df_result.member3==name)
        # NOTE(review): the two lists are combined by position, which assumes the merge
        # returns exactly one row per slot (at most one booked board per person per slot) - confirm.
        booked_slots = out.merge(df_result.loc[ind,:], how='left', on='slot').student.fillna('').values
        # A booked board takes precedence over the plain availability marker
        out[name] = [booked_slots[i] if booked_slots[i]!='' else slot_status[i] for i in range(len(slot_status))]
    return out.reset_index()

# Example: cross-availability of several people (displayed as the cell output)
show_cross_availability(['Mike','Heather','Brooke','Cailey Mitchell'])