In [2]:
#Read in applicant data and a control file

#applicant_data should have the following header row:
#c("name", "penn_email", "preference_1", "preference_2", "preference_3", "preference_4", "manual_staffing", "other_projects", "unlisted_projects")

#control_file should have the following header row:
#c("team_name", "team_size")

#The team names from the applicant data file as listed under preference_1, etc. and the control file team names must be EXACTLY THE SAME

#Likewise, the team name listed for any manual staffing must be exactly the same

#Other projects and unlisted projects should have yes/no answers

#Ensure that none of the fields within either the applicant data or control file contains commas

#These are COMMA SEPARATED files, so any team name containing a comma (e.g. "TEAM X, LLC") will break reading in the file.

#CHANGE THESE TWO LINES TO THE PROPER PATH FOR APPLICANT DATA AND CONTROL FILE
#quote = "\"" : read.table() also treats ' as a quote character by default, so a
#               name such as O'Brien would swallow the following fields/lines.
#comment.char = "" : read.table() treats # as a comment start by default, which
#               would silently truncate any field containing a # character.
applicant_data <- read.table("staffing_input_applicant_data_2017.csv", sep = ",", header = TRUE,
                             quote = "\"", comment.char = "")

control_file <- read.table("staffing_input_control_file_2017.csv", sep = ",", header = TRUE,
                           quote = "\"", comment.char = "")

#head(applicant_data)
#head(control_file)

In [3]:
#Build the staffing matrix that later steps fill in: one row per team (named by
#team_name) and one column per seat, sized to the largest team in the control file.

#Teams smaller than the largest team get their leading seats pre-filled with the
#text BUFFER so that only team_size seats remain open (NA) for applicants.

team_sizes <- control_file$team_size
widest_team <- max(team_sizes)

empty_staffing <- matrix(NA, nrow = nrow(control_file), ncol = widest_team)
rownames(empty_staffing) <- control_file$team_name

#Number of seats each team must give up relative to the largest team
spare_seats <- widest_team - team_sizes

for (team_row in seq_len(nrow(control_file))) {
    if (spare_seats[team_row] > 0) {
        empty_staffing[team_row, 1:spare_seats[team_row]] <- "BUFFER"
    }
}

#empty_staffing

In [4]:
#Fill in empty_staffing with manual overrides first

#Rows of applicant_data whose manual_staffing field is non-blank (NA entries are
#dropped by which()).
manual_applicant_indices <- which(applicant_data$manual_staffing != "")

#Iterate over the indices themselves: when nobody is manually staffed the vector
#is empty and the loop body never runs (1:length(x) would iterate over c(1, 0)).
for (applicant_index in manual_applicant_indices) {
    manual_team <- as.character(applicant_data$manual_staffing[applicant_index])
    applicant_name <- as.character(applicant_data$name[applicant_index])

    #The team name must match a control-file team exactly; fail with a readable
    #message instead of a bare "subscript out of bounds".
    if (!(manual_team %in% rownames(empty_staffing))) {
        stop("Manual staffing team '", manual_team, "' for applicant '", applicant_name,
             "' does not match any team_name in the control file", call. = FALSE)
    }

    #First open (NA) seat on the requested team; NA when the team is already full
    open_seat <- which(is.na(empty_staffing[manual_team, ]))[1]

    if (is.na(open_seat)) {
        #Previously this assignment was silently skipped, so the applicant
        #disappeared from both the staffing and the unstaffed list.
        warning("Team '", manual_team, "' is full; manual staffing of '", applicant_name,
                "' was NOT applied", call. = FALSE)
    } else {
        empty_staffing[manual_team, open_seat] <- applicant_name
    }
}

#empty_staffing

In [5]:
#Remove manually staffed applicants from the applicant data

#Only drop rows when there is something to drop: negative indexing with an empty
#index vector (x[-integer(0), ]) selects ZERO rows, which would delete every
#applicant whenever no manual staffing was requested.
if (length(manual_applicant_indices) > 0) {
    applicant_data <- applicant_data[-manual_applicant_indices, ]
}

#dim(applicant_data)
#head(applicant_data)

#Shuffle remaining applicants: sample(n) returns a random permutation of 1..n,
#so every remaining row is kept exactly once, in random order.
shuffled_applicants <- applicant_data[sample(nrow(applicant_data)), ]

#head(shuffled_applicants)

In [6]:
#For each applicant (in shuffled order), staff them on the first of their four
#preferences that still has an open (NA) seat.
#If none of those are available, and they're fine with being put on another
#project, put them in the first open seat anywhere in the staffing matrix.
#Applicants who cannot be placed are left out of empty_staffing.

preference_columns <- c("preference_1", "preference_2", "preference_3", "preference_4")

#seq_len() yields an empty sequence when there are no applicants left
#(1:0 would iterate over c(1, 0)).
for (i in seq_len(nrow(shuffled_applicants))) {

    applicant_name <- as.character(shuffled_applicants$name[i])
    staffed <- FALSE

    for (preference_column in preference_columns) {
        team <- as.character(shuffled_applicants[[preference_column]][i])

        #A blank, missing, or unrecognised team name cannot be staffed; skip it
        #rather than crash with "subscript out of bounds".
        if (is.na(team) || !(team %in% rownames(empty_staffing))) {
            next
        }

        #First open seat on this team, or NA when the team is full
        open_seat <- which(is.na(empty_staffing[team, ]))[1]
        if (!is.na(open_seat)) {
            empty_staffing[team, open_seat] <- applicant_name
            staffed <- TRUE
            break
        }
    }

    if (!staffed) {
        #Accept Yes / yes / YES (the input spec asks for "yes/no" answers);
        #isTRUE() treats a missing answer as "no" instead of erroring.
        other_ok <- tolower(trimws(as.character(shuffled_applicants$other_projects[i])))
        if (isTRUE(other_ok == "yes")) {
            #Linear index of the first open seat in column-major order
            open_seat <- which(is.na(empty_staffing))[1]
            if (!is.na(open_seat)) {
                empty_staffing[open_seat] <- applicant_name
            }
        }
    }

}

In [7]:
#Display the filled staffing matrix (one row per team; BUFFER marks seats that are not available)
empty_staffing

0,1,2,3,4,5,6
1) Aptagen LLC,BUFFER,Paola Torre,Ross Pirnie,Katelyn Roberts,Chris Yi,Pimkhuan Hannanta-anan
2) Castleman Disease Collaborative Network,BUFFER,BUFFER,Bowen Wang,Kalyani Nambiar,Tai-Yun Kuo,Zvi Cramer
3) Chronic Care Management LLC,BUFFER,Claudia B. Lanauze Torres,Ben Auerbach,Jaclyn Robustelli,Seungha Lee,Omer Chaudhry
4) Cytovas LLC,BUFFER,BUFFER,Hong Xie,Moen Sen,Tanya Marar,Minsuk Song
5) Dyskeratosis Congenita Outreach Inc.,BUFFER,BUFFER,Eric Villeneuve,Nayantara Kosaraju,Riley Payne,Yue Zhu
6) Oncoceutics Inc.,BUFFER,BUFFER,Enrique Lin Shiao,Joshua Parris,Yifan Wang,Dahmane Ouazia
7) Osage University Partners,BUFFER,Cindy Lin,Satinder Dahiya,Akriti Kharbanda,Matias Porras Paniagua,Dr. Mukta Asnani
8) NeuroFlow LLC,BUFFER,Ananth Srinivasan,Khaing Win,Sahaana Sekhar,Suraj Bharadwaj,Zakary Beach
9) Nutrivert LLC,BUFFER,BUFFER,Vishwanathan Rajaraman,Lindsay Roth,Lauren Walker,Vanessa Munoz
10) Pennsylvania Drug Discovery Institute,Juliette Zhu,Rizwan Saffie,PEEYUSH GOEL,John Logan Brock,Shuhe Wang,Yi Xu


In [8]:
#Names of the remaining (non-manual) applicants that do not appear anywhere in
#the staffing matrix, i.e. the applicants who could not be staffed

applicant_data$name[!(applicant_data$name %in% empty_staffing)]

In [11]:
#Save output to a new csv

#col.names = NA writes an empty leading header cell above the row-name column,
#so the header row has the same number of fields as the data rows (the CSV
#convention; without it the header is shifted one column to the left).
#quote = FALSE relies on no field containing a comma.
write.table(empty_staffing, file = "staffing_output.csv", sep = ",",
            row.names = TRUE, col.names = NA, quote = FALSE)


#TO DO:
#After manual staffing step, shuffle individuals X times and save a fit quality metric for each run to get best staffing over runs
#Logic for individuals who say no to being otherwise staffed
#Logic for excluding people from certain projects
#--> could replace shuffle with a pseudo shuffle that always addresses difficult applicants first, but will this be abused?
#Add exceptions for actions that break things
#Return more attractive output file

In [12]:
#Count how many of the remaining applicants listed each team as their first choice
table(applicant_data[["preference_1"]])


                            1) Aptagen LLC 
                                         3 
 10) Pennsylvania Drug Discovery Institute 
                                         5 
                         11) PolyAurum LLC 
                                         3 
                   12) SAI MedPartners LLC 
                                         4 
                      13) Saturn Care Inc. 
                                         2 
            14) SiO2 Medical Products Inc. 
                                         1 
                           15) SR One Ltd. 
                                        13 
                 16) Zitter Health Insight 
                                         2 
2) Castleman Disease Collaborative Network 
                                         2 
            3) Chronic Care Management LLC 
                                         1 
                            4) Cytovas LLC 
                                         5 
   5) Dyskeratosis Congenita Ou

In [13]:
#Count how many of the remaining applicants listed each team as their second choice
table(applicant_data[["preference_2"]])


                            1) Aptagen LLC 
                                         1 
 10) Pennsylvania Drug Discovery Institute 
                                         5 
                         11) PolyAurum LLC 
                                         0 
                   12) SAI MedPartners LLC 
                                         3 
                      13) Saturn Care Inc. 
                                         2 
            14) SiO2 Medical Products Inc. 
                                         3 
                           15) SR One Ltd. 
                                         9 
                 16) Zitter Health Insight 
                                         2 
2) Castleman Disease Collaborative Network 
                                         1 
            3) Chronic Care Management LLC 
                                         2 
                            4) Cytovas LLC 
                                         7 
                       6) Oncoc