In [2]:
import pandas as pd
import numpy as np
import sys

In [3]:
# Load the 'last_year.xlsx' workbook into a DataFrame.
# The path is relative, so the file must sit in the notebook's working directory.
data = pd.read_excel('last_year.xlsx')

In [4]:
# Drop only the rows in which *every* column is NaN (how='all').
# Rows that are merely missing some values are kept.

data = data.dropna(how='all')
# Show the column names of the cleaned frame.
list(data)

['Tracking Number',
 'Employee Type',
 'Supervisor Name',
 'Laboratory',
 'Building',
 'Room',
 'Phone',
 'Judge',
 'Presentation Type',
 'Luncheon?',
 'Categories',
 'Abstract Status',
 'Title']

In [5]:
# Keep only the columns used for judge / presentation assignment and label
# every missing value with the placeholder string 'Non'.

data_select = data.loc[
    :,
    ['Tracking Number', 'Employee Type', 'Laboratory', 'Judge', 'Presentation Type'],
].fillna('Non')

# Tally how many rows fall under each presentation type.
group = data_select['Presentation Type'].value_counts()
print(group)
data_select.head()

Non    71
A      67
P      61
O      55
Name: Presentation Type, dtype: int64


Unnamed: 0,Tracking Number,Employee Type,Laboratory,Judge,Presentation Type
2,SC010536EB,Post Doctoral Fellow,LCP,N,Non
3,SC009159DN,Post Doctoral Fellow,LMB,N,Non
4,SC009915DV,Post Doctoral Fellow,GBB,Y,Non
5,SC009816DU,Post Baccalaureate Fellow,LMB,N,Non
6,SC009024DM,Post Doctoral Fellow,LDB,N,Non


In [6]:
# Everyone who answered 'Y' in the Judge column, ordered by presentation type.

data_judges = (
    data_select[data_select['Judge'] == 'Y']
    .fillna('Non')
    .sort_values(by='Presentation Type')
)

# Tally the volunteer judges per presentation type.
group = data_judges['Presentation Type'].value_counts()
print(group)
data_judges.head()

O      51
P      38
A      11
Non    10
Name: Presentation Type, dtype: int64


Unnamed: 0,Tracking Number,Employee Type,Laboratory,Judge,Presentation Type
126,SC008457DG,Other,ODIR,Y,A
9,SC010590EB,Principal Investigator,KDB,Y,A
17,SC009384DP,Principal Investigator,LBG,Y,A
42,SC008862DK,Principal Investigator,LCBB,Y,A
46,SC010653EC,Staff Scientist,MDB,Y,A


In [7]:
# Randomly select the oral presentations from the judges registered as 'O'.
# The seed is fixed so the draw is reproducible after Restart Kernel -> Run All;
# change ORAL_SEED (or pass random_state=None) to obtain a fresh draw.

N_ORALS = 16
ORAL_SEED = 42

oral_candidates = data_judges.loc[data_judges['Presentation Type'] == 'O']
orals = oral_candidates.sample(n=N_ORALS, random_state=ORAL_SEED)

# Everyone who was not drawn stays in the remaining pool. Dropping by index
# label is an exact anti-join: the previous concat + drop_duplicates(keep=False)
# compared row *values*, so it would also have discarded any judges whose rows
# happen to be identical to each other.
posters = data_judges.drop(index=orals.index)
print(len(orals))
orals.head()

16


Unnamed: 0,Tracking Number,Employee Type,Laboratory,Judge,Presentation Type
161,SC008934DL,Research Fellow,LERB,Y,O
223,SC008853DK,Post Doctoral Fellow,LMCB,Y,O
220,SC008727DJ,IRTA pre-doctoral,LDB,Y,O
157,SC008844DK,Post Doctoral Fellow,LERB,Y,O
254,SC010302DZ,IRTA pre-doctoral,LMCB,Y,O


In [8]:
# Orals that were not selected as orals are relabelled as posters ('O' -> 'P').

print(len(posters))

# Restrict the replacement to the 'Presentation Type' column. A bare
# posters.replace('O', 'P') rewrites any cell equal to 'O' in *every* column
# (e.g. a laboratory or employee type that is literally 'O').
posters = posters.replace({'Presentation Type': {'O': 'P'}})

# Split the pool into poster presenters and everybody else.
is_poster = posters['Presentation Type'] == 'P'
p_posters = posters.loc[is_poster]
non_posters = posters.loc[~is_poster]
print(len(p_posters), len(non_posters))

94
73 21


In [80]:
# Order the poster presenters by laboratory and report how many there are.

posters_by_lab = p_posters.sort_values(by='Laboratory')
n_poster = len(posters_by_lab)
print('# of posters: ', n_poster, '\n')
print(posters_by_lab.head(), '\n')

# Number of posters contributed by each laboratory (value_counts orders the
# result from the most to the least frequent laboratory).
labs_p = p_posters['Laboratory'].value_counts()
print(labs_p)

# of posters:  73 

    Tracking Number         Employee Type Laboratory Judge Presentation Type
191      SC008619DI  Post Doctoral Fellow         BC     Y                 P
186      SC009789DT  Post Doctoral Fellow         DB     Y                 P
204      SC008691DI       Clinical Fellow        DDB     Y                 P
169      SC009780DT       Clinical Fellow        DDB     Y                 P
179      SC009474DQ     IRTA pre-doctoral        DDB     Y                 P 

DEOB     11
LBG       8
LMCB      8
LDB       8
LCDB      7
KDB       5
DDB       5
LCP       4
LBC       4
LMB       4
PECRB     2
MDB       2
DB        1
GBB       1
BC        1
MMB       1
LERB      1
Name: Laboratory, dtype: int64


In [82]:
# Greedily build a first group of laboratories holding just over half of the
# posters (presumably "day 1" of a two-day session -- confirm). labs_p is
# ordered from largest to smallest, so each step pairs the i-th largest
# laboratory with the i-th smallest one.
#
# Positions are read with .iloc: labs_p is indexed by laboratory name, and
# integer keys on such a Series only work through a deprecated positional
# fallback. The loop stops at the middle and adds the middle laboratory once,
# so no laboratory is counted twice when the two cursors meet.
n_labs = len(labs_p)
half_of_posters = n_poster // 2

day_1 = 0
for i in range((n_labs + 1) // 2):
    mirror = n_labs - 1 - i
    day_1 += labs_p.iloc[i]
    if mirror != i:
        day_1 += labs_p.iloc[mirror]
    if day_1 > half_of_posters:
        # pair index reached, posters in the first group, posters left over
        print(i, day_1, n_poster - day_1)
        break

3 39 34


In [15]:
# Annotate every poster row with the number of posters from its laboratory
# ('freq') and order the rows by that count (descending), then by laboratory.
#
# The frame is built from p_posters rather than from the previous value of
# labs_p: when the notebook is run top to bottom, labs_p is the value_counts()
# Series at this point, which has neither .assign nor a 'Laboratory' column.
labs_p = (
    p_posters
    .assign(freq=p_posters.groupby('Laboratory')['Laboratory'].transform('count'))
    .sort_values(by=['freq', 'Laboratory'], ascending=[False, True])
)
labs_p

Unnamed: 0,Tracking Number,Employee Type,Laboratory,Judge,Presentation Type,freq
222,SC008835DK,Post Baccalaureate Fellow,DEOB,Y,P,11
245,SC008556DH,Post Baccalaureate Fellow,DEOB,Y,P,11
159,SC010239DY,Post Doctoral Fellow,DEOB,Y,P,11
236,SC009951DV,Post Doctoral Fellow,DEOB,Y,P,11
153,SC010176DX,Post Baccalaureate Fellow,DEOB,Y,P,11
201,SC010095DW,Clinical Fellow,DEOB,Y,P,11
218,SC008772DJ,IRTA pre-doctoral,DEOB,Y,P,11
243,SC009240DO,Research Fellow,DEOB,Y,P,11
142,SC009546DR,IRTA pre-doctoral,DEOB,Y,P,11
226,SC010572EB,Post Doctoral Fellow,DEOB,Y,P,11


In [117]:
# Per-laboratory head count of the judges who are not presenting a poster.
labs_nonp = non_posters.loc[:, 'Laboratory'].value_counts()
print(labs_nonp)

LMCB     5
LERB     3
KDB      2
LBG      2
LCBB     1
GBB      1
ODIR     1
DB       1
LBC      1
OTHER    1
LMB      1
CEB      1
MDB      1
Name: Laboratory, dtype: int64


In [4]:
# Pull each spreadsheet column out into a plain Python list, one entry per row.
# NOTE(review): 'Name' and 'Research Keyword' are not among the columns listed
# for 'last_year.xlsx' earlier in this notebook -- this cell presumably expects
# `data` to come from a different workbook; confirm.
names = data['Name'].to_list()
employee_types = data['Employee Type'].to_list()
supervisor_names = data['Supervisor Name'].to_list()
laboratories = data['Laboratory'].to_list()
buildings = data['Building'].to_list()
rooms = data['Room'].to_list()
judges = data['Judge'].to_list()
presentation_types = data['Presentation Type'].to_list()
research_keywords = data['Research Keyword'].to_list()

In [5]:
class Fellow:
    """Record holding the spreadsheet fields of one registrant.

    Every text field defaults to the empty string and ``judges`` to a new
    empty list, so ``Fellow()`` behaves exactly as before; the fields may now
    also be supplied directly as constructor arguments.

    Parameters
    ----------
    name, employee_type, supervisor_name, laboratory, building, room, judge,
    presentation_type, research_keyword : optional
        Values stored unchanged on the attribute of the same name.
    judges : iterable, optional
        Initial content of ``self.judges``. It is copied into a new list so
        that instances never share (or alias the caller's) list.
    """

    def __init__(self, name='', employee_type='', supervisor_name='',
                 laboratory='', building='', room='', judge='',
                 presentation_type='', research_keyword='', judges=None):
        self.name = name
        self.employee_type = employee_type
        self.supervisor_name = supervisor_name
        self.laboratory = laboratory
        self.building = building
        self.room = room
        self.judge = judge
        self.presentation_type = presentation_type
        self.research_keyword = research_keyword

        # Presumably the judges assigned to this person -- nothing in the
        # visible cells fills it yet; confirm against later cells.
        self.judges = [] if judges is None else list(judges)

In [6]:
# Working lists filled by the following cell: every registrant, the volunteer
# judges, and the oral / poster presenters. Each name gets its own empty list.
people, volunteer_judges_list = [], []
oral_list, poster_list = [], []



In [7]:
# Build one Fellow per spreadsheet row and sort everyone into the working lists.

# Empty the lists first so that re-running this cell does not append every
# person a second time.
for bucket in (people, volunteer_judges_list, oral_list, poster_list):
    bucket.clear()

for name, employee_type, presentation_type in zip(names, employee_types, presentation_types):
    person = Fellow()
    person.name = name
    person.employee_type = employee_type
    person.presentation_type = presentation_type
    people.append(person)

    # Anyone whose employee type is neither some kind of "Fellow" nor
    # 'IRTA pre-doctoral' is put on the volunteer-judge list. A missing
    # employee type arrives as NaN (a float), which cannot be searched with
    # `in`; such rows are not treated as judges.
    # NOTE(review): the spreadsheet's 'Judge' column (loaded into `judges`)
    # is not consulted here -- confirm that is intended.
    if (
        isinstance(employee_type, str)
        and 'Fellow' not in employee_type
        and employee_type != 'IRTA pre-doctoral'
    ):
        volunteer_judges_list.append(person)

    if presentation_type == 'O':
        oral_list.append(person)
    elif presentation_type == 'P':
        poster_list.append(person)


In [8]:
# --- Sanity check: show the employee type of every volunteer judge ---
for volunteer in volunteer_judges_list:
    print(volunteer.employee_type)


Staff Scientist
Staff Scientist
Other
Technician
Staff Scientist
Staff Scientist
Other


In [9]:
# --- Sanity check: print the oral presenters' names, then the poster presenters' ---
for person in oral_list + poster_list:
    print(person.name)

Person2
Person4
Person6
Person11
Person13
Person19
Person22
Person24
Person25
Person27
Person28
Person7
Person10
Person14
Person18
Person20
Person21
