In [2]:
#defining the classes Package, Bin and Student

class Package:
    def __init__(self, id, bin, location_in_bin, size, student):
        self.id = id
        self.size = size
        self.student = student  #student object
        self.letter = student.lastname_letter
        self.bin = bin
        self.location_in_bin = location_in_bin
        
    def getID(self):
        return self.id

    def getSize(self):
        return self.size
    
    def getLetter(self):
        return self.letter
    
    def getBin(self):
        return self.bin

    def getStudent(self):
        return self.student

    def getLocation(self):
        return self.location_in_bin

    # going to be used to modify Package when something is removed from its bin
    def setLocation(self, value):
        self.location_in_bin = value
    
    def __str__(self):
        return f"packageID: {self.id} {self.letter} | size = {self.size} | bin = {self.bin}, position {self.location_in_bin}"

    
class Bin:
    def __init__(self,label):
        self.label = label
        self.packages = []
    
    def addPackage(self,package):
        self.packages.append(package)
    
    def removePackage(self,package):
        self.packages.remove(package)

    def getLabel(self):
        return self.label
    
    def getPackages(self):
        return self.packages

    def getNumPackages(self):
        return len(self.packages)

    def __str__(self):
        return f"binID: {self.label}"

class Student: 
    def __init__(self,id,lastname_letter):
        self.id = id
        self.lastname_letter = lastname_letter.upper()

    def getID(self):
        return self.id

    def getLetter(self):
        return self.lastname_letter
        
    def __str__(self):
        return f"studentID: {self.id} | {self.lastname_letter}"

In [3]:
# create student
test_student = Student(0, "L")
print(test_student)

# create bins
bins = []
for i in range(10):
    bins.append(Bin(i))

bins[0].getNumPackages()

#create package with id=0, put it in bin j=0
j = 0
test_package = Package(0, j, bins[j].getNumPackages(), 'small', test_student)
print(test_package)

studentID: 0 | L
packageID: 0 L | size = small | bin = 0, position 0


In [4]:
import pandas as pd
import numpy as np
import random as random

In [5]:
df = pd.read_csv('lastnames.csv')
df['Percentage'] = df['Percentage']*0.01
total_probability = df['Percentage'].sum()
df ["Probability"] = df['Percentage']/total_probability
lastname_distribution = df[['Letter','Probability']]
lastname_distribution.head()

Unnamed: 0,Letter,Probability
0,M,0.096154
1,S,0.094151
2,B,0.085136
3,C,0.077123
4,H,0.071114


In [6]:
def select_lastname_based_on_distribution(df):
    rand_val = random.random()
    cumulative_prob = 0
    for index, row in df.iterrows():
        cumulative_prob += row['Probability']
        if rand_val <= cumulative_prob:
            return row['Letter']

In [100]:
def runLastNameSimulation(packagesize_dist, lastname_dist, num_bins, num_packages, arrival_prob, days):
    # for now, packagesize_dist is not implemented.

    #setup bins
    all_bins = [] #to store all the bins of this simulation
    #splits the alphabet into bins
    alpha_bin_names = np.array_split(list('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), num_bins)
    #creates bins based on alphabet
    for name in alpha_bin_names:
       all_bins.append(Bin(str(name)))
    
    all_packages = [] #to store all the packages of this simulation
    all_students = [] #to store all the students of this simulation
    counter_packages = 0 # for packageID purposes
    counter_students = 0 #for studentID purposes

    #VARIBLES WE WANT TO TRACK IN THE SIMULATION WILL GO IN THIS:
    df = pd.DataFrame(columns=['day', 'studentID', 'packageID', 'rackTime', 'findTime', 'totalTime'])
    df_total_packages = pd.DataFrame(columns = ['day', 'num_packages'])

    for d in range(days):
        # print("day ", d)
        for i in range(num_packages):
            #randomly get the lastnameletter from the distribution 
            letter = select_lastname_based_on_distribution(lastname_dist)

            #make the student 
            all_students.append(Student(counter_students, letter))
            counter_students= counter_students + 1 #increase counter, so that the next student has a unique id 


            #determine which bin it goes in
            for j in range(num_bins):
                if letter in alpha_bin_names[j]:
                    bin_to_go_in = j #this returns the bin's index number in the all_bins list 
    
            #finally, make the package object
            package = (Package(counter_packages, bin_to_go_in, all_bins[bin_to_go_in].getNumPackages(), 'small', all_students[i]))
            all_packages.append(package)
            counter_packages= counter_packages + 1 #increase counter, so that the next package has a unique id 
            
            #update the bin so it knows it has a new package
            all_bins[bin_to_go_in].addPackage(package)

        #print where the packages were assigned to!
        # for p in all_packages:
        #     print(p)

        # for s in all_students:
        #     print(s)
                
        # --------------------------------------------------------------------------
        #BEGIN THE SIMULATION
        
        # there is some probability that a student comes to pick up their package today 
        students_arriving_today = []
        for student in all_students:
            randomvar = random.random()
            if (randomvar <= arrival_prob):
                students_arriving_today.append(student)

        #shuffle the students arriving today 
        random.shuffle(students_arriving_today)

        # for s in students_arriving_today:
        #     print(s.getID())
            
        #now process the students in this order
        for student in students_arriving_today:

            #make a list of all the packages this student has in the mailroom as of today
            student_packages = []
            for p in all_packages:
                if(p.getStudent()==student):
                    student_packages.append(p)

            first_package_checker = True
            for student_p in student_packages:
                #update the df with how long it took to get the package
                # add one second for rackTime and findTime so that it doesn't take 0 seconds
                if(first_package_checker):
                    df.loc[len(df.index)] = [d, student.getID(), student_p.getID(), student_p.getBin()+1, student_p.getLocation()+1, 
                                             student_p.getBin()+student_p.getLocation()+2]
                    first_package_checker = False
                else: #multiple packages, don't need to walk to any other bin (since bins are based on last name)
                    df.loc[len(df.index)] = [d, student.getID(), student_p.getID(), 0, student_p.getLocation()+1, student_p.getLocation()+1]
                
                #remove the package from the bin and adjust location in bin info
                # print(all_bins[student_p.getBin()].packages)
                all_bins[student_p.getBin()].removePackage(student_p)
                all_packages.remove(student_p)


                #adjust bin info for packages still in that bin, ONLY IF the package has a higher location_in_bin value than the removed package.
                for package in all_bins[student_p.getBin()].getPackages():
                    if student_p.getLocation()<package.getLocation():
                        package.setLocation(package.getLocation()-1)

            #also remove the student 
            all_students.remove(student)
                
        #after we process everything for the day, save how many packages there are at the end of the day
        # df_total_packages .loc= pd.Dataframe(columns = ['day', 'num_packages'])
        df_total_packages.loc[len(df_total_packages.index)] = [d, len(all_packages)]


        # print(df)
        # print(df_total_packages)
    return df, df_total_packages



In [None]:
# runLastNameSimulation(packagesize_dist, lastname_dist, num_bins, num_packages, arrival_prob, days):
runLastNameSimulation('test', lastname_distribution, 10, 800, 0.9, 100)

# redo based on Amber's suggestion of 5 students, multiple packages

In [93]:
def runAmberSim(num_bins, arrival_prob, students, days):
    
    #setup bins
    all_bins = [] #to store all the bins of this simulation
    #splits the alphabet into bins
    alpha_bin_names = np.array_split(list('ABCDEFGHIJKLMNOPQRSTUVWXYZ'), num_bins)
    #creates bins based on alphabet
    for name in alpha_bin_names:
       all_bins.append(Bin(str(name)))
    
    all_packages = [] #to store all the packages of this simulation
    counter = 0 # for packageID purposes

    #VARIBLES WE WANT TO TRACK IN THE SIMULATION WILL GO IN THIS:
    df = pd.DataFrame(columns=['day', 'studentID', 'packageID', 'rackTime', 'findTime', 'totalTime'])
    
    for d in range(days):
        # print("day ", d)
        for student in students: #one package per student!
            #determine which bin it goes in
            for j in range(num_bins):
                if student.getLetter() in alpha_bin_names[j]:
                    bin_to_go_in = j #this returns the bin's index number in the all_bins list 
    
            #finally, make the package object
            package = (Package(counter, bin_to_go_in, all_bins[bin_to_go_in].getNumPackages(), 'small', student))
            all_packages.append(package)
            
            #update the bin so it knows it has a new package
            all_bins[bin_to_go_in].addPackage(package)
    
            counter = counter+1
    
        #print where the packages were assigned to!
        # for p in all_packages:
        #     print(p)

        #BEGIN THE SIMULATION
        
        # there is some probability that a student comes to pick up their package today 
        students_arriving_today = []
        for student in students:
            randomvar = random.random()
            if (randomvar <= arrival_prob):
                students_arriving_today.append(student)

        #shuffle the students arriving today 
        random.shuffle(students_arriving_today)

        #now process the students in this order
        for student in students_arriving_today:

            #make a list of all the packages this student has in the mailroom as of today
            student_packages = []
            for p in all_packages:
                if(p.getStudent()==student):
                    student_packages.append(p)

            first_package_checker = True
            for student_p in student_packages:
                #update the df with how long it took to get the package
                # add one second for rackTime and findTime so that it doesn't take 0 seconds
                if(first_package_checker):
                    df.loc[len(df.index)] = [d, student.getID(), student_p.getID(), student_p.getBin()+1, student_p.getLocation()+1, 
                                             student_p.getBin()+student_p.getLocation()+2]
                    first_package_checker = False
                else: #multiple packages, don't need to walk to any other bin (since bins are based on last name)
                    df.loc[len(df.index)] = [d, student.getID(), student_p.getID(), 0, student_p.getLocation()+1, student_p.getLocation()+1]
                
                #remove the package from the bin and adjust location in bin info
                # print(all_bins[student_p.getBin()].packages)
                all_bins[student_p.getBin()].removePackage(student_p)
                all_packages.remove(student_p)

                #adjust bin info for packages still in that bin, ONLY IF the package has a higher location_in_bin value than the removed package.
                for package in all_bins[student_p.getBin()].getPackages():
                    if student_p.getLocation()<package.getLocation():
                        package.setLocation(package.getLocation()-1)
        print(df)
    return df


In [94]:
#we decided to manually make 5 students per Amber's suggestion
students_list = []
students_list.append(Student(0, 'A'))
students_list.append(Student(1, 'A'))
students_list.append(Student(2, 'L'))
students_list.append(Student(3, 'L'))
students_list.append(Student(4, 'Z'))

# runAmberSim(num_bins, arrival_prob, students, days):
output_df = runAmberSim(10, 0.5, students_list, 7)

   day  studentID  packageID  rackTime  findTime  totalTime
0    0          4          4        10         1         11
1    0          3          3         4         2          6
   day  studentID  packageID  rackTime  findTime  totalTime
0    0          4          4        10         1         11
1    0          3          3         4         2          6
2    1          1          1         1         2          3
3    1          1          6         0         3          3
4    1          0          0         1         1          2
5    1          0          5         0         1          1
6    1          3          8         4         3          7
7    1          2          2         4         1          5
8    1          2          7         0         1          1
    day  studentID  packageID  rackTime  findTime  totalTime
0     0          4          4        10         1         11
1     0          3          3         4         2          6
2     1          1          1        

In [31]:
output_df

Unnamed: 0,day,studentID,packageID,rackTime,findTime,totalTime
0,0,1,1,1,2,3
1,0,2,2,4,1,5
2,1,1,6,1,3,4
3,1,2,7,4,2,6
4,2,3,3,4,1,5
5,2,3,8,0,1,1
6,2,3,13,0,2,2
7,2,2,12,4,1,5
8,2,4,4,10,1,11
9,2,4,9,0,1,1


In [36]:
output_df.mean()

day           2.964286
studentID     2.107143
packageID    14.250000
rackTime      2.428571
findTime      1.464286
totalTime     3.892857
dtype: float64

In [37]:
output_df.median()

day           2.0
studentID     2.0
packageID    13.5
rackTime      1.0
findTime      1.0
totalTime     3.0
dtype: float64