In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
def set_total_patrons_count(samples: int = 1) -> int:
    """
    Set the total number of patrons for 1 day.
    Uses a beta distribution.


    :samples: Number of times to run the simulation, used for testing the distribution.
    :return: RANDOMIZED variable, based on Chicago Public Library data.
    >>> results = []
    >>> for test in range(5):
    ...     results.append(set_total_patrons_count(samples=10000))  # Testing mode
    >>> print(results)
    >>> min(results) >= 444
    True
    >>> max(results) <= 949
    True
    """
    # We cannot assume that because people have used public computers in the past, they will continue to.
    # In fact, CPL data shows that usage decreased for the last 3 years, specifically by 13.5% from 2018 to 2019.
    low_service = (514 * .865)      # I've intentionally lowered the low end by 13.5%.
    # But it's also likely that due to the economic crisis, usage will go up (Jaeger et al., 2011).
    peak_service = random.uniform(622, 949)     # 949 was the highest number in 2016
    # Source: https://github.com/iSchool-597PR/Examples_Fa20/blob/master/week_07/Probability_Distributions.ipynb & https://numpy.org/doc/stable/reference/random/generated/numpy.random.Generator.beta.html
    g = np.random.default_rng()
    patron_pct = np.random.Generator.beta(g, low_service, peak_service, samples)
    if samples > 1:
        # Testing my distribution: Does it look like the CPL data?
        patron_array = (low_service * patron_pct) + low_service
        plt.hist(patron_array,
                        bins=200,
                        density=True)
        plt.show()
    patron_count = (low_service * patron_pct[0]) + low_service
    return int(patron_count)

In [3]:
def patrons_per_minute(total_patrons: int, plot: bool=False) -> list:
    """
    #use these as weights, for each person coming that day, which minute did they arrive? Draw one random # representing the minute, for each person.
    Discrete probability distribution of patrons being added, based on Seattle Public Library data.
    Note: Demand for computers != use of computers, but we only have data measuring use.

    :param total_patrons:
    :param plot:
    :return:
    >>> patrons_per_minute(700)
    [1,2,3]
    >>> for i in range(10):
    ...     patrons_per_minute(700, plot=True)
    [1,2,3]
    """
    # Determine RANDOMLY, WITH WEIGHTS, what minute each patron arrived at.
    hours = np.arange(10)
    minutes = np.arange(600)
    probs = []
    for i in range(600):
        if i < 60:
            probs.append(0.035010)
        elif i < 120:
            probs.append(0.045726)
        elif i < 180:
            probs.append(0.055542)
        elif i < 240:
            probs.append(0.136442)
        elif i < 300:
            probs.append(0.165399)
        elif i < 360:
            probs.append(0.223067)
        elif i < 420:
            probs.append(0.199427)
        elif i < 480:
            probs.append(0.088998)
        elif i < 540:
            probs.append(0.047607)
        elif i < 600:
            probs.append(0.002781)
    patron_dist = random.choices(minutes, weights=probs, k=total_patrons)   # np.random.choice(hours, total_patrons, p=probs)
    if plot is True:
        plt.hist(patron_dist,
                 bins=200,
                 density=True)
        plt.show()
    return patron_dist  # Returns a list of all hours that patrons arrived

In [4]:
total_patrons_today = set_total_patrons_count()
ppm = patrons_per_minute(total_patrons_today)
patron_df = pd.DataFrame(ppm, columns=['Arrival_minute'])
counts = patron_df['Arrival_minute'].value_counts()
counts

353    8
405    6
375    6
361    6
404    5
      ..
535    1
296    1
292    1
291    1
2      1
Name: Arrival_minute, Length: 331, dtype: int64

In [89]:
patron_df = patron_df.sort_values(['Arrival_minute'])
patron_df['Got_computer_minute'] = np.nan
patron_df['Wait_time'] = np.nan
patron_df['Departed_queue'] = np.nan
patron_df.dtypes
# Minute = int 
# Arrival minute = int64
# Got computer min = float64
# Wait time = float64
# Departed q = float64
patron_df.shape

duplicate = patron_df[patron_df.duplicated(subset='Arrival_minute',keep=False)] 
duplicate

Unnamed: 0,Arrival_minute,Got_computer_minute,Wait_time,Departed_queue
37,19,,,
19,19,,,
508,25,,,
360,25,,,
56,25,,,
...,...,...,...,...
419,515,,,
423,515,,,
561,515,,,
78,529,,,


In [101]:
minute = 22
patron_df.loc[lambda x: x['Arrival_minute'] == minute, ['Got_computer_minute']] = minute                # Add when they got a computer
#patron_df.loc[lambda x: x['Arrival_minute'] == minute, ['Leave_minute']] = minute + 15  # Add when they got a computer
patron_df.loc[lambda x: x['Arrival_minute'] == minute, ['Wait_time']] = (patron_df['Got_computer_minute'] - patron_df['Arrival_minute'])

patron_df.head(10)

Unnamed: 0,Arrival_minute,Got_computer_minute,Wait_time,Departed_queue
503,2,2.0,0.0,
325,6,6.0,0.0,
361,9,9.0,0.0,
130,14,14.0,0.0,
256,17,17.0,0.0,
18,18,18.0,0.0,
37,19,19.0,0.0,
19,19,19.0,0.0,
49,21,21.0,0.0,
316,22,22.0,0.0,


In [103]:
# Series.nsmallest(n=5, keep='first')

duplicate = patron_df[patron_df.duplicated(subset='Arrival_minute',keep=False)]
duplicate = duplicate[duplicate['Got_computer_minute'].isnull() == True]
small = duplicate['Arrival_minute'].nsmallest(n=1, keep='first').index
small[0]

minute = 25

patron_df.at[small[0], 'Got_computer_minute'] = minute
patron_df.head(20)

patron_df.at[503, 'Arrival_minute']

# patrons_df.loc[lambda x: x['Arrival_minute'] == minute, ['Got_computer_minute']] = minute                # Add when they got a computer
# patrons_df.loc[lambda x: x['Arrival_minute'] == minute, ['Leave_minute']] = minute + select_reservation_length()  # Add when they got a computer
# patrons_df.loc[lambda x: x['Arrival_minute'] == minute, ['Wait_duration']] = (patrons_df['Got_computer_minute'] - patrons_df['Arrival_minute'])
            

2

Unnamed: 0,Arrival_minute,Got_computer_minute,Wait_time,Departed_queue
37,19,,,
19,19,,,
508,25,,,
360,25,,,
56,25,,,
...,...,...,...,...
419,515,,,
423,515,,,
561,515,,,
78,529,,,


In [6]:
#patron_df['Inventory qty'] = 20
#patron_df.groupby('Inventory qty').agg([np.min, np.median, np.max])

# df.groupby(df.index.year)[["Open", "High", "Low", "Close"]].agg([
#         np.max, np.min, np.mean,
#     ])

In [9]:
# Get sums, mins, medians, maxes of a particular df column
duplicate['Arrival_minute'].sum()
duplicate['Arrival_minute'].median()
duplicate['Arrival_minute'].min()
duplicate['Arrival_minute'].max()

# sim_results = pd.concat(sims, ignore_index=True)
#     median_repair_cost = sim_results['Repair cost'].groupby('Inventory qty').agg([np.median])

median_min = duplicate.groupby('Arrival_minute').agg([np.median])
median_min['Got_computer_minute']

Unnamed: 0_level_0,median
Arrival_minute,Unnamed: 1_level_1
1,
24,
30,
38,
46,
...,...
473,
477,
482,
488,


In [46]:
# Source: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html 
patron_df.loc[lambda x: x['Arrival_minute'] == 20, ['Got_computer_minute']] = 8 
patron_df.loc[lambda x: x['Arrival_minute'] == 20]
patron_df.loc[lambda x: x['Arrival_minute'] == 20, ['Wait_time']] = (10 - patron_df['Arrival_minute'])
patron_df

# # If the value of minute - "Got_computer_minute" = 60...
# z= 100
# patron_df['Current_minute'] = 10
# val = patron_df[patron_df['Got_computer_minute'] == 8]
# bob = val['Got_computer_minute'].tolist()

# if minute - bob[0] == 60:
#     do stuff
# Can't evaluate this if more than 1 row returns...
# if (val['Current minute'].item() - val['Got_computer_minute'].item() > 5) is False:
#     z = 0
# z
waiting = 10

minute = 524+60
patron_df.loc[lambda x: (x['Got_computer_minute'].isnull() == True) & (x['Arrival_minute'] == minute - 60), ['Departed_queue']] = 1

              
done_waiting = patron_df['Arrival_minute'][(patron_df['Got_computer_minute'].isnull() == True) & (patron_df['Arrival_minute'] == minute - 60)].tolist()
if len(done_waiting) > 0:
    waiting -= len(done_waiting)
    
done_waiting
patron_df[patron_df['Arrival_minute'] == minute - 60]

patron_df
    
# Locate rows w/ min(arrival_min) & gotcomp is NULL 
patron_df.loc[lambda x: (x['Arrival_minute'] == patron_df['Arrival_minute'].min()) & (x['Got_computer_minute'].isnull() == True)] #, ['Boop']] = 1



Unnamed: 0,Arrival_minute,Got_computer_minute,Wait_time,Departed_queue
29,4,,,
122,4,,,


In [27]:
minute = 15
waiting = 10
computers_in_use = 15
computers_available = 18

while waiting > 0 and (computers_in_use < computers_available):
    comps_free = computers_available - computers_in_use
    waiting -= 1
    computers_in_use += 1 # or, len(rows returned)
    #print(waiting, computers_in_use, computers_available)

    
            # find patron_df min(arrival_time) && got_comp_min.isna == True
            # update got_comp_min == min, leave_min == min + 60
            # wait duration = got_comp_min - min
    

# iloc slice is INCLUSIVE
#patron_df.iloc[0:5]    # Returns first 5 rows of patron_df

#Min is returning the absolute min of arrival minute, not relative to got_computer_min null status.
#patron_df.loc[lambda x: x['Arrival_minute'] == patron_df['Arrival_minute'].min(), patron_df['Got_computer_minute']] = minute

patron_df.loc[lambda x: x['Arrival_minute'] == 4, ['Got_computer_minute']] = 8 

# Return a series or int w/ min(arrival minute) where got_computer_minute is null
lowest_arrive_min = patron_df['Arrival_minute'][(patron_df['Got_computer_minute'].isnull() == True) & (patron_df['Departed_queue'].isnull() == True)].min()



# Note when 2+ ppl arrived in same minute, this will update both
patron_df.loc[lambda x: (x['Got_computer_minute'].isnull() == True) 
              & (x['Arrival_minute'] == lowest_arrive_min), ['Got_computer_minute']] = minute
# patron_df.loc[lambda x: (x['Got_computer_minute'].isnull() == True) 
#               & (x['Arrival_minute'] == lowest_arrive_min), ['Leave_minute']] = minute + 60
patron_df.loc[lambda x: (x['Got_computer_minute'].isnull() == True) 
              & (x['Arrival_minute'] == lowest_arrive_min), ['Wait_time']] = patron_df['Got_computer_minute'] - minute

patron_df.head(30)


# Update:
#got_comp_min == min, leave_min == min + 60, wait duration = got_comp_min - min


# patron_df.loc[lambda x: (x['Arrival_minute'] == patron_df['Arrival_minute'].min()) 
#               & (x['Got_computer_minute'].isnull() == True), patron_df['Wait_time']] = minute - patron_df['Arrival_minute']
# Get the length of a df with len(patron_df.index)

Unnamed: 0,Arrival_minute,Got_computer_minute,Wait_time,Departed_queue
70,2,8.0,,
311,4,8.0,,
239,5,15.0,,
508,5,15.0,,
294,8,15.0,,
407,11,15.0,,
50,12,15.0,,
208,16,15.0,,
560,18,15.0,,
476,20,15.0,,


In [23]:
computers_in_use = 0
    for minute in range(hours_open * 60):
        # UPDATE COMPUTER USAGE
        if minute not in counts.index.values:
            # If 0 patrons arrived at this minute, skip ahead. Otherwise... count how many patrons arrived.
            patrons_this_minute = 0
        else:
            patrons_this_minute = counts[minute]
            if computers_in_use < computers_available:
                # If computer available, add # patrons to computers in use
                computers_in_use += patrons_this_minute
                # Find and update ONLY df rows where "Arrival_minute" = minute
                if minute == arrival minute, then got_computer_minute == minute
                # patron_df.loc[[patron_df['Arrival_minute'] == minute], ['Got_computer_minute']] = minute
                # TODO: FIX: This is overwriting all rows.
                # patron_df['Got_computer_minute'] = minute                                                             # Add when they got a computer
                # patron_df['Wait_time'] = minute - patron_df['Arrival_minute']                                         # Add wait time (how long you waited before getting a computer, implies you GOT a computer)
                # LIST VERSION # TODO: Figure out how to identify which to update.
                # patron_queue[minute]["got_computer_minute"] = minute
                # patron_queue[minute]["wait_time"] = minute - patron_queue[minute]["arrival_minute"]
            else:
                # If a computer is unavailable, people_waiting += # patrons
                waiting += patrons_this_minute
        # UPDATE QUEUE LEAVERS
        if minute - patron_df[patron_df['Got_computer_minute'] == 60]:     # TODO: Update 60 to vary w/ the time length they are staying for
            # Free up computer when patron reaches 1 hour
            computers_in_use -= 1
        if patron_df['Got_computer_minute'] == np.nan and minute - patron_df['Arrival_minute'] > 60:
            # Count people who wait over n minutes
            # TODO: FIX: This is going to set all values to 1
            #patron_df['Departed_queue'] = 1
            leavers += 1
            waiting -= 1

IndentationError: unexpected indent (<ipython-input-23-f3421aa4e254>, line 2)

In [38]:
hours = 10
for minute in range(hours * 60):
    computers_in_use = 0
    if minute not in (counts.index.values):
        patrons_this_minute = 0
    else:
        patrons_this_minute = counts[minute]
    print(minute, patrons_this_minute)

0 1
1 0
2 0
3 0
4 0
5 1
6 0
7 0
8 1
9 1
10 0
11 0
12 0
13 1
14 0
15 1
16 0
17 1
18 0
19 2
20 0
21 1
22 0
23 0
24 0
25 1
26 0
27 1
28 0
29 0
30 0
31 0
32 1
33 1
34 2
35 0
36 0
37 0
38 0
39 0
40 0
41 0
42 0
43 0
44 1
45 0
46 0
47 0
48 0
49 1
50 1
51 0
52 0
53 0
54 1
55 0
56 0
57 0
58 0
59 0
60 0
61 0
62 1
63 0
64 0
65 2
66 0
67 0
68 1
69 0
70 0
71 1
72 0
73 0
74 1
75 0
76 0
77 1
78 0
79 0
80 1
81 1
82 2
83 1
84 0
85 0
86 0
87 0
88 1
89 0
90 0
91 0
92 2
93 1
94 0
95 2
96 0
97 0
98 0
99 0
100 0
101 0
102 2
103 0
104 0
105 0
106 1
107 0
108 1
109 1
110 1
111 1
112 0
113 0
114 0
115 1
116 2
117 0
118 0
119 0
120 2
121 0
122 0
123 2
124 2
125 1
126 0
127 1
128 0
129 1
130 1
131 0
132 1
133 0
134 2
135 0
136 0
137 0
138 0
139 0
140 0
141 0
142 1
143 0
144 1
145 0
146 0
147 0
148 3
149 1
150 1
151 0
152 2
153 1
154 1
155 1
156 0
157 0
158 0
159 0
160 0
161 0
162 0
163 0
164 2
165 0
166 1
167 2
168 0
169 0
170 0
171 1
172 2
173 0
174 1
175 1
176 1
177 1
178 2
179 0
180 2
181 1
182 3
183 1
184 1


In [29]:
print(patrons_this_minute)

1


In [32]:
hours_open = 10
computers_available = 20
for minute in range(hours_open * 60):
    computers_in_use = 0
    # If 0 patrons arrived at this minute, skip ahead. Otherwise... count how many patrons arrived.
    if minute in counts.index.values:
        patrons_this_minute = counts[minute]
        for patrons in range(patrons_this_minute):
            # UPDATE COMPUTER USAGE
            if computers_in_use < computers_available:
                patron_df['Arrival_minute']
                #patron_df.loc[[patron_df['Arrival_minute'] == minute], ['Got_computer_minute']] = minute
                #patron_df['Got_computer_minute'] = minute
                #patron_df['Wait_time'] = minute - patron_df['Arrival_minute']
                computers_in_use += 1

ValueError: PandasArray must be 1-dimensional.