# 1. Peak Period Analysis & Staffing Recommendations
   Objective \
   Determine the busiest time windows (e.g., by `order_seated_at_local` or `bill_paid_at_local`) at
   each venue and recommend optimal staffing levels to improve service efficiency.


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the venue reference table and display it.
# NOTE(review): the displayed frame has an "Unnamed: 0" column, which suggests the
# CSV was saved with its index; consider index_col=0 once it is confirmed that
# nothing downstream relies on that column.
venues_csv_path = "../data/CxCData/venues.csv"
venues = pd.read_csv(venues_csv_path)
venues

Unnamed: 0,venue_xref_id,concept,city,country,start_of_day_offset
0,ea74268311cfcc47d2b4c38ef08b9dab0117226a9a99a9...,FAST_FOOD,Albuquerque,US,00:00:00
1,e7cec41c46a9706ba1a702b368be5431913b6dc9c9da49...,FAMILY_DINING,Brooklyn,US,00:00:00
2,e63e79791883b0c4ac71d41eaee727932c13d6a4ec8f78...,FAST_CASUAL,Windsor,CA,00:00:00
3,c100388c9328b30b6bdfbea43f113d367ae9d32b03e84b...,BAR,Burnaby,CA,01:00:00
4,a819ee7f83f27dd3d7f44515e22aca3c26e6704ddd2a9b...,FAMILY_DINING,Tisdale,CA,00:00:00
...,...,...,...,...,...
596,cfd1fa6e8f122e93b1cb02e7b7541749335e7088095f8e...,,Toronto,CA,00:00:00
597,75e7251298f2d62406dbfe1a11011cffdad90b27704687...,,Richardson,US,00:00:00
598,40c38939612f59ad418569e94a075935576bff0449c7f8...,FAST_CASUAL,Guelph,CA,09:00:00
599,2ed5bf26505a6064d09b40bacdb6fccc9665c3de029494...,,Mississauga,CA,00:00:00


In [11]:
# True if any venue is missing its venue_xref_id.
bool(venues['venue_xref_id'].isna().any())

False

In [12]:
# True if any venue is missing its concept.
bool(venues['concept'].isna().any())

True

In [13]:
# True if any venue is missing its city.
bool(venues['city'].isna().any())

False

In [14]:
# True if any venue is missing its country.
bool(venues['country'].isna().any())

False

In [16]:
# True if any venue has a missing or blank start_of_day_offset.
# pd.read_csv parses empty CSV fields as NaN by default, so comparing against ''
# alone would never flag a missing offset in a freshly loaded frame; test for NaN
# and the empty string together so the check matches the other null checks.
start_offsets = venues['start_of_day_offset']
bool((start_offsets.isna() | start_offsets.eq('')).any())

False

In [None]:
# Distinct venue concepts in order of first appearance (missing values show up as nan).
concept_values = venues['concept']
concept_values.unique()

array(['FAST_FOOD', 'FAMILY_DINING', 'FAST_CASUAL', 'BAR', 'CAFE',
       'SPORTS_CLUB', nan, 'FINE_DINING', 'BREWERY', 'POP_UP', 'BAKERY',
       'BUFFET', 'HOTEL', 'ENTERTAINMENT_COMPLEX', 'FOOD_TRUCK'],
      dtype=object)

In [6]:
# Rows for venues that have no concept recorded.
concept_is_missing = venues['concept'].isna()
venues[concept_is_missing]

Unnamed: 0,venue_xref_id,concept,city,country,start_of_day_offset
19,6a4d65d8ac11e9a04fd36823e5cda38c632a3821da3e0e...,,Caguas,US,00:00:00
35,57d1ec857c955b4b5ae8313778fcee23fa638a805ea920...,,Niles Township,US,00:00:00
81,abc9f91be5b16b673b527fc797b4ce1504f81ea000d907...,,New York,US,08:00:00
83,4b06b0aaa301e1017dfa4da7baaa6ab334f29bcb221164...,,Dartmouth,CA,00:00:00
90,1d7751ba58ff82e365f1b961977f82537011a71d9b13b6...,,Charlottetown,CA,00:00:00
...,...,...,...,...,...
590,77ef1033c7249d619411ef17a85a8fbe51f2447741e640...,,Bathurst,CA,12:00:00
594,748fa11184636bc3a5de05eca1b969ace3d55e60f8bca3...,,Sherwood Park,CA,00:00:00
596,cfd1fa6e8f122e93b1cb02e7b7541749335e7088095f8e...,,Toronto,CA,00:00:00
597,75e7251298f2d62406dbfe1a11011cffdad90b27704687...,,Richardson,US,00:00:00


In [None]:
# Distinct start-of-day offsets across all venues, in order of first appearance.
offset_values = venues['start_of_day_offset']
offset_values.unique()

array(['00:00:00', '01:00:00', '04:00:00', '05:00:00', '02:00:00',
       '17:00:00', '11:00:00', '10:00:00', '06:00:00', '07:00:00',
       '03:00:00', '08:00:00', '09:00:00', '12:00:00', '15:00:00',
       '14:00:00', '16:00:00', '23:00:00'], dtype=object)

In [16]:
# Concepts that have at least one venue whose start_of_day_offset is 00:00:00.
# Row mask and column are selected in one .loc call rather than chained indexing.
starts_at_midnight = venues['start_of_day_offset'].eq("00:00:00")
venues.loc[starts_at_midnight, 'concept'].unique()

array(['FAST_FOOD', 'FAMILY_DINING', 'FAST_CASUAL', 'CAFE', 'SPORTS_CLUB',
       nan, 'BAR', 'POP_UP', 'FINE_DINING', 'BREWERY', 'BAKERY', 'BUFFET',
       'HOTEL', 'FOOD_TRUCK'], dtype=object)

In [18]:
# Compare the number of distinct concepts among 00:00:00 venues with the number
# across all venues (both counts treat nan as one distinct value).
midnight_concepts = venues.loc[venues['start_of_day_offset'].eq("00:00:00"), 'concept'].unique()
all_concepts = venues['concept'].unique()
midnight_concepts.shape, all_concepts.shape

((14,), (15,))

It seems like ENTERTAINMENT_COMPLEX is the only concept with no venues whose day starts at 12 AM (`00:00:00`); every other concept has at least one such venue.

In [19]:
# Start-of-day offsets used by ENTERTAINMENT_COMPLEX venues.
is_entertainment_complex = venues['concept'].eq("ENTERTAINMENT_COMPLEX")
venues.loc[is_entertainment_complex, 'start_of_day_offset'].unique()

array(['05:00:00'], dtype=object)

This confirms it: every ENTERTAINMENT_COMPLEX venue starts its day at 5 AM (`05:00:00`).

In [21]:
# Concepts that have at least one venue whose start_of_day_offset is 01:00:00.
starts_at_one_am = venues['start_of_day_offset'].eq("01:00:00")
venues.loc[starts_at_one_am, 'concept'].unique()

array(['BAR', 'FAMILY_DINING', 'CAFE', nan, 'FAST_CASUAL'], dtype=object)

In [26]:
# Start-of-day offsets used by BAR venues.
is_bar = venues['concept'].eq("BAR")
venues.loc[is_bar, 'start_of_day_offset'].unique()

array(['01:00:00', '00:00:00', '11:00:00', '06:00:00', '04:00:00',
       '07:00:00', '02:00:00', '10:00:00', '09:00:00', '03:00:00',
       '05:00:00', '08:00:00', '12:00:00', '15:00:00', '16:00:00',
       '23:00:00'], dtype=object)

In [36]:
# Start-of-day offsets that appear somewhere in the data but never on a BAR venue.
all_offsets = venues['start_of_day_offset'].unique()
bar_offsets = venues.loc[venues['concept'].eq("BAR"), 'start_of_day_offset'].unique()
np.setdiff1d(all_offsets, bar_offsets)

array(['14:00:00', '17:00:00'], dtype=object)

Nothing notable here: BAR venues use every start-of-day offset in the dataset except `14:00:00` and `17:00:00`.

In [21]:
from ydata_profiling import ProfileReport

# Build a profiling report for the venues table; it is left as the cell's last
# expression so the notebook displays it.
venue_profile = ProfileReport(venues, title='Venue Report')
venue_profile

Summarize dataset: 100%|██████████| 14/14 [00:00<00:00, 55.02it/s, Completed]                     
Generate report structure: 100%|██████████| 1/1 [00:01<00:00,  1.19s/it]
Render HTML: 100%|██████████| 1/1 [00:00<00:00,  6.31it/s]




In [3]:
from ydata_profiling import ProfileReport

# Build the venues profiling report and write it to an HTML file.
# NOTE(review): ProfileReport is already imported, and the same report already
# built, in an earlier cell; consider one import in the top imports cell and
# reusing a single report object.
venue_report = ProfileReport(venues, title='Venue Report')
venue_report.to_file('venue_report.html')

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Distinct city names, in order of first appearance.
# NOTE(review): the output contains inconsistently formatted names (e.g. 'CALGARY',
# 'Colorado City,'), so some cities may be listed more than once under different
# spellings; consider normalizing case and punctuation before grouping by city.
city_values = venues['city']
city_values.unique()

array(['Albuquerque', 'Brooklyn', 'Windsor', 'Burnaby', 'Tisdale',
       'CALGARY', 'Edmonton', 'Worth Township', 'Toronto', 'Payson',
       'Victoria', 'Hereford', 'Ottawa', 'Surrey', 'Cave Creek',
       'Seattle', 'Caguas', 'Petty Harbour-Maddox Cove', 'Vancouver',
       'Brook', 'Fredonia', 'Halton Hills', 'Menomonie', 'Hialeah',
       'Union City', 'Omaha', 'West Albany Township', 'Grand Rapids',
       'La Salle', 'Marion', 'Niles Township', 'Ucluelet', 'Ocean City',
       'Columbus', 'Barrie', 'Bristow', 'Mississauga', 'Porters Lake',
       'Thousand Oaks', 'Rincon', 'Prince George', 'Severn', 'McAlester',
       'Auburn', 'Hamilton', 'Easton', 'Goodfield', 'Red Deer',
       'St. Catharines', 'Richboro', 'Fairfield', 'Burlington',
       'Winnipeg', 'Sevierville', 'Orange', 'Brewster', 'Denver',
       'Dartmouth', 'Metairie', 'Warwick', 'New York', 'Colorado City,',
       'Ocoee', 'Guelph', 'Fayetteville', 'Houston', 'Murray',
       'Charlottetown', 'Port Elgin', 'Char

In [16]:
# Number of distinct city strings, as a shape tuple.
# NOTE(review): this counts raw strings, so differently formatted spellings of the
# same city (e.g. 'CALGARY') would be counted separately.
distinct_cities = venues['city'].unique()
distinct_cities.shape

(387,)