In [1]:
# Use loans, our inventory table, and a dump of item types from MyTurn to generate statistics about each "Item Type" we have inventory for.
#
# The "Item Type" is MyTurn's way of bucketing items. You may have many difference sizes
# of box wrench, but they'll all have "Box Wrench" as the "Item Type", allowing us to
# understand trends/loan categories as a group.
import pandas as pd

def itertuples(item_type_df):
    return item_type_df.rename(
        columns={'Type': 'name', 'Parent Type': 'parent'},
    ).itertuples(index=False)

raw_item_types = pd.read_csv(
    'data/input/item-types.csv',
    # Skip the "Full Hierarchy" column since we don't use it at the moment
    usecols=('Type', 'Parent Type')
)
# Remove 'root' row, since it's a bit weird
item_types = raw_item_types.iloc[1:]
item_types

Unnamed: 0,Type,Parent Type
1,Tools,root
2,Hand Tools,Tools
3,Shovels & Digging,Hand Tools
4,Spade Shovel,Shovels & Digging
5,Square Shovel,Shovels & Digging
...,...,...
2380,Office & Industrial,root
2381,Cash Registers,Office & Industrial
2382,Cubicles,Office & Industrial
2383,More,root


In [2]:
# For each "Item Type", create a list that includes itself and all descendants
from collections import defaultdict

descendants = defaultdict(lambda: set())

for item_type in itertuples(item_types):
    # Add itself for convenience
    descendants[item_type.name].add(item_type.name)
    # Add the child to it's parent's descendants list.
    descendants[item_type.parent].add(item_type.name)
    
# Remove the defaultdict, so we don't accidentally access keys that don't exist
descendants = dict(descendants)
descendants

{'Tools': {'Aerial Equipment',
  'Air & Pneumatic Tools',
  'Animal & Rodent Control',
  'Automotive Tools',
  'Bike Tools',
  'Chains, Ropes, and Straps',
  'Farm Tools',
  'Fencing Tools',
  'Flooring Tools',
  'Garden Tools',
  'Hand Tools',
  'Heavy Equipment',
  'Magnifiers',
  'Meters & Diagnostics',
  'Plumbing Tools',
  'Power Tools',
  'Propane & Gas Powered Tools',
  'Safety Equipment',
  'Soil Augers',
  'Tool Kits',
  'Tools',
  'Tree & Orchard Care Tools',
  'Welding',
  'Wheelbarrows & Wagons'},
 'root': {'Art',
  'Assistive Technology',
  'Books & Media',
  'Clothing',
  'Education',
  'Electrical Equipment',
  'Electronics',
  'Emergency Response',
  'Furniture & Spaces',
  'Hobbies & Crafts',
  'Kids & Baby',
  'Kitchen & Dining',
  'Medical Equipment',
  'More',
  'Music',
  'Office & Industrial',
  'Party & Events',
  'Science & Engineering',
  'Sports & Outdoors',
  'Tools',
  'Toys & Games',
  'Vehicles & Trailers'},
 'Hand Tools': {'Augers & Drain Openers',
  'Axe

In [3]:
import pandas as pd

inventory = pd.read_pickle('data/output/inventory.pkl')
loans = pd.read_pickle('data/output/loans.pkl')

In [4]:
# Generate the report
import pandas as pd

# An empty list of the result
rows = []

# Index loans by "Item ID" so we can speed up lookups below
loans_by_item_id = loans.set_index('Item ID')

for item_type in itertuples(item_types):
    # Pull a list of items for this item_type or its descendants
    items = inventory[
        inventory['Item Type'].isin(descendants[item_type.name])
    ]
    item_type_count = len(descendants[item_type.name])
    count = items['Item ID'].count()
    disabled_count = items[items['Disabled'] | items['In Maintenance'] | items['Wish List']]['Item ID'].count()
    active_count = count - disabled_count

    # Determine the maximum number of items checked out at once.
    #
    # This is easier than it sounds. We just grab all the check-in/out dates and increment the check-out
    # count when we encounter a check-out, decrement on a check-in. It doesn't matter which item specifically
    # is checked-out/checked-in.
    item_loans = loans_by_item_id[loans_by_item_id.index.isin(items['Item ID'])]
    
    loan_dates = []
    for _, loan in item_loans.iterrows():
        loan_dates.append({'type': 'CHECKOUT', 'date': loan['Checked Out']})
        
        if pd.notna(loan['Checked In']):
            # After 6 months, treat the item as lost (non concurrently checked out)
            loan_dates.append({
                'type': 'CHECKIN',
                'date': min(loan['Checked In'], loan['Checked Out'] + pd.Timedelta(weeks=25))
            })

    # Sort them in ascending order by date
    loan_dates.sort(key=lambda d: d['date'])
    
    max_checked_out = 0
    max_checked_out_date = None
    checked_out = 0
    for ld in loan_dates:
        if ld['type'] == 'CHECKOUT':
            checked_out += 1
            if checked_out > max_checked_out:
                max_checked_out = checked_out
                max_checked_out_date = ld['date']
        else:
            checked_out -= 1
            
    rows.append({
        'Item Type': item_type.name, 'Types Of Items': item_type_count,
        'Item Count': count, 'Disabled Item Count': disabled_count, 'Active Item Count': active_count,
        'Max Checked Out': max_checked_out, 'Max Checked Out Date': max_checked_out_date
    })

item_type_stats = pd.DataFrame(rows)
item_type_stats = item_type_stats.set_index(keys=['Item Type'])

item_type_stats

Unnamed: 0_level_0,Types Of Items,Item Count,Disabled Item Count,Active Item Count,Max Checked Out,Max Checked Out Date
Item Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Tools,24,267,105,162,48,2024-01-25 11:12:00
Hand Tools,80,1355,631,724,209,2024-01-18 18:56:00
Shovels & Digging,10,56,18,38,23,2024-01-20 12:11:00
Spade Shovel,1,13,7,6,3,2018-03-21 15:09:00
Square Shovel,1,15,4,11,6,2024-01-15 19:55:00
...,...,...,...,...,...,...
Office & Industrial,3,2,1,1,1,2022-11-22 19:53:00
Cash Registers,1,0,0,0,0,NaT
Cubicles,1,1,1,0,1,2022-11-22 19:53:00
More,2,3,1,2,0,NaT


In [5]:
# Add a column representing the "utilization" of the item.
#
# For now, we're counting utilization as the maximum number checked out ever, divided by the current number of active items.

with_inventory = item_type_stats[item_type_stats['Item Count'] > 0]

with_utilization = pd.DataFrame(with_inventory)
# Round percentages to the nearest whole number for simplicity
with_utilization['Utilization'] = (100 * with_utilization['Max Checked Out'] / with_utilization['Active Item Count']).round(1)

with_utilization.sort_values(by=['Utilization'], ascending=False)

Unnamed: 0_level_0,Types Of Items,Item Count,Disabled Item Count,Active Item Count,Max Checked Out,Max Checked Out Date,Utilization
Item Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Awl,1,3,3,0,1,2017-05-24 20:56:00,inf
Glassware,1,1,1,0,1,2018-02-17 12:04:00,inf
Camping Tarps,1,3,3,0,2,2018-04-21 09:53:00,inf
Hammer Drills,1,3,3,0,2,2017-06-17 09:26:00,inf
Audio Recording Gear,4,1,1,0,1,2019-05-15 20:21:00,inf
...,...,...,...,...,...,...,...
Stage & Screen,9,1,1,0,0,NaT,
Other Props,1,1,1,0,0,NaT,
Medical Braces,8,2,2,0,0,NaT,
Knee Braces,1,1,1,0,0,NaT,


In [6]:
# Print the item_type_stats with the "Utilization" column in the order the "Item Type"s are listed in the
# original data/input/item_types.csv (ordered in depth first search order).

with_utilization.to_csv('data/output/item_type_stats.csv')
with_utilization.to_pickle('data/output/item_type_stats.pkl')

In [7]:
with_utilization.sort_values(by='Utilization', ascending=False).to_csv('data/output/temp_item_type_stats_sorted.csv')

In [8]:
# This table is challenging to use.
#
# * The peak utilization is sometimes a weird checkout - like the one user who checked out 3 hatches at once and got robbed.
# * Very specific item categories cause a lot of noise ("Crowbar With Chisel Ends"). I don't actually care if we're out of
#   these, just whether users can get a crowbar. Not sure if I should make a list of these and exclude/mark them, if there's
#   an underlying problem with MyTurn's Item Types, or if I should hide/group this.

In [9]:
with_utilization.loc[['Hammer Drills']]

Unnamed: 0_level_0,Types Of Items,Item Count,Disabled Item Count,Active Item Count,Max Checked Out,Max Checked Out Date,Utilization
Item Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Hammer Drills,1,3,3,0,2,2017-06-17 09:26:00,inf


In [10]:
# TODO: Is this actually useful?
#
# * When sub-item types are similar, it can raise a false alarm (no "Crowbars", but plenty of "Crowbars With Chisel Ends")