In [None]:
import pandas as pd

# Load the three pre-processed tables used by the rest of this notebook.
# NOTE(review): these are absolute paths into one user's home directory, so the
# notebook only runs on that machine as written.
# NOTE(review): unpickling can execute arbitrary code; presumably these files are
# produced locally by an earlier processing step — confirm they are trusted.
inventory = pd.read_pickle('/home/max/chtl/data/processed/inventory.pkl')
loans = pd.read_pickle('/home/max/chtl/data/processed/loans.pkl')
item_types = pd.read_pickle('/home/max/chtl/data/processed/item-types.pkl')

In [None]:
from collections import defaultdict

# For each "Item Type", build the set containing itself and ALL of its descendants
# (children, grandchildren, ...), keyed by the type's name.
descendants = defaultdict(set)

# Map every type to its parent so we can walk up the hierarchy from any type
parent_of = dict(zip(item_types['Type'], item_types['Parent Type']))

for ix, item_type in item_types.iterrows():
    type_name = item_type['Type']
    # Add itself for convenience
    descendants[type_name].add(type_name)

    # Add the type to every ancestor's set, not only its direct parent's, so that a
    # grandparent's statistics also cover its grandchildren.
    ancestor = item_type['Parent Type']
    # Guards against malformed data where the parent chain loops back on itself
    visited = {type_name}
    # A missing parent (None/NaN) marks a top-level type, which ends the walk
    while pd.notna(ancestor) and ancestor not in visited:
        descendants[ancestor].add(type_name)
        visited.add(ancestor)
        ancestor = parent_of.get(ancestor)

# Remove the defaultdict, so we don't accidentally access keys that don't exist
descendants = dict(descendants)
descendants

In [None]:
# Use loans, our inventory table, and a dump of item types from MyTurn to generate statistics about each "Item Type" we have inventory for.
#
# The "Item Type" is MyTurn's way of bucketing items. You may have many different sizes
# of box wrench, but they'll all have "Box Wrench" as the "Item Type", allowing us to
# understand trends/loan categories as a group.
import pandas as pd

# One stats dict per item type is collected here and turned into a DataFrame at the end
rows = []

# Index loans by "Item ID" so we can speed up lookups below
loans_by_item_id = loans.set_index('Item ID')

# A loan still open after this long is treated as lost, i.e. it stops counting as
# "concurrently checked out" from that point on.
LOST_AFTER = pd.Timedelta(weeks=25)

for ix, item_type in item_types.iterrows():
    # Pull a list of items for this item_type or its descendants
    type_names = descendants[item_type['Type']]
    items = inventory[inventory['Item Type'].isin(type_names)]
    item_type_count = len(type_names)
    count = items['Item ID'].count()
    # Items that exist in the table but cannot currently be borrowed
    unavailable = items['Disabled'] | items['In Maintenance'] | items['Wish List']
    disabled_count = items[unavailable]['Item ID'].count()
    active_count = count - disabled_count

    # Determine the maximum number of items checked out at once.
    #
    # This is easier than it sounds. We just grab all the check-in/out dates and increment the check-out
    # count when we encounter a check-out, decrement on a check-in. It doesn't matter which item specifically
    # is checked-out/checked-in.
    item_loans = loans_by_item_id[loans_by_item_id.index.isin(items['Item ID'])]

    loan_dates = []
    for _, loan in item_loans.iterrows():
        checked_out_at = loan['Checked Out']
        loan_dates.append({'type': 'CHECKOUT', 'date': checked_out_at})

        # Every loan gets a matching CHECKIN event, even if the item never came back.
        # Otherwise a lost item would count as checked out forever and inflate every
        # later peak. A loan that is still open and younger than LOST_AFTER gets a
        # CHECKIN in the future, so it still counts as out today.
        lost_at = checked_out_at + LOST_AFTER
        checked_in_at = loan['Checked In']
        if pd.notna(checked_in_at):
            checkin_date = min(checked_in_at, lost_at)
        else:
            checkin_date = lost_at
        loan_dates.append({'type': 'CHECKIN', 'date': checkin_date})

    # Sort them in ascending order by date
    loan_dates.sort(key=lambda d: d['date'])

    # Sweep through the events in time order, tracking the running number of items out
    max_checked_out = 0
    max_checked_out_date = None
    checked_out = 0
    for ld in loan_dates:
        if ld['type'] == 'CHECKOUT':
            checked_out += 1
            if checked_out > max_checked_out:
                max_checked_out = checked_out
                max_checked_out_date = ld['date']
        else:
            checked_out -= 1

    rows.append({
        'Item Type': item_type['Type'], 'Types Of Items': item_type_count,
        'Item Count': count, 'Disabled Item Count': disabled_count, 'Active Item Count': active_count,
        'Max Checked Out': max_checked_out, 'Max Checked Out Date': max_checked_out_date
    })

item_type_stats = pd.DataFrame(rows)
item_type_stats = item_type_stats.set_index(keys=['Item Type'])

item_type_stats

In [None]:
# Add a column representing the "utilization" of each item type.
#
# For now, we're counting utilization as the maximum number checked out ever, divided by the
# current number of active items, expressed as a percentage.

# Only keep item types we actually hold inventory for
with_inventory = item_type_stats[item_type_stats['Item Count'] > 0]

# Take an explicit copy: pd.DataFrame(frame) does not copy the data by default, so adding
# a column to the result could alias with_inventory.
with_utilization = with_inventory.copy()
# Percentage, rounded to one decimal place.
# NOTE: an item type whose items are all disabled has an Active Item Count of 0, so its
# utilization comes out as inf (or NaN when nothing was ever checked out).
with_utilization['Utilization'] = (
    100 * with_utilization['Max Checked Out'] / with_utilization['Active Item Count']
).round(1)

with_utilization.sort_values(by=['Utilization'], ascending=False)

In [None]:
# Write the item_type_stats with the "Utilization" column to the reports directory, once as
# CSV and once as a pickle.
#
# with_utilization is not re-sorted before writing, so its rows stay in the order the
# "Item Type"s were iterated from item_types. The original note says that is the order of
# data/input/item_types.csv (depth first search order) — not verifiable from this notebook.

with_utilization.to_csv('/home/max/chtl/data/reports/item_type_stats.csv')
with_utilization.to_pickle('/home/max/chtl/data/reports/item_type_stats.pkl')

In [None]:
# Also export a copy ranked from highest to lowest utilization.
(
    with_utilization
    .sort_values(by='Utilization', ascending=False)
    .to_csv('/home/max/chtl/data/reports/temp_item_type_stats_sorted.csv')
)

In [None]:
# This table is challenging to use.
#
# * The peak utilization is sometimes a weird checkout - like the one user who checked out 3 hatchets at once and got robbed.
# * Very specific item categories cause a lot of noise ("Crowbar With Chisel Ends"). I don't actually care if we're out of
#   these, just whether users can get a crowbar. Not sure if I should make a list of these and exclude/mark them, if there's
#   an underlying problem with MyTurn's Item Types, or if I should hide/group this.

In [None]:
# Spot-check a single item type; the list selector keeps the result a one-row DataFrame.
with_utilization.loc[['Hammer Drills'], :]

In [None]:
# TODO: Is this actually useful?
#
# * When sub-item types are similar, it can raise a false alarm (no "Crowbars", but plenty of "Crowbars With Chisel Ends")

In [None]:
# Okay, let's redo. Go through items and group, not by item types.

# Use loans, our inventory table, and a dump of item types from MyTurn to generate statistics about each "Item Type" we have inventory for.
#
# The "Item Type" is MyTurn's way of bucketing items. You may have many different sizes
# of box wrench, but they'll all have "Box Wrench" as the "Item Type", allowing us to
# understand trends/loan categories as a group.
import pandas as pd
from dataclasses import dataclass

@dataclass
class ItemTypeCounts:
    """Running totals accumulated for a single "Item Type"."""
    # Number of inventory rows carrying this "Item Type"
    item_count: int
    # Total number of loan rows recorded against those inventory items
    loan_count: int

# An empty list of the result
rows = []

# Index loans by "Item ID" so we can speed up lookups below
loans_by_item_id = loans.set_index('Item ID')

# Count the loan rows per "Item ID" once, up front. Note that `in` and `[]` on a
# DataFrame look at column labels rather than the index, so per-item lookups have to go
# through the index explicitly; a plain dict keeps that lookup simple.
loans_per_item_id = loans_by_item_id.index.value_counts().to_dict()

item_type_counts = defaultdict(lambda: ItemTypeCounts(0, 0))

for ix, item in inventory.iterrows():
    # Number of times this specific item has been loaned (0 if it never appears in loans)
    loan_count = loans_per_item_id.get(item['Item ID'], 0)

    c = item_type_counts[item['Item Type']]
    c.item_count += 1
    c.loan_count += loan_count


for item_type, counts in item_type_counts.items():
    rows.append({
        'Item Type': item_type,
        'Item Count': counts.item_count,
        'Loan Count': counts.loan_count
    })
# Naming the columns keeps set_index working even when inventory is empty
simple_stats = pd.DataFrame(rows, columns=['Item Type', 'Item Count', 'Loan Count'])
simple_stats = simple_stats.set_index(keys=['Item Type'])
# The indexed frame used to be bound to this misspelled name only; keep it as an alias
# in case anything else still refers to it.
simplestats = simple_stats

In [None]:
simple_stats.to_csv('/home/max/chtl/data/reports/simple_stats.csv')
