# Test Things Out Here

In [None]:
# investigate weird 2023 loan data
from datetime import date
import pandas as pd

# NOTE: read_pickle can execute arbitrary code; only load pickles produced by this project.
loans = pd.read_pickle('data/output/loans.pkl')
# Loans that are still out have no 'Checked In' value; measure those up to today (midnight).
loans['Length'] = loans['Checked In'].fillna(pd.to_datetime(date.today())) - loans['Checked Out']
# Vectorised year extraction instead of a per-row Python lambda.
loans['Year'] = loans['Checked Out'].dt.year
# Exclude 2015 (tool library not fully opened yet).
# NOTE: despite the variable name, the current (partial) year is NOT excluded here.
loans_with_complete_years = loans[~loans['Year'].isin([2015])]

print('How Many Days Is An Item Typically Loaned For?')
length_stats = loans_with_complete_years.groupby('Year').describe(
    include='timedelta', percentiles=[.25, .5, .75, .95, .99]
)['Length']
# Drop the 'count' column here, we have it elsewhere. Round each observation to the nearest day.
length_stats.drop(columns='count').map(lambda t: t.round(freq='D'))

In [None]:
# IPython magic: turn off Jedi-based tab completion for this session
# (presumably because completion was slow or unreliable here; confirm before removing).
%config Completer.use_jedi = False

In [None]:
# How do "successful" loans trend over time?
#
# Let's call the "success" rate the number of loans which are returned within a month.
# TODO: This doesn't handle longer checkout periods well.
from pandas import Timedelta, Timestamp
from datetime import date, datetime, timedelta

# Loans that are still out have a missing (NaT) 'Checked In', so their length is NaT.
# Any comparison against NaT is False, which marks them as "not returned in time"
# without needing a far-future placeholder date.
loan_length = loans['Checked In'] - loans['Checked Out']
loans['Returned Within Month'] = loan_length <= Timedelta(31, unit='D')
loans['Returned Within 2 Months'] = loan_length <= Timedelta(60, unit='D')
# Bucket every loan by the first day of the month it was checked out in.
loans['Month'] = loans['Checked Out'].map(lambda d: date(d.year, d.month, 1))
# Ignore loans checked out in the last 31 days: they have not had a full month to come back yet.
# .copy() keeps later column assignments (e.g. when re-running this cell) off a slice of the old frame.
loans = loans[loans['Checked Out'] < (datetime.now() - timedelta(days=31))].copy()

# One row per (Month, Returned Within Month) pair with the number of loans in it.
with_counts = loans.groupby(['Month', 'Returned Within Month']).size().reset_index(name="Loan Count")
# Select the column before summing; sum() takes no column list (its first positional arg is numeric_only).
returned_within_month_counts = with_counts[with_counts['Returned Within Month']].groupby('Month')['Loan Count'].sum()
total_counts = with_counts.groupby('Month')['Loan Count'].sum()

# Months with no on-time returns are absent from the numerator and become NaN after alignment; show them as 0%.
returned_within_month_percents = (100 * returned_within_month_counts / total_counts).fillna(0).round(2)
returned_within_month_percents.plot(figsize=(12, 4), grid=True, title='Percentage Of Loans Returned Within 1 Month', xlabel='Year Month', ylabel="%")
# TODO: We don't have a way of seeing if loans are not returned by users but marked as returned. Maybe that's why things were higher pre-pandemic?
#
# We should see how high we can get it. 80% is terrible. Was adding an address, etc, discouraging people from not returning things? Did our reminder emails change?

In [None]:
# Number of loans checked out in each month, drawn as a line chart.
monthly_loan_counts = loans.groupby(['Month']).size()
monthly_loan_counts.plot(
    figsize=(12, 4),
    grid=True,
    title="Loan Counts By Month",
    xlabel='Year Month',
    ylabel='Count',
)