# Analyze Hemingway's known lending library activity


In [1]:
import pandas as pd

# add project root to path so we can import utility methods
import sys
sys.path.append('..')

from utils.missing_data_processing import load_initial_data, preprocess_events_data
from utils.charts import save_altair_chart, raincloud_plot


In [2]:
from importlib import reload
import utils.charts
reload(utils.charts)

from utils.charts import save_altair_chart, raincloud_plot


## Load event data

In [3]:
# for hemingway borrowing, we only need events
events_df, _, _, _ = load_initial_data()
# preprocess to split shared account members and generate short ids for members and items
events_df = preprocess_events_data(events_df)

## Hemingway's borrowing

Filter to borrow events for Hemingway only based on item type and Hemingway's short member id, then limit to the fields that are relevant for this analysis.

In [4]:
# get hemingway's borrow events
hemingway_borrows = events_df[(events_df.member_id == 'hemingway-ernest') & (events_df.event_type == 'Borrow')]
len(hemingway_borrows)

117

In [5]:
# limit to fields we care about
hemingway_borrows = hemingway_borrows[['start_date', 'end_date', 'item_title', 'item_authors', 'item_year', 'borrow_duration_days']]
hemingway_borrows.head()

Unnamed: 0,start_date,end_date,item_title,item_authors,item_year,borrow_duration_days
338,1927-11,1927-12-07,The Bridge of San Luis Rey,"Wilder, Thornton",1927.0,
551,1938-11,,The Education of Hyman Kaplan,"Rosten, Leo",1937.0,
552,1938-12,,New Masses,,,
5629,1925-10-08,,Mammonart: An Essay in Economic Interpretation,"Sinclair, Upton",1925.0,
5643,1925-10-12,1925-10-27,Sentimental Education,"Flaubert, Gustave",1869.0,15.0


### Distribution of borrow durations

How long did Hemingway typically borrow books?


In [6]:
# generate labels for tooltip display 
def item_label(row):
    # label item based on title, authors, and year; author and year may not be present
    text = row.item_title
    if not pd.isna(row.item_authors):
        # could be multiple authors; split them
        authors_firstlast = []
        for author in row.item_authors.split(';'):
            # author names are formatted lastname, first. split and reverse for display
            name_parts = author.split(', ', 1)
            name_parts.reverse()
            authors_firstlast.append(" ".join(name_parts))
        text = f"{text} by {', '.join(authors_firstlast)}"
    if not pd.isna(row.item_year):
        text = f"{text} ({int(row.item_year)})"
    return text

def borrow_label(row):
    # generate a borrow label from start and end date; may not both be known
    start, end = "-", "-"
    if not pd.isna(row.start_date):
        start = row.start_date
    
    if not pd.isna(row.end_date):
        end = row.end_date
    
    return f"{start} to {end}"

hemingway_borrows['item'] = hemingway_borrows.apply(item_label, axis=1)
hemingway_borrows['dates'] = hemingway_borrows.apply(borrow_label, axis=1)
# copy and format borrow duration days for readable version of tooltip
hemingway_borrows["days out"] = hemingway_borrows.borrow_duration_days.apply(lambda x: "-" if pd.isna(x) else int(x))
hemingway_borrows.head()

Unnamed: 0,start_date,end_date,item_title,item_authors,item_year,borrow_duration_days,item,dates,days out
338,1927-11,1927-12-07,The Bridge of San Luis Rey,"Wilder, Thornton",1927.0,,The Bridge of San Luis Rey by Thornton Wilder ...,1927-11 to 1927-12-07,-
551,1938-11,,The Education of Hyman Kaplan,"Rosten, Leo",1937.0,,The Education of Hyman Kaplan by Leo Rosten (1...,1938-11 to -,-
552,1938-12,,New Masses,,,,New Masses,1938-12 to -,-
5629,1925-10-08,,Mammonart: An Essay in Economic Interpretation,"Sinclair, Upton",1925.0,,Mammonart: An Essay in Economic Interpretation...,1925-10-08 to -,-
5643,1925-10-12,1925-10-27,Sentimental Education,"Flaubert, Gustave",1869.0,15.0,Sentimental Education by Gustave Flaubert (1869),1925-10-12 to 1925-10-27,15


In [18]:
# generate a raincloud plot with a tooltip for hover interaction
import altair as alt

# TEMP
import utils.charts
from importlib import reload
reload(utils.charts)
from utils.charts import raincloud_plot


hemingway_raincloud = raincloud_plot(
    hemingway_borrows, 'borrow_duration_days', 'Borrow duration in days',
    tooltip=alt.Tooltip(['item', 'dates', 'days out']))
hemingway_raincloud = hemingway_raincloud.properties(title='Distribution of borrow duration for Ernest Hemingway')
hemingway_raincloud

ValueError: item encoding field is specified without a type; the type cannot be inferred because it does not match any column in the data.

alt.VConcatChart(...)

In [None]:
# save the chart as high res static image
save_altair_chart(hemingway_raincloud, "fig 12 - hemingway_borrow_duration_raincloud_plot.png")

In [None]:
# export to html for embedding interactive version
hemingway_raincloud.save("hemingway_borrow_duration_raincloud_plot.html")