# Analyze Hemingway's known lending library activity


In [1]:
import pandas as pd

import sys
sys.path.append('..')

from utils.missing_data_processing import load_initial_data
from utils.save_altair_charts import save_chart


## Load event data

In [2]:
import os

# Sanity check that the source data directory is reachable. The location is
# machine-specific (an external volume); presumably this is where the project
# data loader reads from -- confirm against utils.missing_data_processing.
source_data_dir = "/Volumes/Samsung_T5/shxco-missingdata-specreading/data/source_data"
os.path.exists(source_data_dir)

True

In [3]:
# for hemingway borrowing, we only need events
# NOTE(review): the wording above suggests load_initial_data may provide more
# than the events table. The following cells use the result directly as a
# DataFrame (events_df.member_uris, events_df.item_uri), so confirm the helper
# returns a single events DataFrame rather than a tuple of tables.
events_df = load_initial_data()

In [4]:
# Shared accounts store several member URIs in a single ";"-separated string.
# Spread them across two columns, one per account holder.
member_uri_columns = ["first_member_uri", "second_member_uri"]
events_df[member_uri_columns] = events_df["member_uris"].str.split(";", expand=True)


def _uri_slug(uri):
    """Return the second-to-last "/"-separated piece of a URI string.

    Presumably this is the record slug, assuming URIs end with a trailing
    slash -- confirm against the source data.
    """
    return uri.split("/")[-2]


# Short-form ids are more convenient to filter on than full URIs.
# The member id is taken from the first member listed on the account only.
events_df["member_id"] = events_df["first_member_uri"].apply(_uri_slug)
# item_uri is guarded against missing values; those rows get None as item id.
events_df["item_id"] = events_df["item_uri"].apply(
    lambda uri: None if pd.isna(uri) else _uri_slug(uri)
)

# long-borrow overrides don't affect hemingway; skip applying corrections

## Hemingway's borrowing

In [5]:
# Select Hemingway's borrow events: rows whose (first-listed) member id is
# his and whose event type is a borrow.
is_hemingway = events_df["member_id"] == 'hemingway-ernest'
is_borrow = events_df["event_type"] == 'Borrow'
hemingway_borrows = events_df[is_hemingway & is_borrow]
# display how many borrow events were found
len(hemingway_borrows)

117

### Generate a raincloud plot to show borrowing durations

Use borrowing duration in days.


In [6]:
import altair as alt

# Density ("cloud") half of the raincloud plot: a density estimate of borrow
# duration in days, drawn as a filled area.
duration_density = (
    alt.Chart(hemingway_borrows)
    .transform_density(
        "borrow_duration_days",
        # the transform emits these two fields; the encodings below use them
        as_=["borrow_duration", "density"],
    )
    .mark_area(orient="vertical")
    .encode(
        # No title, labels, or ticks on x: the strip plot combined below this
        # chart carries the labelled duration axis. The axis definition must
        # be an alt.Axis -- alt.X is an encoding channel, not an axis, and
        # does not accept labels/ticks.
        x=alt.X(
            "borrow_duration:Q",
            title=None,
            axis=alt.Axis(labels=False, ticks=False),
        ),
        y=alt.Y(
            "density:Q",
            # (adding stack='center' here would turn this into a violin plot)
            # suppress labels and ticks because we're going to combine this
            # chart with the strip plot
            title=None,
            axis=alt.Axis(labels=False, values=[0], grid=False, ticks=False),
        ),
    )
    .properties(height=100, width=800)
)

duration_density

In [7]:
# Strip ("rain") half of the raincloud plot: one small circle per borrow
# event, positioned on x by its duration in days.
# jittering / stripplot adapted from https://stackoverflow.com/a/71902446/9706217
stripplot = (
    alt.Chart(hemingway_borrows)
    # per-row random vertical offset so overlapping durations stay visible
    .transform_calculate(jitter="(random() / 200) - 0.0052")
    .mark_circle(size=15)
    .encode(
        # this chart carries the labelled duration axis
        x=alt.X(
            "borrow_duration_days",
            axis=alt.Axis(labels=True),
            title='Borrow duration in days',
        ),
        # jitter is only for spreading the points, so its axis is hidden
        y=alt.Y("jitter:Q", axis=None, title=None),
    )
    .properties(width=800, height=100)
)
stripplot

In [10]:
# Stack the density area on top of the strip plot to form the raincloud plot;
# zero spacing between the two so they read as a single figure.
# (No chart title is set.)
raincloud_plot = alt.vconcat(
    duration_density,
    stripplot,
).configure_concat(spacing=0)
raincloud_plot

In [11]:
# save the chart
# Writes the combined raincloud chart through the project's save_chart helper
# (imported from utils.save_altair_charts). The output directory and export
# mechanism are decided by that helper and are not visible in this notebook.
save_chart(raincloud_plot, "fig 12 - hemingway_borrow_duration_raincloud_plot.png")