<a href="https://colab.research.google.com/github/rlskoeser/shxco-missingdata-specreading/blob/main/missing-data/Sco_prophet_missingdata_weekly.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Predict Missing Members Data from Shakespeare and Company Project

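This notebook estimates how much membership data is missing from the Shakespeare and Company Project dataset. It charts logbook events and new members over time (by source), uses Prophet to forecast new subscriptions for the periods covered by gaps in the logbooks, and ends with a simpler ratio-based estimate of missing member accounts.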

## Setup Libraries and Load S&Co Data



In [1]:
# Make sure Prophet is available before the forecasting helpers are imported.
try:
    import prophet
    print("Prophet is already installed.")
except ImportError:
    print("Prophet is not installed. Installing...")
    import subprocess
    import sys
    # Invoke pip through the interpreter running this kernel so the package is
    # installed into the same environment; a bare `pip` found on PATH may
    # belong to a different Python installation.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "prophet"])

Prophet is already installed.


In [2]:
from datetimerange import DateTimeRange
import warnings
warnings.filterwarnings('ignore')
import altair as alt
alt.data_transformers.disable_max_rows()


import sys
sys.path.append('..')

from scripts.missing_data_processing import *
from scripts.forecasting_missing_data import *

In [4]:
# Load the events data and derive the tables used throughout the notebook.
events_df = load_initial_data()
logbook_events_df = generate_logbooks_events(events_df)

# generate_member_events returns three values (they are unpacked below).
# Call it once and keep the full result under `membership_events`, instead of
# running the same computation a second time just to unpack it.
membership_events = generate_member_events(events_df)
logbook_gaps_df, logbooks_weekly_count, logbook_gaps = generate_logbook_gaps(logbook_events_df)
member_events, newmember_yearly_count, members_first_dates = membership_events

# groupby(...).first() yields one row per member_id
members_first_events = member_events.groupby("member_id").first().reset_index()
newmember_subscriptions_by_year, newmember_subscriptions_by_week = generate_newmember_subscriptions(member_events, logbook_gaps)

# first logbook-sourced Subscription/Renewal per member
subscription_first_events = (
    member_events[
        member_events.source_type.str.contains('Logbook')
        & member_events.event_type.isin(['Subscription', 'Renewal'])
    ]
    .groupby("member_id")
    .first()
    .reset_index()
)

The 6 large gaps in the logbooks
	January 01 1928 to February 29 1928 (59 days)
	January 03 1930 to June 01 1930 (149 days)
	August 01 1930 to December 31 1930 (152 days)
	February 17 1931 to September 25 1932 (586 days)
	January 01 1937 to February 16 1938 (411 days)
	May 06 1938 to October 20 1938 (167 days)

The 5 small gaps in the logbooks that will be skipped
	October 30 1927 to November 01 1927 (2 days)
	February 07 1934 to February 08 1934 (1 day)
	January 01 1935 to January 11 1935 (10 days)
	December 29 1935 to January 01 1936 (3 days)
	August 29 1939 to September 12 1939 (14 days)


# Missing membership events

## Logbook / membership events by week

In [5]:
# Weekly logbook event totals, drawn as a line over time.
base = (
    alt.Chart(logbooks_weekly_count)
    .properties(width=1200, height=275)
    .encode(alt.X('logbook_date:T', axis=alt.Axis(title='date')))
)

line = base.mark_line().encode(
    alt.Y('total', axis=alt.Axis(title='total events per week'))
)

# Translucent gray rectangles highlighting the logbook gaps. The y/y2 values
# are fixed pixel offsets; 275 matches the chart height, so each rectangle
# spans the full vertical extent of the plot.
logbook_gaps_span = (
    alt.Chart(logbook_gaps_df)
    .mark_rect(opacity=0.2, color="gray")
    .encode(
        x='start',
        x2='end',
        y=alt.value(0),
        y2=alt.value(275),
    )
    .properties(width=1200, height=275)
)

# layer the gap rectangles over the weekly line
line + logbook_gaps_span

# missing members

## member data setup

In [6]:
# Line chart of the yearly new-member counts returned by generate_member_events.
newmember_base = (
    alt.Chart(newmember_yearly_count, title='new members by year')
    .properties(width=1200, height=275)
    .encode(alt.X('date:T', axis=alt.Axis(title='date')))
)

newmember_line = newmember_base.mark_line().encode(
    alt.Y('total', axis=alt.Axis(title='new members by year'))
)
newmember_line

In [7]:
# overlay the shaded logbook gaps on the yearly new-member line
newmember_line + logbook_gaps_span

In [8]:
# count the unique member ids among the first events; the frame was built with
# groupby("member_id").first(), so this should equal its number of rows
len(members_first_events.member_id.unique())

5085

In [11]:
# what is the breakdown of event types among members' first events?
members_first_events.event_type.value_counts()

Subscription        4189
Renewal              550
Reimbursement        208
Borrow                91
Purchase              18
Separate Payment      12
Supplement            10
Gift                   3
Crossed out            1
Request                0
Name: event_type, dtype: int64

In [12]:
# what is the breakdown of source types among members' first events?
members_first_events.source_type.value_counts()

Logbook                                      2744
Logbook;Address Book                         1375
Address Book                                  445
Logbook;Lending Library Card                  242
Lending Library Card                          220
Lending Library Card;Address Book              22
Logbook;Lending Library Card;Address Book      19
Lending Library Card;Logbook                   15
Lending Library Card;Lending Library Card       2
Lending Library Card;Logbook;Address Book       1
Name: source_type, dtype: int64

### new members added, logbooks only

In [13]:
# First event per member, using logbook-sourced events only: keep member
# events whose source_type mentions 'Logbook', group by member, take first().
# NOTE(review): GroupBy.first() returns the first non-null value per column and
# relies on the row order of member_events (presumably sorted by date; confirm).
logbook_first_events = (
    member_events[member_events.source_type.str.contains('Logbook')]
    .groupby("member_id")
    .first()
    .reset_index()
)


In [14]:
# number of unique members with a logbook first event (should equal the row count,
# since the frame holds one row per member_id group)
len(logbook_first_events.member_id.unique())

4597

In [15]:
# check source type breakdown for the logbook-only first events
# (every value should include 'Logbook', given the filter used to build the frame)
logbook_first_events.source_type.value_counts()

Logbook                                              2828
Logbook;Address Book                                 1412
Logbook;Lending Library Card                          309
Logbook;Lending Library Card;Address Book              25
Lending Library Card;Logbook                           20
Logbook;Lending Library Card;Lending Library Card       1
Logbook;Address Book;Lending Library Card               1
Lending Library Card;Logbook;Address Book               1
Name: source_type, dtype: int64

In [16]:
# Yearly count of new members based on logbook-sourced first events:
# bucket first-event dates by year, count members per bucket, and label the
# count column 'total'.
logbook_newmembers_by_year = (
    logbook_first_events
    .groupby([pd.Grouper(key='date', freq='Y')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)
logbook_newmembers_by_year

Unnamed: 0,date,total
0,1919-12-31,53
1,1920-12-31,193
2,1921-12-31,278
3,1922-12-31,322
4,1923-12-31,280
5,1924-12-31,325
6,1925-12-31,404
7,1926-12-31,436
8,1927-12-31,296
9,1928-12-31,289


In [17]:
# Purple line: yearly new members from logbook first events, shown together
# with the line for all new members.
logbook_newmember_base = (
    alt.Chart(logbook_newmembers_by_year, title='new members by year')
    .properties(width=1200, height=275)
    .encode(alt.X('date:T', axis=alt.Axis(title='date')))
)

logbook_newmember_line = logbook_newmember_base.mark_line(color="purple").encode(
    alt.Y('total', axis=alt.Axis(title='new members by year'))
)
newmember_line + logbook_newmember_line

### aggregate by month instead of year

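The yearly aggregation is confusing to compare, because the totals and graphs in the dataset essay are monthly; next to them the yearly chart looks wrong. Aggregate by month here so the two can be compared directly.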

In [18]:
# Monthly count of new members based on logbook-sourced first events.
logbook_newmembers_by_month = (
    logbook_first_events
    .groupby([pd.Grouper(key='date', freq='M')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)

In [19]:
# Semi-transparent purple line of monthly new members from logbook first events.
logbook_newmember_monthly_base = (
    alt.Chart(logbook_newmembers_by_month, title='new members by month')
    .properties(width=1200, height=275)
    .encode(alt.X('date:T', axis=alt.Axis(title='date')))
)

logbook_newmember_monthly_line = (
    logbook_newmember_monthly_base
    .mark_line(color="purple", opacity=0.5)
    .encode(alt.Y('total', axis=alt.Axis(title='new members by month')))
)

logbook_newmember_monthly_line

In [20]:
# Monthly count of new members, bucketing members_first_dates by month.
newmember_monthly_count = (
    members_first_dates
    .groupby([pd.Grouper(key='date', freq='M')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)

In [21]:
# Semi-transparent line of monthly new members (default mark color).
newmember_monthly_base = (
    alt.Chart(newmember_monthly_count, title='new members by month')
    .properties(width=1200, height=275)
    .encode(alt.X('date:T', axis=alt.Axis(title='date')))
)

newmember_monthly_line = (
    newmember_monthly_base
    .mark_line(opacity=0.5)
    .encode(alt.Y('total', axis=alt.Axis(title='new members by month')))
)

newmember_monthly_line

### members only documented in address books

In [22]:
# Which members do we know about _only_ from the address books?

# ids of members with at least one event whose source includes the logbooks
logbook_members = member_events.loc[
    member_events.source_type.str.contains('Logbook'), 'member_id'
].unique()
# ids of members with at least one event whose source includes a lending card
lending_card_members = member_events.loc[
    member_events.source_type.str.contains('Lending Library Card'), 'member_id'
].unique()
# ids of members with at least one event whose source includes an address book
addressbook_members = member_events.loc[
    member_events.source_type.str.contains('Address Book'), 'member_id'
].unique()

# address-book-only members: everyone who has no logbook and no lending card event
address_book_only_members = set(member_events.member_id.unique()).difference(
    logbook_members, lending_card_members
)
print('%d members who only have events from the address books' % len(address_book_only_members))


329 members who only have events from the address books


In [23]:
# sanity check: summarize the source types of all events for address-book-only
# members; 'Address Book' should be the only value present
member_events[member_events.member_id.isin(address_book_only_members)].source_type.describe()

count              365
unique               1
top       Address Book
freq               365
Name: source_type, dtype: object

In [24]:
# First event (and therefore the date added) for each address-book-only member.
addressbook_first_events = (
    member_events[member_events.member_id.isin(address_book_only_members)]
    .groupby("member_id")
    .first()
    .reset_index()
)

In [25]:
# Yearly count of new members among the address-book-only members.
addressbook_newmembers_by_year = (
    addressbook_first_events
    .groupby([pd.Grouper(key='date', freq='Y')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)

In [26]:
# sanity check: the yearly totals should add up to the number of
# address-book-only members
addressbook_newmembers_by_year.total.sum()

329

### members only documented on lending cards

In [27]:
# Do we have any members known only from lending library cards?

# lending-card-only members: everyone who has no logbook and no address book event
lending_card_only_members = set(member_events.member_id.unique()).difference(
    logbook_members, addressbook_members
)
print('%d members who only have events from lending library cards' % len(lending_card_only_members))


124 members who only have events from lending library cards


In [28]:
# All events for the lending-card-only members, with the event year added.
# .assign() builds a new frame, so the year column is not written into a slice
# of member_events; the previous item assignment on the filtered frame triggers
# pandas' SettingWithCopyWarning, which the global warnings filter was hiding.
cardonly_members = member_events[
    member_events.member_id.isin(lending_card_only_members)
].assign(year=lambda events: events.date.dt.year)

# which years have events for card-only members?
cardonly_members.year.unique()

array([1919, 1920, 1922, 1923, 1924, 1925, 1927, 1928, 1929, 1930, 1931,
       1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941])

In [29]:
# First event for each lending-card-only member.
cardonly_first_events = (
    member_events[member_events.member_id.isin(lending_card_only_members)]
    .groupby("member_id")
    .first()
    .reset_index()
)

In [30]:
# Yearly count of new members among the lending-card-only members.
cardonly_newmembers_by_year = (
    cardonly_first_events
    .groupby([pd.Grouper(key='date', freq='Y')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)

### other members

In [31]:
# Everyone who is neither card-only nor address-book-only; needed so the three
# groups partition the members for the stacked area chart.
other_members = set(member_events.member_id.unique()).difference(
    lending_card_only_members, address_book_only_members
)
print('%d "regular" members (not addressbook or lending card only)' % len(other_members))
 

4632 "regular" members (not addressbook or lending card only)


In [32]:
# First event for each of the remaining ("other") members.
other_member_first_events = (
    member_events[member_events.member_id.isin(other_members)]
    .groupby("member_id")
    .first()
    .reset_index()
)

In [33]:
# Yearly count of new members among the remaining ("other") members.
other_newmembers_by_year = (
    other_member_first_events
    .groupby([pd.Grouper(key='date', freq='Y')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)

In [34]:
# Combine the three yearly new-member series into one long-format frame so
# they can be plotted together with altair.

def combine_newmember_counts():
  """Stack the yearly new-member counts for the three member groups.

  Reads the module-level frames other_newmembers_by_year,
  addressbook_newmembers_by_year and cardonly_newmembers_by_year, tags each
  copy with a 'series' label, and concatenates them in that order.

  The 'all events' and 'logbook events only' series are deliberately not
  included (FIXME carried over: the logbook series is not plotting the same
  thing as the others).

  Returns:
    A DataFrame holding the rows of all three inputs plus a 'series' column.
  """
  labelled_counts = [
      counts.assign(series=label)
      for counts, label in (
          (other_newmembers_by_year, 'all other members'),
          (addressbook_newmembers_by_year, 'addressbook-only members'),
          (cardonly_newmembers_by_year, 'card-only members'),
      )
  ]
  return pd.concat(labelled_counts)

combine_newmember_counts_df = combine_newmember_counts()


### aggregate new members from source by month

In [35]:
# Monthly count of new members among the address-book-only members.
addressbook_newmembers_by_month = (
    addressbook_first_events
    .groupby([pd.Grouper(key='date', freq='M')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)

In [36]:
# Monthly count of new members among the lending-card-only members.
cardonly_newmembers_by_month = (
    cardonly_first_events
    .groupby([pd.Grouper(key='date', freq='M')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)

In [37]:
# Monthly count of new members among the remaining ("other") members.
other_newmembers_by_month = (
    other_member_first_events
    .groupby([pd.Grouper(key='date', freq='M')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)

In [38]:
# Combine the three monthly new-member series into one long-format frame so
# they can be plotted together with altair.

def combine_newmember_monthly_counts():
  """Stack the monthly new-member counts for the three member groups.

  Reads the module-level frames other_newmembers_by_month,
  addressbook_newmembers_by_month and cardonly_newmembers_by_month, tags each
  copy with a 'series' label, and concatenates them in that order.

  The 'all events' and 'logbook events only' series are deliberately not
  included (FIXME carried over: the logbook series is not plotting the same
  thing as the others).

  Returns:
    A DataFrame holding the rows of all three inputs plus a 'series' column.
  """
  labelled_counts = [
      counts.assign(series=label)
      for counts, label in (
          (other_newmembers_by_month, 'all other members'),
          (addressbook_newmembers_by_month, 'addressbook-only members'),
          (cardonly_newmembers_by_month, 'card-only members'),
      )
  ]
  return pd.concat(labelled_counts)

combine_newmember_monthly_counts_df = combine_newmember_monthly_counts()


In [39]:
# Fixed series order and matching colors for the member-group legend.
domain = ['addressbook-only members', 'card-only members', 'all other members']
range_ = ['#d7191c', '#fdae61', '#2c7bb6']

# Area chart of monthly new members, colored by member group.
newmember_monthly_stacked = (
    alt.Chart(combine_newmember_monthly_counts_df, title='new members by source')
    .mark_area(opacity=0.5)
    .encode(
        x="date:T",
        y="total",
        color=alt.Color(
            "series",
            legend=alt.Legend(title="member group"),
            scale=alt.Scale(domain=domain, range=range_),
        ),
    )
    .properties(width=1200, height=275)
)

newmember_monthly_stacked

### Fig - New members by month, based on source. (Blue line: new members by month from any source; purple line: new members based on logbook data only, i.e. first logbook events for members).

In [40]:
# layer the stacked areas, the two monthly new-member lines, and the shaded logbook gaps
newmember_monthly_stacked + newmember_monthly_line + logbook_newmember_monthly_line + logbook_gaps_span

## new members by first subscription

In [41]:
# For modeling, a membership is only expected to start with a subscription.
# Renewals are included as well, since subscriptions and renewals were
# sometimes written down in place of one another.

# Limit member events to logbook-sourced Subscription/Renewal events, then
# take the first one for each member.
subscription_first_events = (
    member_events[
        member_events.source_type.str.contains('Logbook')
        & member_events.event_type.isin(['Subscription', 'Renewal'])
    ]
    .groupby("member_id")
    .first()
    .reset_index()
)

In [42]:
# number of unique members with a logbook subscription/renewal first event
len(subscription_first_events.member_id.unique())

4320

In [43]:
# Drop any first subscriptions dated inside a logbook gap, just in case.

subscription_first_events_nogaps = subscription_first_events.copy()

# Iterate over the gaps directly; the loop index was never used.
for gap in logbook_gaps:
  gap_start, gap_end = gap['start'], gap['end']
  # between() is inclusive on both ends, i.e. gap_start <= date <= gap_end
  outside_gap = ~subscription_first_events_nogaps.date.between(gap_start, gap_end)
  subscription_first_events_nogaps = subscription_first_events_nogaps[outside_gap]

# how many members remain once events dated within gaps are removed
len(subscription_first_events_nogaps.member_id.unique())

4315

In [44]:
# Yearly count of new members based only on first subscriptions. The counts
# use the gap-filtered frame (subscription_first_events_nogaps) rather than
# the unfiltered subscription_first_events.
newmember_subscriptions_by_year = (
    subscription_first_events_nogaps
    .groupby([pd.Grouper(key='date', freq='Y')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)

In [45]:
# Weekly (freq='W') count of new members based only on first subscriptions;
# this is the series forecast with prophet below.
newmember_subscriptions_by_week = (
    subscription_first_events_nogaps
    .groupby([pd.Grouper(key='date', freq='W')])['member_id']
    .count()
    .reset_index()
    .rename(columns={'member_id': 'total'})
)

In [46]:
# Plot weekly new subscriptions from the logbooks, with the gaps shaded.
newsubs_base = (
    alt.Chart(newmember_subscriptions_by_week)
    .properties(width=1200, height=275)
    .encode(alt.X('date:T', axis=alt.Axis(title='date')))
)

newsubs_line = newsubs_base.mark_line().encode(
    alt.Y('total', axis=alt.Axis(title='new subscriptions per week'))
)
newsubs_line + logbook_gaps_span

### use prophet to forecast missing subscriptions

#### plot the forecasts

In [47]:
# NOTE(review): 1932-09-27 presumably marks where the logbooks resume after the
# long 1931-1932 gap; confirm how forecast_missing_subscriptions uses it.
post1932_date = pd.Timestamp(1932, 9, 27)

# Forecast weekly new subscriptions for the logbook gaps, using the weekly
# counts and gap list built in the earlier cells.
forecasted_subscriptions = forecast_missing_subscriptions(
    newmember_subscriptions_by_week,
    logbook_gaps,
    post1932_date,
)

15:45:06 - cmdstanpy - INFO - Chain [1] start processing
15:45:06 - cmdstanpy - INFO - Chain [1] done processing
15:45:07 - cmdstanpy - INFO - Chain [1] start processing
15:45:07 - cmdstanpy - INFO - Chain [1] done processing
15:45:07 - cmdstanpy - INFO - Chain [1] start processing
15:45:07 - cmdstanpy - INFO - Chain [1] done processing
15:45:07 - cmdstanpy - INFO - Chain [1] start processing
15:45:07 - cmdstanpy - INFO - Chain [1] done processing
15:45:07 - cmdstanpy - INFO - Chain [1] start processing
15:45:07 - cmdstanpy - INFO - Chain [1] done processing
15:45:07 - cmdstanpy - INFO - Chain [1] start processing
15:45:07 - cmdstanpy - INFO - Chain [1] done processing


In [48]:
# Plot the default forecast together with the shaded gap areas.
chart_height = 275
gap_areas = plot_gap_areas(
    logbook_gaps,
    chart_height,
    newmember_subscriptions_by_week,
    include_line=False,
)
plot_newsubs_weekly_forecast(
    forecasted_subscriptions,
    gap_areas,
    logbook_gaps,
    chart_height,
    post1932_date,
    newmember_subscriptions_by_week,
    show_model=True,
    separate_model_decades=False,
)

In [49]:
# Second forecast variant: train on all data and enable both growth caps.
lognewsub_weeks_fcst = forecast_missing_subscriptions(
    newmember_subscriptions_by_week,
    logbook_gaps,
    post1932_date,
    train_all_data=True,
    return_prophet_model=False,
    use_weekly_growth_cap=True,
    use_total_growth_cap=True,
)

15:46:02 - cmdstanpy - INFO - Chain [1] start processing
15:46:02 - cmdstanpy - INFO - Chain [1] done processing
15:46:02 - cmdstanpy - INFO - Chain [1] start processing
15:46:02 - cmdstanpy - INFO - Chain [1] done processing
15:46:02 - cmdstanpy - INFO - Chain [1] start processing
15:46:02 - cmdstanpy - INFO - Chain [1] done processing
15:46:02 - cmdstanpy - INFO - Chain [1] start processing
15:46:02 - cmdstanpy - INFO - Chain [1] done processing
15:46:03 - cmdstanpy - INFO - Chain [1] start processing
15:46:03 - cmdstanpy - INFO - Chain [1] done processing
15:46:04 - cmdstanpy - INFO - Chain [1] start processing
15:46:04 - cmdstanpy - INFO - Chain [1] done processing
15:46:04 - cmdstanpy - INFO - Chain [1] start processing
15:46:04 - cmdstanpy - INFO - Chain [1] done processing


In [50]:
# Plot the growth-capped forecast together with the shaded gap areas.
chart_height = 275
gap_areas = plot_gap_areas(
    logbook_gaps,
    chart_height,
    newmember_subscriptions_by_week,
    include_line=False,
)
plot_newsubs_weekly_forecast(
    lognewsub_weeks_fcst,
    gap_areas,
    logbook_gaps,
    chart_height,
    post1932_date,
    newmember_subscriptions_by_week,
    show_model=True,
    separate_model_decades=False,
)

## Simpler missing member estimate


In [51]:
# Simple ratio-based estimate of missing member accounts: scale the observed
# accounts-per-logbook-event ratio up to the estimated total number of logbook
# events, then compare with the number of accounts in the dataset.
total_accounts = len(events_df.member_uris.unique())
total_logbook_accounts = len(logbook_events_df.member_uris.unique())
total_logbook_events = logbook_events_df.shape[0]

# new member per logbook event
n_accounts_per_logbook_event = total_logbook_accounts / total_logbook_events

# estimated total number of logbook events
# from prophet:
#  13,955 events (upper 15,220; lower 12,358)
# from KM estimate: 13,597

est_total_logbook_events = 13955
est_total_logbook_events_upper = 15220
est_total_logbook_events_lower = 12358


est_logbook_accounts = est_total_logbook_events * n_accounts_per_logbook_event
est_logbook_accounts_upper = est_total_logbook_events_upper * n_accounts_per_logbook_event
est_logbook_accounts_lower = est_total_logbook_events_lower * n_accounts_per_logbook_event


# int() truncates toward zero; a negative value means the estimate is below
# the number of accounts already in the dataset
n_missing_accounts = int(est_logbook_accounts - total_accounts)
n_missing_accounts_upper = int(est_logbook_accounts_upper - total_accounts)
n_missing_accounts_lower = int(est_logbook_accounts_lower - total_accounts)


# how many accounts do we have
# ("upper"/"lower" refer to the event estimate used, so the upper event
# estimate gives the smaller percent covered)
percent_surviving_accounts = (total_accounts / est_logbook_accounts) * 100
percent_surviving_accounts_upper = (total_accounts / est_logbook_accounts_upper) * 100
percent_surviving_accounts_lower = (total_accounts / est_logbook_accounts_lower) * 100

# complement of percent covered; the report previously printed this label
# without any value
percent_missing_accounts = 100 - percent_surviving_accounts
percent_missing_accounts_upper = 100 - percent_surviving_accounts_upper
percent_missing_accounts_lower = 100 - percent_surviving_accounts_lower


print("""
total accounts: %d
total accounts from logbooks: %d:
total logbook events: %d
new member per logbook event: %.2f
est total members from logbooks: %.2f (upper %.2f, lower %.2f)
est missing accounts: %.2f (upper %.2f, lower %.2f)
percent covered: %.2f (upper %.2f, lower %.2f)
percent missing: %.2f (upper %.2f, lower %.2f)
""" % (
    total_accounts,
    total_logbook_accounts,
    total_logbook_events,
    n_accounts_per_logbook_event,
    # est total members
    est_logbook_accounts, est_logbook_accounts_upper, est_logbook_accounts_lower,
    n_missing_accounts, n_missing_accounts_upper, n_missing_accounts_lower,
    percent_surviving_accounts, percent_surviving_accounts_upper, percent_surviving_accounts_lower,
    percent_missing_accounts, percent_missing_accounts_upper, percent_missing_accounts_lower
))


total accounts: 5139
total accounts from logbooks: 4604:
total logbook events: 11601
new member per logbook event: 0.40
est total members from logbooks: 5538.21 (upper 6040.24, lower 4904.42)
est missing accounts: 399.00 (upper 901.00, lower -234.00)
percent covered: 92.79 (upper 85.08, lower 104.78)
percent missing: 



In [52]:
# number of unique accounts when counting by member_uris
len(events_df.member_uris.unique())

5139

In [53]:
# number of unique accounts when counting by member_names
len(events_df.member_names.unique())  # use this one: names include titles, so slightly fewer accounts get merged than with sort names

4720

In [54]:
# number of unique accounts when counting by member_sort_names
len(events_df.member_sort_names.unique())

4714

In [55]:
def simple_missing_member_est_collapsed(
    events=None,
    logbook_events=None,
    est_total_logbook_events=13955,
    est_total_logbook_events_upper=15220,
    est_total_logbook_events_lower=12358,
):
  """Print the simple missing-member estimate, counting accounts by unique name.

  Same calculation as the member-id based estimate, but accounts are counted
  by unique `member_names` value instead of by member id.

  Args:
    events: DataFrame with a `member_names` column covering all events.
      Defaults to the notebook-level `events_df`.
    logbook_events: DataFrame with a `member_names` column, one row per
      logbook event. Defaults to the notebook-level `logbook_events_df`.
    est_total_logbook_events: estimated total number of logbook events.
      The default (13,955) is the Prophet estimate; the KM estimate was 13,597.
    est_total_logbook_events_upper: upper bound for that estimate (15,220).
    est_total_logbook_events_lower: lower bound for that estimate (12,358).

  Returns:
    None. The report is written to stdout.
  """
  # fall back to the notebook globals so existing zero-argument calls still work
  if events is None:
    events = events_df
  if logbook_events is None:
    logbook_events = logbook_events_df

  # group by name instead of id
  total_accounts = len(events.member_names.unique())
  total_logbook_accounts = len(logbook_events.member_names.unique())
  total_logbook_events = logbook_events.shape[0]

  # new member per logbook event
  n_accounts_per_logbook_event = total_logbook_accounts / total_logbook_events

  est_logbook_accounts = est_total_logbook_events * n_accounts_per_logbook_event
  est_logbook_accounts_upper = est_total_logbook_events_upper * n_accounts_per_logbook_event
  est_logbook_accounts_lower = est_total_logbook_events_lower * n_accounts_per_logbook_event

  # int() truncates toward zero; a negative value means the estimate is below
  # the number of accounts already in the dataset
  n_missing_accounts = int(est_logbook_accounts - total_accounts)
  n_missing_accounts_upper = int(est_logbook_accounts_upper - total_accounts)
  n_missing_accounts_lower = int(est_logbook_accounts_lower - total_accounts)

  # how many accounts do we have
  # ("upper"/"lower" refer to the event estimate used)
  percent_surviving_accounts = (total_accounts / est_logbook_accounts) * 100
  percent_surviving_accounts_upper = (total_accounts / est_logbook_accounts_upper) * 100
  percent_surviving_accounts_lower = (total_accounts / est_logbook_accounts_lower) * 100


  print("""
  ** counting by unique name instead of member id**
  total accounts: %d
  total accounts from logbooks: %d:
  total logbook events: %d
  new member per logbook event: %.2f
  est total members from logbooks: %.2f (upper %.2f, lower %.2f)
  est missing accounts: %.2f (upper %.2f, lower %.2f)
  percent covered: %.2f (upper %.2f, lower %.2f)
  """ % (
      total_accounts,
      total_logbook_accounts,
      total_logbook_events,
      n_accounts_per_logbook_event,
      # est total members
      est_logbook_accounts, est_logbook_accounts_upper, est_logbook_accounts_lower,
      n_missing_accounts, n_missing_accounts_upper, n_missing_accounts_lower,
      percent_surviving_accounts, percent_surviving_accounts_upper, percent_surviving_accounts_lower
  ))


# print the name-based version of the simple missing-member estimate
simple_missing_member_est_collapsed()


  ** counting by unique name instead of member id**
  total accounts: 4720
  total accounts from logbooks: 4200:
  total logbook events: 11601
  new member per logbook event: 0.36
  est total members from logbooks: 5052.24 (upper 5510.21, lower 4474.06)
  est missing accounts: 332.00 (upper 790.00, lower -245.00)
  percent covered: 93.42 (upper 85.66, lower 105.50)
  
