In [None]:
# setup
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
from matplotlib import rcParams

# change for different filenames
DATAFILE = 'rentals_reg_count.csv'
datetime_columns = ['date']

In [None]:
def mean_confidence_interval(data, ci=0.95):
    """Return the sample mean with a two-sided Student-t confidence interval.

    Args:
        data: iterable of numbers (anything ``np.array`` accepts).
        ci: confidence level of the interval; 0.95 unless overridden.

    Returns:
        A tuple ``(mean, lower, upper)`` whose bounds are symmetric about
        the mean.
    """
    samples = np.array(data) * 1.0  # force a float ndarray
    dof = len(samples) - 1  # degrees of freedom for the t distribution
    center = np.mean(samples)
    # two-sided critical value: upper (1 + ci) / 2 quantile of Student's t
    t_crit = scipy.stats.t.ppf((1 + ci) / 2., dof)
    half_width = scipy.stats.sem(samples) * t_crit
    return center, center - half_width, center + half_width


# read in csv
# NOTE: `infer_datetime_format` is deliberately not passed; it is deprecated
# since pandas 2.0, where consistent format inference is the default.
df = pd.read_csv(
    DATAFILE,
    parse_dates=datetime_columns,
)
# get cumulative sum of registrations
# NOTE(review): the running total follows the row order of the file, so this
# presumably expects rows in chronological order -- confirm against the data.
df['total_registrations'] = df['registrations_count'].cumsum()

# break up dates
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year

# group df by year, month
grouped = df.groupby(['year', 'month'])
labels, data, err, yci = [], [], [], []

# collect label, average, stdev, and 85% CI per year, month
# Labels are gathered in the same pass as the data so that each label is
# guaranteed to line up with its box, whatever order the groups come in.
for (year, month), group in grouped:
    # Format from plain ints so the text stays "(2020, 1)" even when the
    # group keys are NumPy scalars (whose tuple repr differs under NumPy 2).
    labels.append('({}, {})'.format(int(year), int(month)))
    # rentals per registration accumulated so far
    normalized_rental_series = group['rentals_count']/group['total_registrations']
    data.append(normalized_rental_series.values)
    err.append(np.std(normalized_rental_series))
    _, low_ci, high_ci = mean_confidence_interval(normalized_rental_series, 0.85)
    yci.append([low_ci, high_ci])

# Tick label sizes are taken from rcParams when the ticks are built, so set
# them before the figure and axes exist; setting them after plotting may not
# affect ticks that have already been created.
rcParams['xtick.labelsize'] = 75
rcParams['ytick.labelsize'] = 100

# Draw boxplots, specifying desired style
plt.figure(figsize=(200, 50))
# NOTE(review): matplotlib >= 3.9 renames `labels` to `tick_labels`; the old
# keyword still works there but emits a deprecation warning.
plt.boxplot(data, labels=labels, meanline=True, showmeans=True, notch=True, conf_intervals=yci)

plt.xlabel('Year, Month', fontsize=100)
plt.ylabel('Normalized Data (rental count/total registrations)', fontsize=100)
plt.title('Normalized Box Plot Rental Counts for Hours 7-9', fontsize=100)
plt.savefig('boxplot.pdf')  # change output filename here
# How to read the plot (colours depend on the active matplotlib style):
# box spans the first to the third quartile of the data
# whiskers reach the furthest points within 1.5 * IQR of the box
#   (matplotlib's default `whis`)
# line drawn for the mean comes from meanline=True / showmeans=True
# the other line across the box is the median
# notch is forced via conf_intervals to the 85% confidence interval of the
#   mean computed above (not matplotlib's default median-based notch)
# circles beyond the whiskers are outliers