# Tenure distribution, powerlaw

In [None]:
# Dependencies for the tenure simulation and its plots.
import random
import matplotlib.pyplot as plt
import math
import numpy
# Print the numpy version in use.
print("numpy",numpy.__version__)
import sys
# Print the Python interpreter version in use.
print("Python version", sys.version)

In [None]:
# Simulation parameters.
#
# Background reading on employee tenure:
#   https://www.bls.gov/news.release/pdf/tenure.pdf
#   https://www.ebri.org/docs/default-source/ebri-issue-brief/ebri_ib_474_tenure-28feb19.pdf

# Upper bound on an individual's tenure.  With a uniform distribution the
# average tenure is half of this bound, i.e. 5 years for a 10-year maximum.
max_tenure_in_years = 10
max_tenure_in_days = 365 * max_tenure_in_years

number_of_business_days_per_year = 260

# Number of people taking part in one simulated process.
number_of_people_involved_in_process = 5

# Number of Monte Carlo repetitions.
number_of_experiments = 100_000

In [None]:
# One entry per experiment: draw a tenure in days (uniform integer on
# [0, max_tenure_in_days], both ends included) for every participant and keep
# the smallest one, i.e. the tenure of the least-tenured participant.
list_of_youngest_tenures = [
    min(
        random.randint(0, max_tenure_in_days)
        for _participant in range(number_of_people_involved_in_process)
    )
    for _experiment in range(number_of_experiments)
]

In [None]:
# Median, in days, of the youngest-participant tenure over all experiments.
median = numpy.median(list_of_youngest_tenures)
print(median)

In [None]:
# Histogram of the youngest-participant tenure over all experiments, with the
# median marked by a red vertical line, written out as PNG, PDF and SVG.
from matplotlib.ticker import FuncFormatter

text_font_size = 12

bin_height_y, bin_location_x, _ = plt.hist(list_of_youngest_tenures, bins=20)

# Vertical line at the median, as tall as the tallest bin.
plt.plot([median, median], [0, bin_height_y.max()], color='r')

# Label the y axis as "people out of a population of 100" instead of raw
# experiment counts.  The scale is derived from number_of_experiments rather
# than a hard-coded divisor, so the labels stay correct when the number of
# experiments changes.  A formatter is used instead of fixed label strings so
# each label is computed from the tick that is actually drawn (fixed labels
# without fixed tick positions can end up next to the wrong tick).
plt.gca().yaxis.set_major_formatter(
    FuncFormatter(
        lambda count, _position: '{:3.0f}'.format(count * 100 / number_of_experiments)
    )
)

#plt.xticks(list(plt.xticks()[0]) + [median]) # https://stackoverflow.com/a/14716726/1164295
plt.xlim([0, max(list_of_youngest_tenures)])
plt.xticks(fontsize=text_font_size)
plt.yticks(fontsize=text_font_size)
plt.xlabel("Tenure [days]", fontsize=text_font_size)
plt.ylabel("Number of People with that Tenure", fontsize=text_font_size)
#plt.title("Tenure of youngest member\n"+
#          "for process with "+str(number_of_people_involved_in_process)+" participants\n"+
#          "assuming uniform distribution of tenure and \n"+
#          "max tenure "+str(max_tenure_in_years)+" years")

# Same figure in three formats; the file name encodes the parameters used.
figure_basename = ("tenure_uniform_distribution_with_max_tenure" +
                   str(max_tenure_in_years) + "_and_" +
                   str(number_of_people_involved_in_process) + "_participants")
for file_format in ('png', 'pdf', 'svg'):
    plt.savefig(figure_basename + "." + file_format,
                format=file_format,
                dpi=300,
                bbox_inches='tight')

In [None]:
# Repeat the experiment for each participant count in
# range(1, max_participant_count) -- the upper bound itself is excluded -- and
# record the median tenure in days of the youngest participant for each count.
max_participant_count = 10
list_of_youngest_tenures_median = []
for process_participant_count in range(1, max_participant_count):
    youngest_tenures = [
        min(
            random.randint(0, max_tenure_in_days)
            for _participant in range(process_participant_count)
        )
        for _experiment in range(number_of_experiments)
    ]
    list_of_youngest_tenures_median.append(numpy.median(youngest_tenures))

In [None]:
# Scatter plot of the median youngest-participant tenure against the number
# of process participants, written out as PNG, PDF and SVG.
participant_counts = range(1, max_participant_count)
plt.scatter(participant_counts, list_of_youngest_tenures_median)
plt.xticks(fontsize=text_font_size)
plt.yticks(fontsize=text_font_size)
# Start the y axis at zero and leave 10% headroom above the largest median.
largest_median = max(list_of_youngest_tenures_median)
plt.ylim([0, largest_median*1.1])
plt.xlabel("number of process participants", fontsize=text_font_size)
plt.ylabel("median tenure in days of youngest participant", fontsize=text_font_size)
#plt.title("Tenure of youngest member\n"+
#          "assuming uniform distribution of tenure and \n"+
#          "max tenure "+str(max_tenure_in_years)+" years");

# Same figure in three formats; the file name encodes the maximum tenure.
summary_basename = "tenure_uniform_distribution_with_max_tenure" + str(max_tenure_in_years)
for file_format in ('png', 'pdf', 'svg'):
    plt.savefig(summary_basename + "." + file_format,
                format=file_format,
                bbox_inches='tight')