# Tenure distribution, power law

In [None]:
import random
import matplotlib.pyplot as plt
import math
import numpy
print("numpy",numpy.__version__)
import sys
print("Python version", sys.version)

In [None]:
# Set the random seed values so that stochastic processes are repeatable.
# The tenure samples in this notebook are drawn with numpy.random.power, which
# uses NumPy's global generator; seeding only the standard-library `random`
# module would leave those draws different on every run, so seed both.
random.seed(10)
numpy.random.seed(10)

In [None]:
# Simulation parameters.
#
# Background data on employee tenure:
# https://www.bls.gov/news.release/pdf/tenure.pdf
# https://www.ebri.org/docs/default-source/ebri-issue-brief/ebri_ib_474_tenure-28feb19.pdf

# Shape parameter handed to numpy.random.power when sampling tenures.
a_parameter = 5

# How many independent processes (experiments) to simulate, and how many
# people take part in each one.
number_of_experiments = 100000
number_of_people_involved_in_process = 5

number_of_business_days_per_year = 260

# Longest tenure considered, in years and in calendar days.
# (For comparison: a uniform distribution over this range would average 5 years.)
max_tenure_in_years = 10
max_tenure_in_days = 365 * max_tenure_in_years

In [None]:
# Simulate all experiments in one vectorized draw instead of looping in Python:
# each row is one experiment, each column one participant of that process.
# numpy.random.power returns samples in [0, 1]; "1 - sample" mirrors them and
# the multiplication converts the result to a tenure in days.
tenures_in_days = (1 - numpy.random.power(
    a_parameter,
    size=(number_of_experiments, number_of_people_involved_in_process),
)) * max_tenure_in_days

# Shortest tenure within each experiment (row-wise minimum), kept as a list of
# NumPy floats, one entry per experiment.
list_of_youngest_tenures = list(tenures_in_days.min(axis=1))

In [None]:
# Histogram of the youngest participant's tenure over all experiments, with the
# median marked by a red vertical line; the figure is saved as PNG, PDF and SVG.
bin_height_y, bin_location_x, _ = plt.hist(list_of_youngest_tenures, bins=20)

median = numpy.median(list_of_youngest_tenures)

# Vertical line at the median, as tall as the highest bin.
plt.plot([median, median], [0, bin_height_y.max()], color='r')

text_font_size = 12

# Relabel the y axis so that bin counts read as "people out of 100":
# normalize to population of 100 people.
# https://stackoverflow.com/a/49097562/1164295
# The divisor is derived from number_of_experiments so the labels stay correct
# when the experiment count changes.
experiments_per_100_people = number_of_experiments / 100.0
y_limits = plt.ylim()
y_vals = plt.gca().get_yticks()
# Set tick positions together with their labels: attaching fixed labels to
# automatically located ticks makes matplotlib warn and can misalign the labels.
plt.yticks(y_vals,
           ['{:3.0f}'.format(count / experiments_per_100_people) for count in y_vals],
           fontsize=text_font_size)
# Pinning ticks can widen the view to include the outermost tick; restore it.
plt.ylim(y_limits)

# Add an extra x tick at the median. https://stackoverflow.com/a/14716726/1164295
plt.xticks(list(plt.xticks()[0]) + [median], fontsize=text_font_size)
plt.xlabel("Tenure [days]", fontsize=text_font_size)
# Only the first 1000 days are shown.
plt.xlim(0, 1000)
plt.ylabel("Number of People with that Tenure", fontsize=text_font_size)
#plt.title("Tenure of youngest member\n"+
#          "for process with "+str(number_of_people_involved_in_process)+" participants\n"+
#          "assuming powerlaw distribution of tenure and \n"+
#          "max tenure "+str(max_tenure_in_years)+" years")

# Save the same figure once per output format.
figure_base_name = ("tenure_power_distribution_a" + str(a_parameter) +
                    "_with_max_tenure" + str(max_tenure_in_years) + "_and_" +
                    str(number_of_people_involved_in_process) + "_participants")
for figure_format in ("png", "pdf", "svg"):
    plt.savefig(figure_base_name + "." + figure_format,
                format=figure_format,
                dpi=300,
                bbox_inches='tight')

In [None]:
# Median tenure (in days) of the youngest participant, computed for processes
# with an increasing number of participants.
list_of_youngest_tenures_median = []
max_participant_count = 10
# NOTE: range() excludes its end point, so processes with
# 1 .. max_participant_count - 1 participants are simulated.
for process_participant_count in range(1, max_participant_count):
    # One vectorized draw per participant count: rows are experiments, columns
    # are participants. "1 - sample" mirrors the [0, 1] power-distribution
    # samples and the multiplication converts them to days.
    tenures_in_days = (1 - numpy.random.power(
        a_parameter,
        size=(number_of_experiments, process_participant_count),
    )) * max_tenure_in_days
    # Row-wise minimum = youngest participant of each experiment.
    list_of_youngest_tenures_median.append(
        numpy.median(tenures_in_days.min(axis=1)))

In [None]:
# Scatter plot: median tenure of the youngest participant against the number
# of process participants; the figure is saved as PNG, PDF and SVG.
participant_counts = range(1, max_participant_count)
plt.scatter(participant_counts, list_of_youngest_tenures_median)

plt.xlabel("number of process participants", fontsize=text_font_size)
plt.ylabel("median tenure in days of youngest participant", fontsize=text_font_size)
plt.xticks(fontsize=text_font_size)
plt.yticks(fontsize=text_font_size)

# Start the y axis at zero and leave 10% headroom above the largest median.
y_axis_top = max(list_of_youngest_tenures_median)*1.1
plt.ylim([0, y_axis_top])
#plt.title("Tenure of youngest member\n"+
#          "assuming powerlaw distribution of tenure and \n"+
#          "max tenure "+str(max_tenure_in_years)+" years");

# Write the figure once per output format, in the order png, pdf, svg.
output_stem = ("tenure_power_distribution_a"+str(a_parameter)+
               "_with_max_tenure"+str(max_tenure_in_years))
for output_format in ('png', 'pdf', 'svg'):
    plt.savefig(output_stem+"."+output_format,
                format=output_format,
                dpi=300,
                bbox_inches='tight')
