# Part 1: Import libraries and create functions
- sample_NPS_data: takes the survey response size, promoter proportion, and detractor proportion, and returns a numpy array of responses.
- calc_NPS: takes a list of survey responses and returns the Net Promoter Score (NPS).
- NPS_bootstrap: takes a list of survey responses and the desired number of sampling iterations, and returns a list of NPS scores from the bootstrapped samples.
- NPS_hist: takes a list of bootstrapped NPS scores and returns a histogram showing their distribution.
- NPS_density: takes a list of bootstrapped NPS scores and returns a density plot showing their distribution.
- CI_90 / CI_95: take a list of bootstrapped NPS scores and return the lower bound, upper bound, and width of the 90% / 95% percentile interval.

In [3]:
import numpy as np
import pandas as pd
import altair as alt

In [36]:
def sample_NPS_data(size, promoter_prop, detractor_prop):
    """Generate a synthetic array of 0-10 survey scores with a chosen mix.

    Promoter scores are drawn uniformly from {9, 10}, detractor scores
    uniformly from {0, ..., 6}, and passive scores uniformly from {7, 8}.
    The returned array is ordered promoters, then detractors, then passives.

    Args:
        size: Number of responses to generate; must be > 0.
        promoter_prop: Fraction of responses that are promoters (>= 0).
        detractor_prop: Fraction of responses that are detractors (>= 0).
            ``promoter_prop + detractor_prop`` must not exceed 1; whatever
            is left over is filled with passives.

    Returns:
        A 1-D numpy array holding exactly ``size`` scores (``size`` rounded
        to the nearest integer if it is not one already).

    Raises:
        AssertionError: If any argument is out of range.
    """
    # Raised explicitly instead of via ``assert`` so the checks still run
    # under ``python -O``; the exception type is kept for existing callers.
    if not (promoter_prop + detractor_prop <= 1):
        raise AssertionError("promoter_prop + detractor_prop must be <= 1")
    if not (promoter_prop >= 0):
        raise AssertionError("promoter_prop must be >= 0")
    if not (detractor_prop >= 0):
        raise AssertionError("detractor_prop must be >= 0")
    if not (size > 0):
        raise AssertionError("size must be > 0")

    total = int(round(size, 0))
    n_promoters = min(int(round(size * promoter_prop, 0)), total)
    # Rounding every group independently can make the groups add up to more
    # or fewer than ``size`` (e.g. 2.5 + 2.5 + 5 rounds to 2 + 2 + 5 = 9), so
    # detractors are clamped and passives simply take the remainder.
    n_detractors = min(int(round(size * detractor_prop, 0)), total - n_promoters)
    n_passives = total - n_promoters - n_detractors

    promoters = np.random.choice([10, 9], n_promoters, p=[0.5, 0.5])
    detractors = np.random.choice([6, 5, 4, 3, 2, 1, 0], n_detractors, p=[1/7] * 7)
    passives = np.random.choice([8, 7], n_passives, p=[0.5, 0.5])
    return np.concatenate((promoters, detractors, passives))

In [4]:
def calc_NPS(list):
    """Return the Net Promoter Score for a collection of survey scores.

    A score of 9 or more counts as a promoter, 6 or less as a detractor,
    and anything else as a passive. The NPS is the promoter percentage
    minus the detractor percentage, rounded to one decimal place.

    Args:
        list: Iterable of numeric survey scores (the parameter name shadows
            the builtin but is part of the public signature).

    Returns:
        The NPS as a float between -100.0 and 100.0.

    Raises:
        ZeroDivisionError: If no scores are supplied.
    """
    # One pass over the input: +1 marks a promoter, -1 a detractor, 0 a passive.
    buckets = [1 if score >= 9 else (-1 if score <= 6 else 0) for score in list]
    total = len(buckets)
    promoter_share = buckets.count(1) / total
    detractor_share = buckets.count(-1) / total
    return round((promoter_share - detractor_share) * 100, 1)

In [5]:
def NPS_bootstrap(list, iterations):
    """Bootstrap the NPS of a set of survey responses.

    Each iteration draws ``len(list)`` responses from ``list`` with
    replacement (``np.random.choice``) and scores the resample with
    ``calc_NPS``.

    Args:
        list: Sequence of survey scores to resample from.
        iterations: Number of bootstrap resamples to draw.

    Returns:
        A Python list with one NPS value per iteration.
    """
    n_responses = len(list)
    return [
        calc_NPS(np.random.choice(list, n_responses))
        for _ in range(iterations)
    ]

In [6]:
def NPS_hist(list):
    """Build an Altair histogram of bootstrapped NPS scores.

    Args:
        list: Sequence of NPS scores to plot.

    Returns:
        An Altair bar chart of the binned scores (at most 40 bins) against
        their counts.
    """
    frame = pd.DataFrame({"tests": list})

    # The x-axis domain extends 10 points beyond the largest and smallest score.
    upper = max(list) + 10
    lower = min(list) - 10

    x_axis = alt.X(
        "tests",
        bin=alt.Bin(maxbins=40),
        scale=alt.Scale(domain=[lower, upper]),
    )
    return alt.Chart(frame).mark_bar().encode(x_axis, y='count()')

In [7]:
def NPS_density(list):
    """Build an Altair density plot of bootstrapped NPS scores.

    The chart layers a light-gray density estimate of the scores with a
    dashed vertical rule at their mean.

    Args:
        list: Sequence of NPS scores to plot.

    Returns:
        A layered Altair chart (density area + mean rule).
    """
    df = pd.DataFrame(data={"tests": list})
    # The x-axis domain extends 10 points beyond the smallest and largest score.
    mark_min = min(list) - 10
    mark_max = max(list) + 10
    density_chart = alt.Chart(df).transform_density(
        density='tests',
        as_=['tests', 'density'],
    ).mark_area(color="lightgray").encode(
        # The title used to read "Population Distribution of Pickups", a
        # leftover from an unrelated chart; the values plotted here are
        # bootstrapped NPS scores.
        x=alt.X('tests', axis=alt.Axis(title="Bootstrapped NPS"),
            scale=alt.Scale(domain=[mark_min, mark_max])),
        y=alt.Y('density:Q', axis=alt.Axis(title="Density"))
    )

    # Dashed black rule marking the mean of the scores.
    mean_chart = alt.Chart(df).mark_rule(color="black", strokeDash=[1, 1]).encode(
        x="mean(tests)"
    )

    return density_chart + mean_chart

In [8]:
def CI_90(list):
    """Return the 90% percentile interval of the given values.

    Args:
        list: Sequence of numeric values (e.g. bootstrapped NPS scores).

    Returns:
        ``[lower, upper, width]`` where ``lower`` and ``upper`` are the 5th
        and 95th percentiles and ``width`` is their absolute difference
        rounded to two decimals.
    """
    lower, upper = (np.percentile(list, q) for q in (5, 95))
    width = round(abs(upper - lower), 2)
    return [lower, upper, width]

In [9]:
def CI_95(list):
    """Return the 95% percentile interval of the given values.

    Args:
        list: Sequence of numeric values (e.g. bootstrapped NPS scores).

    Returns:
        ``[lower, upper, width]`` where ``lower`` and ``upper`` are the
        2.5th and 97.5th percentiles and ``width`` is their absolute
        difference rounded to two decimals.
    """
    lower, upper = (np.percentile(list, q) for q in (2.5, 97.5))
    width = round(abs(upper - lower), 2)
    return [lower, upper, width]

# Part 2: Testing

In [37]:
# Synthetic survey of 1000 responses: 50% promoters, 10% detractors, and the
# remaining 40% passives.
responses_1000 = sample_NPS_data(1000, 0.5, 0.1)

In [40]:
# Bootstrap the 1000-response survey with 1000 resamples, then print the 90%
# and 95% percentile intervals as [lower, upper, width].
bootstrap_1000_1000 = NPS_bootstrap(responses_1000, 1000)
print(CI_90(bootstrap_1000_1000))
print(CI_95(bootstrap_1000_1000))
# Build the density chart of the bootstrapped scores (presumably rendered as
# the cell output; the chart itself is not part of this text export).
NPS_density(bootstrap_1000_1000)

[36.4, 43.5, 7.1]
[35.8975, 44.2025, 8.3]


In [10]:
# 10 random integer scores from 0 to 10 (randint's upper bound, 11, is exclusive).
responses_10 = np.random.randint(0,11, 10)

In [11]:
# 10 responses, 10 bootstrap resamples: print the 90% and 95% intervals as
# [lower, upper, width], then build the density chart.
bootstrapped_10_10 = NPS_bootstrap(responses_10, 10)
print(CI_90(bootstrapped_10_10))
print(CI_95(bootstrapped_10_10))
NPS_density(bootstrapped_10_10)

[-75.5, 5.499999999999989, 81.0]
[-77.75, 7.7500000000000036, 85.5]


In [12]:
# Same 10 responses, now with 100 bootstrap resamples.
bootstrapped_10_100 = NPS_bootstrap(responses_10, 100)
print(CI_90(bootstrapped_10_100))
print(CI_95(bootstrapped_10_100))
NPS_density(bootstrapped_10_100)

[-80.0, 0.0, 80.0]
[-80.0, 5.249999999999915, 85.25]


In [13]:
# Same 10 responses, now with 1000 bootstrap resamples.
bootstrapped_10_1000 = NPS_bootstrap(responses_10, 1000)
print(CI_90(bootstrapped_10_1000))
print(CI_95(bootstrapped_10_1000))
NPS_density(bootstrapped_10_1000)

[-90.0, -10.0, 80.0]
[-90.24999999999999, 0.0, 90.25]


In [15]:
# 100 random integer scores from 0 to 10 (randint's upper bound, 11, is exclusive).
responses_100 = np.random.randint(0,11, 100)

In [16]:
# 100 responses, 10 bootstrap resamples: print the 90% and 95% intervals as
# [lower, upper, width], then build the density chart.
bootstrapped_100_10 = NPS_bootstrap(responses_100, 10)
print(CI_90(bootstrapped_100_10))
print(CI_95(bootstrapped_100_10))
NPS_density(bootstrapped_100_10)

[-47.1, -29.60000000000001, 17.5]
[-47.55, -27.799999999999997, 19.75]


In [17]:
# Same 100 responses, now with 100 bootstrap resamples.
bootstrapped_100_100 = NPS_bootstrap(responses_100, 100)
print(CI_90(bootstrapped_100_100))
print(CI_95(bootstrapped_100_100))
NPS_density(bootstrapped_100_100)

[-53.1, -26.0, 27.1]
[-56.05, -25.47500000000001, 30.57]


In [18]:
# Same 100 responses, now with 1000 bootstrap resamples.
bootstrapped_100_1000 = NPS_bootstrap(responses_100, 1000)
print(CI_90(bootstrapped_100_1000))
print(CI_95(bootstrapped_100_1000))
NPS_density(bootstrapped_100_1000)

[-53.0, -27.0, 26.0]
[-56.0, -24.0, 32.0]


In [20]:
# 1000 random integer scores from 0 to 10 (randint's upper bound, 11, is exclusive).
# NOTE: this rebinds responses_1000, which is also assigned from
# sample_NPS_data(1000, 0.5, 0.1) elsewhere in this file.
responses_1000 = np.random.randint(0,11, 1000)

In [21]:
# 1000 uniform-random responses, 100 bootstrap resamples: print the 90% and
# 95% intervals as [lower, upper, width], then build the density chart.
bootstrapped_1000_100 = NPS_bootstrap(responses_1000, 100)
print(CI_90(bootstrapped_1000_100))
print(CI_95(bootstrapped_1000_100))
NPS_density(bootstrapped_1000_100)

[-46.1, -38.5, 7.6]
[-46.4675, -37.6475, 8.82]


In [238]:
# Same 1000 responses with 1000 bootstrap resamples; only the 95% interval
# is printed here.
bootstrapped_1000_1000 = NPS_bootstrap(responses_1000, 1000)
print(CI_95(bootstrapped_1000_1000))
NPS_density(bootstrapped_1000_1000)

[-43.4, -35.2, 8.2]


In [208]:
# 2000 random integer scores from 0 to 10 (randint's upper bound, 11, is exclusive).
responses_2000 = np.random.randint(0,11, 2000)

In [239]:
# 2000 responses, 100 bootstrap resamples: print the 95% interval as
# [lower, upper, width], then build the density chart.
bootstrapped_2000_100 = NPS_bootstrap(responses_2000, 100)
print(CI_95(bootstrapped_2000_100))
NPS_density(bootstrapped_2000_100)

[-45.91, -43.3, 2.61]


In [240]:
# Same 2000 responses, now with 1000 bootstrap resamples.
bootstrapped_2000_1000 = NPS_bootstrap(responses_2000, 1000)
print(CI_95(bootstrapped_2000_1000))
NPS_density(bootstrapped_2000_1000)

[-45.905, -43.4, 2.51]


In [223]:
# 10000 random integer scores from 0 to 10 (randint's upper bound, 11, is exclusive).
responses_10k = np.random.randint(0,11, 10000)

In [224]:
# 10000 responses, 5000 bootstrap resamples: print the 95% interval as
# [lower, upper, width], then build the density chart.
b_10k_5k = NPS_bootstrap(responses_10k, 5000)
print(CI_95(b_10k_5k))
NPS_density(b_10k_5k)

In [231]:
# Re-evaluate the 95% interval on the existing 5000-resample result; no new
# bootstrap is drawn here.
CI_95(b_10k_5k)

[-47.3, -44.7, 2.6]

In [241]:
# Same 10000 responses, now with 1000 bootstrap resamples.
b_10k_1k = NPS_bootstrap(responses_10k, 1000)
print(CI_95(b_10k_1k))
NPS_density(b_10k_1k)

[-47.3, -44.8, 2.5]


# Generating sample NPS data

In [33]:
def sample_NPS_data(size, promoter_prop, detractor_prop):
    """Generate a synthetic array of 0-10 survey scores with a chosen mix.

    Promoter scores are drawn uniformly from {9, 10}, detractor scores
    uniformly from {0, ..., 6}, and passive scores uniformly from {7, 8}.
    The returned array is ordered promoters, then detractors, then passives.

    Args:
        size: Number of responses to generate; must be > 0.
        promoter_prop: Fraction of responses that are promoters (>= 0).
        detractor_prop: Fraction of responses that are detractors (>= 0).
            ``promoter_prop + detractor_prop`` must not exceed 1; whatever
            is left over is filled with passives.

    Returns:
        A 1-D numpy array holding exactly ``size`` scores (``size`` rounded
        to the nearest integer if it is not one already).

    Raises:
        AssertionError: If any argument is out of range.
    """
    # Raised explicitly instead of via ``assert`` so the checks still run
    # under ``python -O``; the exception type is kept for existing callers.
    if not (promoter_prop + detractor_prop <= 1):
        raise AssertionError("promoter_prop + detractor_prop must be <= 1")
    if not (promoter_prop >= 0):
        raise AssertionError("promoter_prop must be >= 0")
    if not (detractor_prop >= 0):
        raise AssertionError("detractor_prop must be >= 0")
    if not (size > 0):
        raise AssertionError("size must be > 0")

    total = int(round(size, 0))
    n_promoters = min(int(round(size * promoter_prop, 0)), total)
    # Rounding every group independently can make the groups add up to more
    # or fewer than ``size`` (e.g. 2.5 + 2.5 + 5 rounds to 2 + 2 + 5 = 9), so
    # detractors are clamped and passives simply take the remainder.
    n_detractors = min(int(round(size * detractor_prop, 0)), total - n_promoters)
    n_passives = total - n_promoters - n_detractors

    promoters = np.random.choice([10, 9], n_promoters, p=[0.5, 0.5])
    detractors = np.random.choice([6, 5, 4, 3, 2, 1, 0], n_detractors, p=[1/7] * 7)
    passives = np.random.choice([8, 7], n_passives, p=[0.5, 0.5])
    return np.concatenate((promoters, detractors, passives))

In [35]:
# Negative check: size=-1 violates the `size > 0` validation in
# sample_NPS_data, so this call fails with AssertionError before print runs.
test = sample_NPS_data(-1, 0.9, 0.1)
print(test)

AssertionError: 