In [89]:
import numpy as np
import pandas as pd
import plotly.plotly as py
import plotly.graph_objs as go

In [90]:
# Candidate return distributions. Each array has two rows:
#   row 0 -> exit multiples
#   row 1 -> probability associated with each multiple
levine1 = np.array([
    [0, 2, 4, 7.5, 10],
    [0.5, 0.23, 0.11, 0.08, 0.08],
])
levine2 = np.array([
    [0, 2, 4, 7.5, 10],
    [0.56, 0.24, 0.07, 0.06, 0.07],
])
correlation = np.array([
    [0, 3, 7.5, 15, 35, 50],
    [0.648, 0.253, 0.059, 0.025, 0.011, 0.004],
])
mcclure = np.array([
    [0, 0.5, 3, 15, 50],
    [0.5, 0.25, 0.18, 0.05, 0.02],
])
wilson = np.array([
    [0, 1, 3, 10],
    [0.4, 0.3, 0.2, 0.1],
])
dummy = np.array([
    [0, 5, 10, 20],
    [0.7, 0.1, 0.1, 0.1],
])

# function to add a cumulative distribution row
def cumulator(dist):
    """Return ``dist`` with the running total of its probability row as row 2.

    Parameters
    ----------
    dist : array-like
        Row 0 holds the multiples and row 1 the probability of each multiple.
        Rows beyond the first two are ignored, so passing an array that has
        already been through ``cumulator`` returns the same three rows instead
        of stacking up another cumulative row.

    Returns
    -------
    numpy.ndarray
        A new float array with three rows: multiples, probabilities and
        cumulative probabilities. The input is not modified.
    """
    # Keep only the two source rows so re-running the cell is idempotent.
    base = np.asarray(dist, dtype=float)[:2]
    return np.vstack([base, np.cumsum(base[1])])

# Rebind every distribution to its cumulated version (same order as defined above).
levine1, levine2, correlation, mcclure, wilson, dummy = (
    cumulator(raw_dist)
    for raw_dist in (levine1, levine2, correlation, mcclure, wilson, dummy)
)

In [91]:
# Inspect the McClure array after cumulator(): row 2 now holds the cumulative probabilities.
mcclure

array([[0.0e+00, 5.0e-01, 3.0e+00, 1.5e+01, 5.0e+01],
       [5.0e-01, 2.5e-01, 1.8e-01, 5.0e-02, 2.0e-02],
       [5.0e-01, 7.5e-01, 9.3e-01, 9.8e-01, 1.0e+00]])

In [92]:
# function to run a simulated portfolio
def run_portfolio(dist, iterations, number_of_investments, rng=None):
    """Simulate portfolios and return the average gross multiple of each one.

    Every investment draws a uniform random number and receives the first
    multiple whose cumulative probability is greater than or equal to that
    draw.

    Parameters
    ----------
    dist : array-like
        Row 0 holds the multiples, row 1 their probabilities and row 2 the
        cumulative probabilities (as produced by ``cumulator``). If row 2 is
        absent it is derived from row 1.
    iterations : int
        Number of portfolios to simulate.
    number_of_investments : int
        Number of investments drawn for each portfolio; must be at least 1.
    rng : object, optional
        Random source exposing ``random(size)``, e.g.
        ``np.random.default_rng(seed)``. When omitted, numpy's global
        ``np.random.rand`` is used, as before.

    Returns
    -------
    list of float
        One average multiple per simulated portfolio.

    Raises
    ------
    ValueError
        If ``number_of_investments`` is below 1, or if the cumulative
        probabilities do not end at (approximately) 1.
    """
    if number_of_investments < 1:
        raise ValueError("number_of_investments must be at least 1")

    multiples = np.asarray(dist[0], dtype=float)
    if len(dist) > 2:
        cumulative = np.asarray(dist[2], dtype=float)
    else:
        cumulative = np.cumsum(np.asarray(dist[1], dtype=float))

    # A distribution whose probabilities do not sum to 1 used to lose every draw
    # above the last cumulative value without any warning, biasing the averages.
    if not np.isclose(cumulative[-1], 1.0):
        raise ValueError(
            "cumulative probabilities must end at 1.0, got {!r}".format(float(cumulative[-1]))
        )

    draw = np.random.rand if rng is None else rng.random
    last_bucket = len(multiples) - 1

    total_outcome = []
    # iterate through the number of portfolios and append an average gross multiple for each
    for _ in range(iterations):
        rands = draw(number_of_investments)
        # side='left' returns the first index i with rands <= cumulative[i];
        # the clamp covers a final cumulative value that rounds just below 1.0.
        buckets = np.minimum(np.searchsorted(cumulative, rands, side='left'), last_bucket)
        total_outcome.append(float(multiples[buckets].mean()))

    return total_outcome

In [93]:
# Smoke test: 10 simulated portfolios of 500 investments each, drawn from the McClure distribution.
test = run_portfolio(mcclure, 10, 500)
test


[2.136, 2.159, 3.455, 2.675, 1.987, 2.201, 2.363, 1.714, 2.639, 2.04]

In [94]:
# run various simulations across the assumptions
SEED = 42               # fixed so a fresh Restart & Run All reproduces the same draws
N_PORTFOLIOS = 10000    # simulated portfolios per scenario
CONCENTRATED_SIZE = 20  # investments in a concentrated portfolio
SPREAD_SIZE = 200       # investments in a spread portfolio

# run_portfolio samples from numpy's global RNG, so seed it before the first scenario.
np.random.seed(SEED)

levine1_concentrated = run_portfolio(levine1, N_PORTFOLIOS, CONCENTRATED_SIZE)
levine1_spread = run_portfolio(levine1, N_PORTFOLIOS, SPREAD_SIZE)
levine2_concentrated = run_portfolio(levine2, N_PORTFOLIOS, CONCENTRATED_SIZE)
levine2_spread = run_portfolio(levine2, N_PORTFOLIOS, SPREAD_SIZE)
correlation_concentrated = run_portfolio(correlation, N_PORTFOLIOS, CONCENTRATED_SIZE)
correlation_spread = run_portfolio(correlation, N_PORTFOLIOS, SPREAD_SIZE)
mcclure_concentrated = run_portfolio(mcclure, N_PORTFOLIOS, CONCENTRATED_SIZE)
mcclure_spread = run_portfolio(mcclure, N_PORTFOLIOS, SPREAD_SIZE)
wilson_concentrated = run_portfolio(wilson, N_PORTFOLIOS, CONCENTRATED_SIZE)
wilson_spread = run_portfolio(wilson, N_PORTFOLIOS, SPREAD_SIZE)

# Order matters: the summary tables built from these lists label scenarios only by position.
concentrated_ports = [levine1_concentrated, levine2_concentrated, correlation_concentrated, mcclure_concentrated, wilson_concentrated]
spread_ports = [levine1_spread, levine2_spread, correlation_spread, mcclure_spread, wilson_spread]

In [95]:
# chart the concentrated portfolios with 20 investments
trace1 = go.Histogram(x=mcclure_concentrated, opacity=0.75, name='Mcclure - 20')
trace2 = go.Histogram(x=levine1_concentrated, opacity=0.75, name='Levine1 - 20')
trace3 = go.Histogram(x=correlation_concentrated, opacity=0.75, name='Correlation - 20')
trace4 = go.Histogram(x=levine2_concentrated, opacity=0.75, name='Levine2 - 20')
trace5 = go.Histogram(x=wilson_concentrated, opacity=0.75, name='Wilson - 20')

data = [trace1, trace2, trace3, trace4, trace5]
# Title and axis labels so the figure can be read on its own when the notebook is skimmed.
layout = go.Layout(
    barmode='overlay',
    title='Average gross multiple - portfolios of 20 investments',
    xaxis=dict(title='Average gross multiple per portfolio'),
    yaxis=dict(title='Number of simulated portfolios'),
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='concentrated')

In [96]:
# chart the portfolios with 200 investments
# NOTE: trace5 is rebound here; the 20-investment figure already holds its own
# trace objects in its `data` list, so it is not affected.
trace5 = go.Histogram(x=mcclure_spread, opacity=0.75, name='Mcclure - 200')
trace6 = go.Histogram(x=levine1_spread, opacity=0.75, name='Levine1 - 200')
trace7 = go.Histogram(x=correlation_spread, opacity=0.75, name='Correlation - 200')
trace8 = go.Histogram(x=levine2_spread, opacity=0.75, name='Levine2 - 200')
trace9 = go.Histogram(x=wilson_spread, opacity=0.75, name='Wilson - 200')

data = [trace5, trace6, trace7, trace8, trace9]
# Title and axis labels so the figure can be read on its own when the notebook is skimmed.
layout = go.Layout(
    barmode='overlay',
    title='Average gross multiple - portfolios of 200 investments',
    xaxis=dict(title='Average gross multiple per portfolio'),
    yaxis=dict(title='Number of simulated portfolios'),
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='spread')

In [97]:
# function to turn the results into a df
def organize(portfolios):
    """Summarise each simulated portfolio set as one row of statistics.

    Parameters
    ----------
    portfolios : iterable of array-like
        Each element is a sequence of average multiples, e.g. the list
        returned by ``run_portfolio``.

    Returns
    -------
    pandas.DataFrame
        One row per element of ``portfolios``, in input order with a 0..n-1
        index, and the columns 'Bottom Decile', 'Bottom Quartile', 'Median',
        'Upper Quartile', 'Upper Decile', 'Mean' and 'Std Dev'
        (``np.std``, i.e. population standard deviation).
    """
    columns = ['Bottom Decile', 'Bottom Quartile', 'Median', 'Upper Quartile', 'Upper Decile', 'Mean', 'Std Dev']
    rows = []
    for portfolio in portfolios:
        percentiles = np.percentile(portfolio, [10, 25, 50, 75, 90])
        rows.append([*percentiles, np.mean(portfolio), np.std(portfolio)])
    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # appending row by row copies the whole frame on every iteration.
    return pd.DataFrame(rows, columns=columns, dtype=float)

In [98]:
# Summary statistics for the 200-investment portfolios. After the transpose, columns 0-4
# follow the order of spread_ports: levine1, levine2, correlation, mcclure, wilson.
test = organize(spread_ports)
test.T

Unnamed: 0,0,1,2,3,4
Bottom Decile,2.0225,1.6425,1.6775,1.755,1.64
Bottom Quartile,2.1525,1.765,1.8875,2.03,1.76
Median,2.3025,1.90375,2.1375,2.375,1.895
Upper Quartile,2.455,2.05,2.4025,2.7425,2.035
Upper Decile,2.5925,2.1825,2.67,3.105,2.17
Mean,2.305485,1.90955,2.16115,2.408576,1.900165
Std Dev,0.222664,0.2106,0.390147,0.530255,0.206466


In [99]:
# Summary statistics for the 20-investment portfolios. After the transpose, columns 0-4
# follow the order of concentrated_ports: levine1, levine2, correlation, mcclure, wilson.
test1 = organize(concentrated_ports)
test1.T

Unnamed: 0,0,1,2,3,4
Bottom Decile,1.4,1.1,0.9,0.65,1.1
Bottom Quartile,1.8,1.45,1.275,1.15,1.45
Median,2.25,1.875,1.8,1.925,1.85
Upper Quartile,2.75,2.325,2.8,3.5,2.3
Upper Decile,3.2,2.775,3.85,4.625,2.75
Mean,2.292365,1.905485,2.142207,2.415212,1.905715
Std Dev,0.704991,0.658872,1.205131,1.681443,0.647425


In [100]:
# Sweep the portfolio size (1 to 500 investments), 10000 simulated portfolios per size.
# NOTE(review): the variables are named wilson*, but the distribution sampled here is
# levine1 (expected multiple 2.3; wilson's is 1.9). The summary output recorded below shows
# a mean of about 3.5, which matches `dummy` instead - the cell was presumably last run
# with a different argument. Confirm which distribution is intended and re-run.
(wilson1, wilson5, wilson10, wilson20,
 wilson50, wilson100, wilson200, wilson500) = (
    run_portfolio(levine1, 10000, portfolio_size)
    for portfolio_size in (1, 5, 10, 20, 50, 100, 200, 500)
)


In [101]:
# Columns 0-7 of the transposed summary follow the order of `wilsons`,
# i.e. portfolio sizes 1, 5, 10, 20, 50, 100, 200 and 500.
wilsons = [wilson1, wilson5, wilson10, wilson20, wilson50, wilson100, wilson200, wilson500]
summary = organize(wilsons)
summary.T

Unnamed: 0,0,1,2,3,4,5,6,7
Bottom Decile,0.0,0.0,1.0,1.75,2.4,2.7,2.925,3.14
Bottom Quartile,0.0,1.0,2.0,2.5,2.9,3.05,3.2,3.3
Median,0.0,3.0,3.5,3.5,3.5,3.5,3.5,3.49
Upper Quartile,5.0,5.0,5.0,4.5,4.1,3.9,3.8,3.69
Upper Decile,20.0,8.0,6.0,5.25,4.7,4.35,4.075,3.86
Mean,3.5385,3.5461,3.4858,3.47765,3.49541,3.509185,3.498675,3.498189
Std Dev,6.373501,2.878589,2.017312,1.406898,0.900822,0.634502,0.451892,0.282813


In [111]:
# For each portfolio size, print the fraction of simulated portfolios
# whose average multiple is above 5.0.
for outcomes in wilsons:
    winners = sum(1 for multiple in outcomes if multiple > 5.0)
    print(winners / len(outcomes))

0.2035
0.2384
0.1777
0.1246
0.0474
0.0096
0.001
0.0
