In [2]:
from model_builder import model_builder
from data_gen import simulate_private_equity_cashflows
import numpy as np

In [3]:
# Generate the simulated private-equity cashflow table that every later cell works on.
cashflows = simulate_private_equity_cashflows()

In [4]:
# Normalize the cashflows of each fund in the market

def fund_weights(cashflows):
    """Return the negated total cashflow of every fund.

    Parameters
    ----------
    cashflows : pandas.DataFrame
        Must contain the columns ``'FundID'`` and ``'Cashflow'``.

    Returns
    -------
    dict
        Maps each fund id to ``-sum(Cashflow)`` over that fund's rows, with
        keys in order of first appearance in ``cashflows``. When the input
        holds only contributions (negative cashflows) each value is the
        fund's total contributed amount as a positive number.

    Notes
    -----
    The totals are computed in a single grouped pass instead of filtering
    the whole frame once per fund. Rows whose ``FundID`` is missing are
    skipped by ``groupby``.
    """
    # sort=False keeps the groups in first-appearance order, matching the
    # order that iterating over ``unique()`` would give.
    totals = cashflows.groupby('FundID', sort=False)['Cashflow'].sum()
    return (-totals).to_dict()

# Keep only the contribution rows (negative cashflows), then total them per fund.
cashflows_contr = cashflows.loc[cashflows['Cashflow'].lt(0)]
eq_cashflows_contr = fund_weights(cashflows_contr)

def eq_calc(row):
    """Scale one row's cashflow by its fund's contribution total.

    ``row`` must provide ``'FundID'`` and ``'Cashflow'``. The divisor is
    looked up in the module-level ``eq_cashflows_contr`` mapping, so a fund
    that is absent from that mapping raises ``KeyError``.
    """
    return row['Cashflow'] / eq_cashflows_contr[row['FundID']]

# Compute the normalized cashflow of every row (one eq_calc call per row).
cashflows['eq_cashflow'] = cashflows.apply(eq_calc, axis='columns')

# Replace the raw column with the normalized one, keeping the name 'Cashflow'.
cashflows = cashflows.drop(columns=['Cashflow'])
cashflows = cashflows.rename(columns={'eq_cashflow': 'Cashflow'})

cashflows

Unnamed: 0,FundID,VintageYear,Strategy,Geography,FundQuality,Quarter,date,Cashflow
0,0,1992,Venture,North America,Bad,0,1992-03-31,-0.127937
1,0,1992,Venture,North America,Bad,1,1992-06-30,-0.082006
2,0,1992,Venture,North America,Bad,2,1992-09-30,-0.074795
3,0,1992,Venture,North America,Bad,3,1992-12-31,0.000000
4,0,1992,Venture,North America,Bad,4,1993-03-31,-0.065673
...,...,...,...,...,...,...,...,...
118780,2399,1992,Venture,Rest of World,Bad,47,2003-12-31,0.046362
118781,2399,1992,Venture,Rest of World,Bad,48,2004-03-31,0.000000
118782,2399,1992,Venture,Rest of World,Bad,49,2004-06-30,0.022639
118783,2399,1992,Venture,Rest of World,Bad,50,2004-09-30,0.000000


### Shapley value calculation

In [5]:
# Distinct values along each attribution dimension.
vintages = cashflows['VintageYear'].unique()
strategies = cashflows['Strategy'].unique()
geos = cashflows['Geography'].unique()

# Every (vintage, strategy, geography) combination, vintage varying slowest.
attributions = [
    (vintage, strategy, geo)
    for vintage in vintages
    for strategy in strategies
    for geo in geos
]

In [6]:
import random
from pme_calc import moic, xirr
import pandas as pd

def permute_list(input_list):
    """Return a randomly reordered copy of ``input_list``.

    The argument itself is left untouched; shuffling happens on a copy and
    draws from Python's global ``random`` state.
    """
    shuffled = input_list.copy()
    random.shuffle(shuffled)
    return shuffled

# Seed Python's RNG so the sampled orderings (and every figure derived from
# them) are identical on each Restart & Run All.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

numb_of_permutations = 10  # Number of permutations to generate
permutated_attributions = [
    permute_list(attributions) for _ in range(numb_of_permutations)
]

In [24]:
# Exploratory pass over the attributions in their original (unshuffled) order:
# grow the portfolio one bucket at a time and record the IRR after each step.
first_vintage, first_strategy, first_geo = attributions[0]
first_df = cashflows[
    (cashflows['VintageYear'] == first_vintage)
    & (cashflows['Strategy'] == first_strategy)
    & (cashflows['Geography'] == first_geo)
]
first_aggregated_df = first_df.groupby('date', as_index=False)['Cashflow'].sum()
first_attribution_irr = xirr(first_aggregated_df['date'], first_aggregated_df['Cashflow'])
attribution_values = [first_attribution_irr]

for bucket_vintage, bucket_strategy, bucket_geo in attributions[1:]:
    in_bucket = (
        (cashflows['VintageYear'] == bucket_vintage)
        & (cashflows['Strategy'] == bucket_strategy)
        & (cashflows['Geography'] == bucket_geo)
    )
    # first_df is reused as the running portfolio from here on.
    first_df = pd.concat([first_df, cashflows[in_bucket]], ignore_index=True)
    aggregated_df = first_df.groupby('date', as_index=False)['Cashflow'].sum()
    attribution_values.append(xirr(aggregated_df['date'], aggregated_df['Cashflow']))

# First entry is the standalone IRR; each later entry is the change in the
# cumulative IRR caused by adding that bucket.
final_attribution_values = attribution_values[:1] + [
    current - previous
    for previous, current in zip(attribution_values, attribution_values[1:])
]
    

In [27]:
# Evaluate `moic` (imported from pme_calc) on the date-aggregated cashflows of
# the first attribution bucket only.
moic(first_aggregated_df)

2.4312816263474373

In [9]:
def attribution_irr_changes(attributions, cashflows):
    """Measure how the cumulative IRR moves as buckets are added in order.

    Parameters
    ----------
    attributions : list of tuple
        ``(vintage, strategy, geography)`` keys, in the order in which the
        buckets are added to the portfolio.
    cashflows : pandas.DataFrame
        Must contain ``'VintageYear'``, ``'Strategy'``, ``'Geography'``,
        ``'date'`` and ``'Cashflow'``.

    Returns
    -------
    dict
        Maps each attribution key to its incremental value: the first key
        gets the IRR of its own bucket, every later key gets the cumulative
        IRR after adding its bucket minus the cumulative IRR before it.
        An empty ``attributions`` yields an empty dict.
    """
    # Nothing to attribute: return early instead of failing on attributions[0].
    if len(attributions) == 0:
        return {}

    def _bucket_rows(attrib):
        # Rows of ``cashflows`` that belong to one (vintage, strategy, geography) key.
        return cashflows[
            (cashflows['VintageYear'] == attrib[0]) &
            (cashflows['Strategy'] == attrib[1]) &
            (cashflows['Geography'] == attrib[2])
        ]

    cumulative_irrs = []
    portfolio = None
    for attrib in attributions:
        bucket = _bucket_rows(attrib)
        # The first bucket starts the portfolio; later ones are appended to it.
        if portfolio is None:
            portfolio = bucket
        else:
            portfolio = pd.concat([portfolio, bucket], ignore_index=True)
        aggregated = portfolio.groupby('date', as_index=False)['Cashflow'].sum()
        cumulative_irrs.append(xirr(aggregated['date'], aggregated['Cashflow']))

    # Turn the cumulative series into per-step increments.
    increments = [cumulative_irrs[0]]
    for previous, current in zip(cumulative_irrs, cumulative_irrs[1:]):
        increments.append(current - previous)

    return dict(zip(attributions, increments))


In [10]:
cumulative_results = {}

# Sum the incremental IRR of every bucket across all sampled orderings.
for permuted_attribution in permutated_attributions:
    increments = attribution_irr_changes(permuted_attribution, cashflows)
    for key, value in increments.items():
        if key not in cumulative_results:
            cumulative_results[key] = value
            continue
        cumulative_results[key] += value

In [11]:
# Average each bucket's summed increment over the number of sampled orderings.
n_permutations = len(permutated_attributions)
average_results = {
    key: total / n_permutations
    for key, total in cumulative_results.items()
}

In [12]:
# Number of attribution buckets that received an averaged value.
len(average_results)

80

### Portfolio generation and attribution analysis

In [13]:
def portfolio_cashflows(cashflows, portfolio_size, rng=None):
    """Draw a random portfolio of funds and return all of their cashflow rows.

    Parameters
    ----------
    cashflows : pandas.DataFrame
        Must contain a ``'FundID'`` column.
    portfolio_size : int
        Number of distinct funds to draw (without replacement). NumPy raises
        ``ValueError`` if it exceeds the number of distinct funds.
    rng : None, int or numpy.random.Generator, optional
        Source of randomness. ``None`` (the default) keeps the previous
        behaviour and samples from NumPy's global ``np.random`` state.
        An int seed or a ``Generator`` makes the draw reproducible.

    Returns
    -------
    pandas.DataFrame
        The rows of ``cashflows`` whose ``FundID`` is among the drawn funds.
    """
    fund_ids = cashflows['FundID'].unique()

    if rng is None:
        selected_funds = np.random.choice(fund_ids, size=portfolio_size, replace=False)
    else:
        # default_rng passes an existing Generator through and seeds a new one from an int.
        generator = np.random.default_rng(rng)
        selected_funds = generator.choice(fund_ids, size=portfolio_size, replace=False)

    return cashflows[cashflows['FundID'].isin(selected_funds)]

In [14]:
# Sample a 200-fund portfolio, then isolate its contribution (negative) rows.
port_cfs = portfolio_cashflows(cashflows, 200)
port_cfs_contr = port_cfs.loc[port_cfs['Cashflow'].lt(0)]

In [15]:
def calculate_attribution_weights(port_cfs_contr):
    """Share of total cashflow carried by each attribution bucket.

    Sums ``'Cashflow'`` per ``('VintageYear', 'Strategy', 'Geography')``
    group and divides every group total by the grand total.

    Returns a DataFrame with the three grouping columns plus ``'Weight'``.
    """
    bucket_totals = port_cfs_contr.groupby(
        ['VintageYear', 'Strategy', 'Geography']
    )['Cashflow'].sum()

    shares = bucket_totals.div(bucket_totals.sum())

    # Flatten the grouped index back into columns; the values become 'Weight'.
    return shares.reset_index(name='Weight')

In [16]:
# Weight of each (vintage, strategy, geography) bucket within the sampled
# portfolio's contribution rows.
weight_df = calculate_attribution_weights(port_cfs_contr)

In [17]:
def weighted_attribution(weights_df, attribution_results):
    """Multiply each bucket's attribution value by the bucket's weight.

    ``weights_df`` needs the columns ``'VintageYear'``, ``'Strategy'``,
    ``'Geography'`` and ``'Weight'``; ``attribution_results`` is keyed by
    ``(vintage, strategy, geography)`` tuples.

    Returns a dict with one entry per row of ``weights_df``. A bucket that
    has no entry in ``attribution_results`` is given the value 0.
    """
    dimensions = ['VintageYear', 'Strategy', 'Geography']
    scaled = {}

    for _, bucket in weights_df.iterrows():
        key = tuple(bucket[dim] for dim in dimensions)

        # Buckets without an attribution value count as zero.
        if key not in attribution_results:
            scaled[key] = 0
            continue

        scaled[key] = bucket['Weight'] * attribution_results[key]

    return scaled


In [18]:
# Scale every bucket's averaged IRR increment by that bucket's portfolio weight.
port_result = weighted_attribution(weight_df, average_results)

In [19]:
# Sum the weighted values, then print that sum multiplied by the number of
# attribution buckets.
total_weighted_attribution = sum(port_result.values())
n_buckets = len(average_results)
print(total_weighted_attribution * n_buckets)


0.17429433377077025


In [20]:
# Collapse each set of cashflows to one total per date.
market_aggregated_df = cashflows.groupby('date', as_index=False)['Cashflow'].sum()
port_aggregated_df = port_cfs.groupby('date', as_index=False)['Cashflow'].sum()

# IRR of the whole simulated market versus the sampled portfolio.
market_irr = xirr(
    market_aggregated_df['date'],
    market_aggregated_df['Cashflow'],
)
port_irr = xirr(
    port_aggregated_df['date'],
    port_aggregated_df['Cashflow'],
)

In [21]:
# Display the IRR of the sampled portfolio.
port_irr

0.11419283437694876

In [22]:
# Display the IRR computed over all funds in the simulated market.
market_irr

0.1076287439745897

In [88]:
# TODO: the market IRR should probably be subtracted from the first attribution, and the fund-quality dimension removed from data_gen

In [None]:
# TODO: the results could also be aggregated by vintage, strategy, and geography