# Project Market Share

## Imports

In [1]:
import pandas as pd
from typing import NamedTuple

# Biogeme 
import biogeme.database as db
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Expression
from biogeme.expressions import Variable


# models
from models.logit_lmpc12_model0 import V_0, logprob_0, biogeme_0, results as res_mod0
from models.logit_lmpc12_model1 import V_1, logprob_1, biogeme_1, results as res_mod1
from models.logit_lmpc12_model2 import V_2, logprob_2, biogeme_2, results as res_mod2
from models.logit_lmpc12_model3 import V_3, logprob_3, biogeme_3, results as res_mod3
from models.logit_lmpc12_model4 import results_nested, lognested, biogeme_nested, nests



## Calculate Weights

In [2]:
# Define population sizes for each stratum
census = {
    'female_44_less': 2841376,
    'female_45_more': 1519948,
    'male_44_less': 2926408,
    'male_45_more': 1379198,
}

# Total population size
population_size = sum(census.values())
print(f'Size of the population: {population_size:_}')

# Load the dataset
file_path = 'lpmc12.dat'
df = pd.read_csv('models/lpmc12.dat', sep='\t')

# Define filters for each stratum
filters = {
    'female_44_less': (df['female'] == 1) & (df['age'] <= 44),
    'female_45_more': (df['female'] == 1) & (df['age'] > 44),
    'male_44_less': (df['female'] == 0) & (df['age'] <= 44),
    'male_45_more': (df['female'] == 0) & (df['age'] > 44),
}

# Count the sample size in each stratum
sample_segments = {
    segment_name: segment_rows.sum() for segment_name, segment_rows in filters.items()
}
print(f'Sample segments: {sample_segments}')

# Total sample size
total_sample = sum(sample_segments.values())
print(f'Sample size: {total_sample}')

# Calculate weights
weights = {
    segment_name: census[segment_name] * total_sample / (segment_size * population_size)
    for segment_name, segment_size in sample_segments.items()
}
print(f'Weights: {weights}')

# Apply weights to the dataset
df['weight'] = 0.0
for segment_name, segment_rows in filters.items():
    df.loc[segment_rows, 'weight'] = weights[segment_name]

weight = Variable('weight')

# Verify sum of weights
sum_weights = df['weight'].sum()
print(f'Sum of the weights: {sum_weights}')

Size of the population: 8_666_930
Sample segments: {'female_44_less': np.int64(1631), 'female_45_more': np.int64(1034), 'male_44_less': np.int64(1451), 'male_45_more': np.int64(884)}
Sample size: 5000
Weights: {'female_44_less': np.float64(1.005031010413465), 'female_45_more': np.float64(0.8480333014252863), 'male_44_less': np.float64(1.1635155138048476), 'male_45_more': np.float64(0.9000757667545914)}
Sum of the weights: 5000.0


## Function

In [3]:
class IndicatorTuple(NamedTuple):
    """Tuple storing the value of an indicator, and the bounds on its confidence interval."""

    value: float
    lower: float
    upper: float

In [4]:
def market_share(utilities: dict[int, Expression]) -> dict[str, IndicatorTuple]:
    """Calculate the market shares of all alternatives, given the
    specification of the utility functions.

    :param utilities: Specification of the utility functions. It is a
        dict where the keys are the IDs of the alternatives, and the
        values are the expressions of the utility functions.

    :return: A dictionary where each entry corresponds to an
        alternative, and associates its name with the IndicatorTuple
        containing the value of the market share, and the lower and
        upper bounds of the 90% confidence interval.
    """

    prob_walk = lognested(utilities, None, nests, 1)
    prob_cycle = lognested(utilities, None, nests, 2)
    prob_pt = lognested(utilities, None, nests, 3)
    prob_car = lognested(utilities, None, nests, 4)

    # Simulation setup
    simulate = {
        'weight': weight,  # Assuming normalized weights are provided
        'Prob. walk': prob_walk,
        'Prob. cycle': prob_cycle,
        'Prob. PT': prob_pt,
        'Prob. car': prob_car,
    }
    
    # Creating Biogeme object
    database = db.Database('lpmc12', df)
    biosim = BIOGEME(database, simulate)
    simulated_values = biosim.simulate(results_nested.get_beta_values())
    
    print(simulated_values[['Prob. walk', 'Prob. cycle', 'Prob. PT', 'Prob. car']].describe())

    #Ã€ checker
    #print(simulated_values['weight'].describe())

    #simulated_values['Utility.walk'] = V[1]
    #simulated_values['Utility.cycle'] = V[2]
    #simulated_values['Utility.PT'] = V[3]
    #simulated_values['Utility.car'] = V[4]

    # Confidence intervals
    betas = biogeme_nested.free_beta_names
    sensitivity_betas = results_nested.get_betas_for_sensitivity_analysis(
        betas, use_bootstrap=False
    )
    left, right = biosim.confidence_intervals(sensitivity_betas, 0.9)

    # Initialize market shares
    market_shares = {}

    # Iterate through alternatives
    for alt_name, prob_name in [
        ("Walking", "Prob. walk"),
        ("Cycling", "Prob. cycle"),
        ("Public transportation", "Prob. PT"),
        ("Car", "Prob. car"),
    ]:
        weighted_name = f"Weighted choice_prob. {alt_name.lower()}"

        # Calculate weighted probabilities
        simulated_values[weighted_name] = (
            simulated_values["weight"] * simulated_values[prob_name]
        )
        left[weighted_name] = left["weight"] * left[prob_name]
        right[weighted_name] = right["weight"] * right[prob_name]

        # Calculate mean values and bounds
        market_share_value = simulated_values[weighted_name].mean()
        market_share_lower = left[weighted_name].mean()
        market_share_upper = right[weighted_name].mean()

        # Store results in the market shares dictionary
        market_shares[alt_name] = IndicatorTuple(
            value=market_share_value,
            lower=market_share_lower,
            upper=market_share_upper,
        )

    return market_shares

## Calculate market shares

In [5]:
market_shares_base = market_share(V_3)
for alternative, indicator in market_shares_base.items():
    print(
        f'Market share for {alternative}: {100*indicator.value:.1f}% '
        f'[{100*indicator.lower:.1f}%, '
        f'{100*indicator.upper:.1f}%]'
    )

        Prob. walk  Prob. cycle     Prob. PT    Prob. car
count  5000.000000  5000.000000  5000.000000  5000.000000
mean     -7.207633    -3.660821    -1.294333    -1.028046
std       8.990249     0.552830     0.770301     0.788695
min     -62.894771    -6.624581    -5.235362    -6.087109
25%      -9.895724    -3.891758    -1.863178    -1.295625
50%      -3.482878    -3.561487    -1.261460    -0.801890
75%      -1.141910    -3.311195    -0.700510    -0.498442
max      -0.174686    -2.523339    -0.004673    -0.025862
Market share for Walking: -729.2% [-831.3%, -623.7%]
Market share for Cycling: -366.3% [-406.3%, -342.5%]
Market share for Public transportation: -128.5% [-138.3%, -79.4%]
Market share for Car: -103.5% [-138.0%, -79.3%]
