# Heating Choice Model

In [1]:
#check BIOGEME version
import biogeme.version as ver
print("Biogeme version:", ver.get_version())

#Import necessary libraries
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta, Variable

Biogeme version: 3.3.0


  from tqdm.autonotebook import tqdm


In [2]:
# Load the database
df = pd.read_csv("quebec.csv", sep=',') # Check the path to your CSV file

#create backup of the original dataframe
df_backup = df.copy()

# Check missing values
# print(df.isnull().sum())
# Define the BIOGEME-specific database
database = db.Database("Heating", df)

In [3]:
obs = Variable("obs")
sector = Variable("sector")
hdd = Variable("hdd")
choice = Variable("choice")
conv_year = Variable("conv_year")
house_type = Variable("house_type")
constr_year = Variable("constr_year")
nb_rooms = Variable("nb_rooms")
nb_pers = Variable("nb_pers")
own_rent = Variable("own_rent")
surface = Variable("surface")
age = Variable("age")
income = Variable("income")
op_cost_1 = Variable("op_cost_1")
op_cost_2 = Variable("op_cost_2")
op_cost_3 = Variable("op_cost_3")
op_cost_4 = Variable("op_cost_4")
op_cost_5 = Variable("op_cost_5")
op_cost_6 = Variable("op_cost_6")
op_cost_7 = Variable("op_cost_7")
op_cost_8 = Variable("op_cost_8")
op_cost_9 = Variable("op_cost_9")
fix_cost_1 = Variable("fix_cost_1")
fix_cost_2 = Variable("fix_cost_2")
fix_cost_3 = Variable("fix_cost_3")
fix_cost_4 = Variable("fix_cost_4")
fix_cost_5 = Variable("fix_cost_5")
fix_cost_6 = Variable("fix_cost_6")
fix_cost_7 = Variable("fix_cost_7")
fix_cost_8 = Variable("fix_cost_8")
fix_cost_9 = Variable("fix_cost_9")
avail_1 = Variable("avail_1")
avail_2 = Variable("avail_2")
avail_3 = Variable("avail_3")
avail_4 = Variable("avail_4")
avail_5 = Variable("avail_5")
avail_6 = Variable("avail_6")
avail_7 = Variable("avail_7")
avail_8 = Variable("avail_8")
avail_9 = Variable("avail_9")
RowID = Variable("RowID")

In [4]:
# Define beta values to be estimated
## Beta('name', initial value, lower bound, upper bound, reference)
## Reference is used to set the reference alternative in a logit model (1 = yes, 0 = no)
asc_gg = Beta('asc_gg', 0, None, None, 1)
asc_ge = Beta('asc_ge', 0, None, None, 0)
asc_deo = Beta('asc_deo', 0, None, None, 0)
asc_dee = Beta('asc_dee', 0, None, None, 0)
asc_oo = Beta('asc_oo', 0, None, None, 0)
asc_oe = Beta('asc_oe', 0, None, None, 0)
asc_ee = Beta('asc_ee', 0, None, None, 0)
asc_we = Beta('asc_we', 0, None, None, 0)
asc_wee = Beta('asc_wee', 0, None, None, 0)
b_opt_cost = Beta('b_opt_cost', 0, None, None, 0)
b_fix_cost = Beta('b_fix_cost', 0, None, None, 0)
b_income = Beta('b_income', 0, None, None, 0)
b_fix_cost_income = Beta('b_fix_cost_income', 0, None, None, 0)

# Utility functions

V_gg  = asc_gg  + b_opt_cost * op_cost_1 + b_fix_cost * fix_cost_1 + b_income * income + b_fix_cost_income * (fix_cost_1 / income)
V_ge  = asc_ge  + b_opt_cost * op_cost_2 + b_fix_cost * fix_cost_2 + b_income * income + b_fix_cost_income * (fix_cost_2 / income)
V_deo = asc_deo + b_opt_cost * op_cost_3 + b_fix_cost * fix_cost_3 + b_income * income + b_fix_cost_income * (fix_cost_3 / income)
V_dee = asc_dee + b_opt_cost * op_cost_4 + b_fix_cost * fix_cost_4 + b_income * income + b_fix_cost_income * (fix_cost_4 / income)
V_oo  = asc_oo  + b_opt_cost * op_cost_5 + b_fix_cost * fix_cost_5 + b_income * income + b_fix_cost_income * (fix_cost_5 / income)
V_oe  = asc_oe  + b_opt_cost * op_cost_6 + b_fix_cost * fix_cost_6 + b_income * income + b_fix_cost_income * (fix_cost_6 / income)
V_ee  = asc_ee  + b_opt_cost * op_cost_7 + b_fix_cost * fix_cost_7 + b_income * income + b_fix_cost_income * (fix_cost_7 / income)
V_we  = asc_we  + b_opt_cost * op_cost_8 + b_fix_cost * fix_cost_8 + b_income * income + b_fix_cost_income * (fix_cost_8 / income)
V_wee = asc_wee + b_opt_cost * op_cost_9 + b_fix_cost * fix_cost_9 + b_income * income + b_fix_cost_income * (fix_cost_9 / income)

# Dictionary defining alternatives mapping
V = {1: V_gg, 2: V_ge, 3: V_deo, 4: V_dee, 5: V_oo, 6: V_oe, 7: V_ee, 8: V_we, 9: V_wee}

# Dictionary defining availability mapping
av = {1: avail_1, 2: avail_2,3: avail_3, 4: avail_4, 5: avail_5, 6: avail_6, 7: avail_7, 8: avail_8, 9: avail_9}

# Define the choice model
# loglogit(Alternatives mapping, Availability mapping, Choice variable)
logprob= models.loglogit(V, av, choice)

# Estimate Model
the_biogeme = bio.BIOGEME(database, logprob)
the_biogeme.model_name = 'heating2' # Set the model name

#Calculate null Loglikelihood
the_biogeme.calculate_null_loglikelihood(av)

# Save the estimation results
heating2 = the_biogeme.estimate()

#Print the results
print(heating2.short_summary())
heating2.get_estimated_parameters()

File biogeme.toml has been created


Results for model heating2
Nbr of parameters:		12
Sample size:			2897
Excluded data:			0
Null log likelihood:		-5773.765
Final log likelihood:		-1827.373
Likelihood ratio test (null):		7892.784
Rho square (null):			0.684
Rho bar square (null):			0.681
Akaike Information Criterion:	3678.747
Bayesian Information Criterion:	3750.404



  heating2.get_estimated_parameters()


Unnamed: 0,Name,Value,Robust std err.,Robust t-stat.,Robust p-value
0,b_opt_cost,-10.39421,0.4791539,-21.69285,0.0
1,b_fix_cost,0.7081388,0.2630033,2.692509,0.007091655
2,b_income,-8.888715e-13,1.797693e+308,-4.946e-321,1.0
3,b_fix_cost_income,0.3307175,0.3674146,0.9001208,0.368056
4,asc_ge,-0.3518389,0.3954475,-0.8897234,0.3736144
5,asc_deo,2.798594,0.3875398,7.221435,5.144773e-13
6,asc_dee,2.8098,0.3315343,8.475141,0.0
7,asc_oo,3.606006,0.4899487,7.359968,1.838529e-13
8,asc_oe,3.675192,0.4611521,7.969589,1.554312e-15
9,asc_ee,5.77424,0.3537971,16.32077,0.0


# Predicted Market Share

In [6]:
#Restore original dataframe
df = df_backup.copy()
#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values = biosim.simulate(heating2.get_beta_values())
print("Predicted Market Share after decreasing operating cost of gas-gas alternatives by 20%:")
for idx, i in enumerate(prob_values.mean()):
    print(f'{prob_values.mean().index[idx]} : {i*100:.4f} %')

Predicted Market Share after decreasing operating cost of gas-gas alternatives by 20%:
alt_gg : 0.9320 %
alt_ge : 0.3107 %
alt_deo : 2.4853 %
alt_dee : 6.9382 %
alt_oo : 0.4142 %
alt_oe : 0.6904 %
alt_ee : 81.1529 %
alt_we : 4.2803 %
alt_wee : 2.7960 %


# Average Individual

In [None]:
#Restore original dataframe
df = df_backup.copy()

# change the cost value to the average value
df["op_cost_1"] = df["op_cost_1"].mean()
df["op_cost_2"] = df["op_cost_2"].mean()
df["op_cost_3"] = df["op_cost_3"].mean()
df["op_cost_4"] = df["op_cost_4"].mean()
df["op_cost_5"] = df["op_cost_5"].mean()
df["op_cost_6"] = df["op_cost_6"].mean()
df["op_cost_7"] = df["op_cost_7"].mean()
df["op_cost_8"] = df["op_cost_8"].mean()
df["op_cost_9"] = df["op_cost_9"].mean()

df["fix_cost_1"] = df["fix_cost_1"].mean()
df["fix_cost_2"] = df["fix_cost_2"].mean()
df["fix_cost_3"] = df["fix_cost_3"].mean()
df["fix_cost_4"] = df["fix_cost_4"].mean()
df["fix_cost_5"] = df["fix_cost_5"].mean()
df["fix_cost_6"] = df["fix_cost_6"].mean()
df["fix_cost_7"] = df["fix_cost_7"].mean()
df["fix_cost_8"] = df["fix_cost_8"].mean()
df["fix_cost_9"] = df["fix_cost_9"].mean()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values = biosim.simulate(heating2.get_beta_values())
print("Predicted Market Share after decreasing operating cost of gas-gas alternatives by 20%:")
for idx, i in enumerate(prob_values.mean()):
    print(f'{prob_values.mean().index[idx]} : {i*100:.4f} %')

Predicted Market Share after decreasing operating cost of gas-gas alternatives by 20%:
alt_gg : 11.2756 %
alt_ge : 7.4522 %
alt_deo : 1.4170 %
alt_dee : 3.6149 %
alt_oo : 0.1776 %
alt_oe : 0.3655 %
alt_ee : 71.6057 %
alt_we : 1.6749 %
alt_wee : 2.4165 %


# Segmentation

In [27]:
#Restore original dataframe
df = df_backup.copy()

# Segment the dataset based on sector
df_sector1 = df[df['sector'] == 1]
df_sector2 = df[df['sector'] == 2]
df_sector3 = df[df['sector'] == 3]
df_sector4 = df[df['sector'] == 4]

# Segment the dataset based on income
df_low_inc = df[df['income'] < 4.4]
df_high_inc = df[(df['income'] >= 4.4)]

## Segmentation: Sector

### Compute Market Share for each Sector

In [11]:
##### Sector 1 #####

#change dataset to sector 1
df = df_sector1.copy()

df["op_cost_1"] = df["op_cost_1"].mean()
df["op_cost_2"] = df["op_cost_2"].mean()
df["op_cost_3"] = df["op_cost_3"].mean()
df["op_cost_4"] = df["op_cost_4"].mean()
df["op_cost_5"] = df["op_cost_5"].mean()
df["op_cost_6"] = df["op_cost_6"].mean()
df["op_cost_7"] = df["op_cost_7"].mean()
df["op_cost_8"] = df["op_cost_8"].mean()
df["op_cost_9"] = df["op_cost_9"].mean()

df["fix_cost_1"] = df["fix_cost_1"].mean()
df["fix_cost_2"] = df["fix_cost_2"].mean()
df["fix_cost_3"] = df["fix_cost_3"].mean()
df["fix_cost_4"] = df["fix_cost_4"].mean()
df["fix_cost_5"] = df["fix_cost_5"].mean()
df["fix_cost_6"] = df["fix_cost_6"].mean()
df["fix_cost_7"] = df["fix_cost_7"].mean()
df["fix_cost_8"] = df["fix_cost_8"].mean()
df["fix_cost_9"] = df["fix_cost_9"].mean()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values_sector_1 = biosim.simulate(heating2.get_beta_values())

##### Sector 2 #####

#change dataset to sector 2
df = df_sector2.copy()

df["op_cost_1"] = df["op_cost_1"].mean()
df["op_cost_2"] = df["op_cost_2"].mean()
df["op_cost_3"] = df["op_cost_3"].mean()
df["op_cost_4"] = df["op_cost_4"].mean()
df["op_cost_5"] = df["op_cost_5"].mean()
df["op_cost_6"] = df["op_cost_6"].mean()
df["op_cost_7"] = df["op_cost_7"].mean()
df["op_cost_8"] = df["op_cost_8"].mean()
df["op_cost_9"] = df["op_cost_9"].mean()

df["fix_cost_1"] = df["fix_cost_1"].mean()
df["fix_cost_2"] = df["fix_cost_2"].mean()
df["fix_cost_3"] = df["fix_cost_3"].mean()
df["fix_cost_4"] = df["fix_cost_4"].mean()
df["fix_cost_5"] = df["fix_cost_5"].mean()
df["fix_cost_6"] = df["fix_cost_6"].mean()
df["fix_cost_7"] = df["fix_cost_7"].mean()
df["fix_cost_8"] = df["fix_cost_8"].mean()
df["fix_cost_9"] = df["fix_cost_9"].mean()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values_sector_2 = biosim.simulate(heating2.get_beta_values())

##### Sector 3 #####
#change dataset to sector 3
df = df_sector3.copy()

df["op_cost_1"] = df["op_cost_1"].mean()
df["op_cost_2"] = df["op_cost_2"].mean()
df["op_cost_3"] = df["op_cost_3"].mean()
df["op_cost_4"] = df["op_cost_4"].mean()
df["op_cost_5"] = df["op_cost_5"].mean()
df["op_cost_6"] = df["op_cost_6"].mean()
df["op_cost_7"] = df["op_cost_7"].mean()
df["op_cost_8"] = df["op_cost_8"].mean()
df["op_cost_9"] = df["op_cost_9"].mean()

df["fix_cost_1"] = df["fix_cost_1"].mean()
df["fix_cost_2"] = df["fix_cost_2"].mean()
df["fix_cost_3"] = df["fix_cost_3"].mean()
df["fix_cost_4"] = df["fix_cost_4"].mean()
df["fix_cost_5"] = df["fix_cost_5"].mean()
df["fix_cost_6"] = df["fix_cost_6"].mean()
df["fix_cost_7"] = df["fix_cost_7"].mean()
df["fix_cost_8"] = df["fix_cost_8"].mean()
df["fix_cost_9"] = df["fix_cost_9"].mean()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values_sector_3 = biosim.simulate(heating2.get_beta_values())



##### Sector 4 #####
#change dataset to sector 4
df = df_sector4.copy()

df["op_cost_1"] = df["op_cost_1"].mean()
df["op_cost_2"] = df["op_cost_2"].mean()
df["op_cost_3"] = df["op_cost_3"].mean()
df["op_cost_4"] = df["op_cost_4"].mean()
df["op_cost_5"] = df["op_cost_5"].mean()
df["op_cost_6"] = df["op_cost_6"].mean()
df["op_cost_7"] = df["op_cost_7"].mean()
df["op_cost_8"] = df["op_cost_8"].mean()
df["op_cost_9"] = df["op_cost_9"].mean()

df["fix_cost_1"] = df["fix_cost_1"].mean()
df["fix_cost_2"] = df["fix_cost_2"].mean()
df["fix_cost_3"] = df["fix_cost_3"].mean()
df["fix_cost_4"] = df["fix_cost_4"].mean()
df["fix_cost_5"] = df["fix_cost_5"].mean()
df["fix_cost_6"] = df["fix_cost_6"].mean()
df["fix_cost_7"] = df["fix_cost_7"].mean()
df["fix_cost_8"] = df["fix_cost_8"].mean()
df["fix_cost_9"] = df["fix_cost_9"].mean()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values_sector_4 = biosim.simulate(heating2.get_beta_values())

### Compute the overall market prediction

In [26]:
w_sector = (
        len(df_sector1) / len(df_backup)*prob_values_sector_1.mean() + 
        len(df_sector2) / len(df_backup)*prob_values_sector_2.mean() + 
        len(df_sector3) / len(df_backup)*prob_values_sector_3.mean() + 
        len(df_sector4) / len(df_backup)*prob_values_sector_4.mean()
        )
print("Overall Market Prediction after segmentation by sector:")
for idx, i in enumerate(w_sector):
    print(f'{w_sector.index[idx]} : {i*100:.4f} %')

Overall Market Prediction after segmentation by sector:
alt_gg : 11.2998 %
alt_ge : 7.4071 %
alt_deo : 1.4117 %
alt_dee : 3.6031 %
alt_oo : 0.1782 %
alt_oe : 0.3651 %
alt_ee : 71.5856 %
alt_we : 1.7191 %
alt_wee : 2.4304 %


## Segmentation: Income

### Compute Market Share for each Income

In [29]:
##### Low Income #####

#change dataset to low income subset
df = df_low_inc.copy()

df["op_cost_1"] = df["op_cost_1"].mean()
df["op_cost_2"] = df["op_cost_2"].mean()
df["op_cost_3"] = df["op_cost_3"].mean()
df["op_cost_4"] = df["op_cost_4"].mean()
df["op_cost_5"] = df["op_cost_5"].mean()
df["op_cost_6"] = df["op_cost_6"].mean()
df["op_cost_7"] = df["op_cost_7"].mean()
df["op_cost_8"] = df["op_cost_8"].mean()
df["op_cost_9"] = df["op_cost_9"].mean()

df["fix_cost_1"] = df["fix_cost_1"].mean()
df["fix_cost_2"] = df["fix_cost_2"].mean()
df["fix_cost_3"] = df["fix_cost_3"].mean()
df["fix_cost_4"] = df["fix_cost_4"].mean()
df["fix_cost_5"] = df["fix_cost_5"].mean()
df["fix_cost_6"] = df["fix_cost_6"].mean()
df["fix_cost_7"] = df["fix_cost_7"].mean()
df["fix_cost_8"] = df["fix_cost_8"].mean()
df["fix_cost_9"] = df["fix_cost_9"].mean()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values_low_inc = biosim.simulate(heating2.get_beta_values())



##### High Income #####

#change dataset to high income subset
df = df_high_inc.copy()

df["op_cost_1"] = df["op_cost_1"].mean()
df["op_cost_2"] = df["op_cost_2"].mean()
df["op_cost_3"] = df["op_cost_3"].mean()
df["op_cost_4"] = df["op_cost_4"].mean()
df["op_cost_5"] = df["op_cost_5"].mean()
df["op_cost_6"] = df["op_cost_6"].mean()
df["op_cost_7"] = df["op_cost_7"].mean()
df["op_cost_8"] = df["op_cost_8"].mean()
df["op_cost_9"] = df["op_cost_9"].mean()

df["fix_cost_1"] = df["fix_cost_1"].mean()
df["fix_cost_2"] = df["fix_cost_2"].mean()
df["fix_cost_3"] = df["fix_cost_3"].mean()
df["fix_cost_4"] = df["fix_cost_4"].mean()
df["fix_cost_5"] = df["fix_cost_5"].mean()
df["fix_cost_6"] = df["fix_cost_6"].mean()
df["fix_cost_7"] = df["fix_cost_7"].mean()
df["fix_cost_8"] = df["fix_cost_8"].mean()
df["fix_cost_9"] = df["fix_cost_9"].mean()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values_high_inc = biosim.simulate(heating2.get_beta_values())

### Compute the overall market prediction

In [30]:
w_sector = (
        len(df_low_inc)  / len(df_backup)*prob_values_low_inc.mean() + 
        len(df_high_inc) / len(df_backup)*prob_values_high_inc.mean()
        )
print("Overall Market Prediction after segmentation by sector:")
for idx, i in enumerate(w_sector):
    print(f'{w_sector.index[idx]} : {i*100:.4f} %')

Overall Market Prediction after segmentation by sector:
alt_gg : 11.2749 %
alt_ge : 7.4514 %
alt_deo : 1.4246 %
alt_dee : 3.6291 %
alt_oo : 0.1821 %
alt_oe : 0.3722 %
alt_ee : 71.5520 %
alt_we : 1.6968 %
alt_wee : 2.4168 %


# Sample Enumeration

In [None]:
#Restore original dataframe
df = df_backup.copy()

sub_10 = df.sample(round(len(df)*0.1))
sub_25 = df.sample(round(len(df)*0.25))
sub_50 = df.sample(round(len(df)*0.5))
sub_75 = df.sample(round(len(df)*0.75))


In [44]:
##### 10% Sample #####

#change dataset to 10% subset
df = sub_10.copy()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values_sub_10 = biosim.simulate(heating2.get_beta_values())


##### 25% Sample #####

#change dataset to 25% subset
df = sub_25.copy()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values_sub_25 = biosim.simulate(heating2.get_beta_values())



##### 50% Sample #####

#change dataset to 50% subset
df = sub_50.copy()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values_sub_50 = biosim.simulate(heating2.get_beta_values())


##### 75% Sample #####

#change dataset to 75% subset
df = sub_75.copy()

#update BIOGEME database
database = db.Database("Heating", df)

#computing the choice probabilities for each row in the database
simulate = {
    'alt_gg': models.logit(V, av, 1),  # P(choice=1)
    'alt_ge': models.logit(V, av, 2),  # P(choice=2)
    'alt_deo': models.logit(V, av, 3),  # P(choice=3)
    'alt_dee': models.logit(V, av, 4),  # P(choice=4)
    'alt_oo': models.logit(V, av, 5),  # P(choice=5)
    'alt_oe': models.logit(V, av, 6),  # P(choice=6)
    'alt_ee': models.logit(V, av, 7),  # P(choice=7)
    'alt_we': models.logit(V, av, 8),  # P(choice=8)
    'alt_wee': models.logit(V, av, 9),  # P(choice=9)
}

biosim = bio.BIOGEME(database, simulate)
prob_values_sub_75 = biosim.simulate(heating2.get_beta_values())

In [53]:
print("Market Prediction with 10% Sample:")
for idx, i in enumerate(prob_values_sub_10.mean()):
    print(f'{prob_values_sub_10.mean().index[idx]} : {i*100:.4f} %')
print()
print("Market Prediction with 25% Sample:")
for idx, i in enumerate(prob_values_sub_25.mean()):
    print(f'{prob_values_sub_25.mean().index[idx]} : {i*100:.4f} %')
print()
print("Market Prediction with 50% Sample:")
for idx, i in enumerate(prob_values_sub_50.mean()):
    print(f'{prob_values_sub_50.mean().index[idx]} : {i*100:.4f} %')
print()
print("Market Prediction with 75% Sample:")
for idx, i in enumerate(prob_values_sub_75.mean()):
    print(f'{prob_values_sub_75.mean().index[idx]} : {i*100:.4f} %')

Market Prediction with 10% Sample:
alt_gg : 1.1355 %
alt_ge : 0.3765 %
alt_deo : 1.9884 %
alt_dee : 7.3056 %
alt_oo : 0.4380 %
alt_oe : 0.7242 %
alt_ee : 81.5539 %
alt_we : 3.6737 %
alt_wee : 2.8042 %

Market Prediction with 25% Sample:
alt_gg : 0.9718 %
alt_ge : 0.3148 %
alt_deo : 2.4415 %
alt_dee : 7.7039 %
alt_oo : 0.4325 %
alt_oe : 0.7202 %
alt_ee : 80.8000 %
alt_we : 3.8625 %
alt_wee : 2.7528 %

Market Prediction with 50% Sample:
alt_gg : 0.8711 %
alt_ge : 0.2903 %
alt_deo : 2.4309 %
alt_dee : 6.7001 %
alt_oo : 0.4088 %
alt_oe : 0.6809 %
alt_ee : 81.4270 %
alt_we : 4.3742 %
alt_wee : 2.8168 %

Market Prediction with 75% Sample:
alt_gg : 1.0237 %
alt_ge : 0.3354 %
alt_deo : 2.4959 %
alt_dee : 6.7843 %
alt_oo : 0.4104 %
alt_oe : 0.6872 %
alt_ee : 81.1913 %
alt_we : 4.2769 %
alt_wee : 2.7949 %
