In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import statsmodels.api as sm
import os
import sys

sys.path.insert(0, os.path.abspath('../developer'))

from config import MOCK_DATA, CODE, OUT
from developer.utilities import read_yaml
from developer.analysis.model import load_model
import re
import plotly.express as px
import plotly.graph_objects as go


In [10]:
df_total = pd.read_csv(OUT / "data" / "data_regression.csv")

##### regression 1

In [24]:
outcome = 'support'
explanatory_vars = [col for col in df_total.columns if "att_1" in col]

# Having a reference category for each att:
to_remove = ['att_1_Eliminate2070', 'att_2_NothingSoc', 'att_3_NothingEco', 'att_4_GovAlone', 'att_5_NoInterference']
explanatory_vars = [x for x in explanatory_vars if x not in to_remove]


X = df_total[explanatory_vars].astype(int)
y = df_total[outcome].astype(int)

X = sm.add_constant(X)
model = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups':df_total['ID']})

model.summary()

0,1,2,3
Dep. Variable:,support,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.005
Method:,Least Squares,F-statistic:,33.55
Date:,"Mon, 11 Sep 2023",Prob (F-statistic):,5.77e-15
Time:,11:07:24,Log-Likelihood:,-10961.0
No. Observations:,17112,AIC:,21930.0
Df Residuals:,17109,BIC:,21950.0
Df Model:,2,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.6503,0.009,68.951,0.000,0.632,0.669
att_1_Eliminate2050,0.0612,0.009,6.694,0.000,0.043,0.079
att_1_Reduce2030,0.0737,0.010,7.648,0.000,0.055,0.093

0,1,2,3
Omnibus:,34059.377,Durbin-Watson:,1.313
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3169.039
Skew:,-0.843,Prob(JB):,0.0
Kurtosis:,1.734,Cond. No.,3.74


#### regression 1_c

In [25]:
outcome = 'support'
explanatory_vars = [col for col in df_total.columns if "att_1" in col] + ['ageFilter', 'genderFilter', 'urban', 'district_NorthernZone', 
                                                                        'district_NorthEasternZone', 'district_CentralZone', 'district_EasternZone',
                                                                        'district_WesternZone', 'district_SouthernZone', 'treatment_status']

# Having a reference category for each att:
to_remove = ['att_1_Eliminate2070', 'att_2_NothingSoc', 'att_3_NothingEco', 'att_4_GovAlone', 'att_5_NoInterference']
explanatory_vars = [x for x in explanatory_vars if x not in to_remove]


X = df_total[explanatory_vars].astype(int)
y = df_total[outcome].astype(int)

X = sm.add_constant(X)
model = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups':df_total['ID']})

model.summary()

0,1,2,3
Dep. Variable:,support,R-squared:,0.014
Model:,OLS,Adj. R-squared:,0.013
Method:,Least Squares,F-statistic:,868.6
Date:,"Mon, 11 Sep 2023",Prob (F-statistic):,0.0
Time:,11:07:40,Log-Likelihood:,-10887.0
No. Observations:,17112,AIC:,21800.0
Df Residuals:,17100,BIC:,21890.0
Df Model:,11,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.5227,0.022,24.300,0.000,0.481,0.565
att_1_Eliminate2050,0.0613,0.009,6.743,0.000,0.043,0.079
att_1_Reduce2030,0.0730,0.010,7.589,0.000,0.054,0.092
ageFilter,7.495e-05,0.001,0.129,0.897,-0.001,0.001
genderFilter,0.0206,0.014,1.446,0.148,-0.007,0.049
urban,0.0461,0.015,3.171,0.002,0.018,0.075
district_NorthernZone,0.0807,0.015,5.382,0.000,0.051,0.110
district_NorthEasternZone,0.0562,0.033,1.682,0.093,-0.009,0.122
district_CentralZone,0.0476,0.025,1.903,0.057,-0.001,0.097

0,1,2,3
Omnibus:,26109.188,Durbin-Watson:,1.323
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3075.916
Skew:,-0.831,Prob(JB):,0.0
Kurtosis:,1.754,Cond. No.,5.05e+17


#### regression 2

In [26]:
outcome = 'support'
explanatory_vars = [col for col in df_total.columns if any(att in col for att in ["att_1", "att_2", "att_3"])]


# Having a reference category for each att:
to_remove = ['att_1_Eliminate2070', 'att_2_NothingSoc', 'att_3_NothingEco', 'att_4_GovAlone', 'att_5_NoInterference']
explanatory_vars = [x for x in explanatory_vars if x not in to_remove]


X = df_total[explanatory_vars].astype(int)
y = df_total[outcome].astype(int)

X = sm.add_constant(X)
model = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups':df_total['ID']})

model.summary()

0,1,2,3
Dep. Variable:,support,R-squared:,0.012
Model:,OLS,Adj. R-squared:,0.012
Method:,Least Squares,F-statistic:,21.58
Date:,"Mon, 11 Sep 2023",Prob (F-statistic):,3.3000000000000004e-31
Time:,11:08:53,Log-Likelihood:,-10897.0
No. Observations:,17112,AIC:,21810.0
Df Residuals:,17103,BIC:,21880.0
Df Model:,8,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.5586,0.014,40.455,0.000,0.532,0.586
att_1_Eliminate2050,0.0595,0.009,6.522,0.000,0.042,0.077
att_1_Reduce2030,0.0724,0.010,7.533,0.000,0.054,0.091
att_2_EnergyAccess,0.0639,0.010,6.365,0.000,0.044,0.084
att_2_InsureWorkers,0.0724,0.010,7.041,0.000,0.052,0.093
att_2_LowPrices,0.0754,0.010,7.516,0.000,0.056,0.095
att_3_IdentityCoalRegions,0.0340,0.011,3.211,0.001,0.013,0.055
att_3_IndustryCoalRegions,0.0625,0.010,6.069,0.000,0.042,0.083
att_3_WorkersCoalRegion,0.0623,0.011,5.769,0.000,0.041,0.083

0,1,2,3
Omnibus:,25874.502,Durbin-Watson:,1.302
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3088.84
Skew:,-0.834,Prob(JB):,0.0
Kurtosis:,1.755,Cond. No.,6.16


#### regression 2_c

In [27]:
outcome = 'support'
explanatory_vars = [col for col in df_total.columns if any(att in col for att in ["att_1", "att_2", "att_3"])] + ['ageFilter', 'genderFilter', 'urban', 'district_NorthernZone', 
                                                                        'district_NorthEasternZone', 'district_CentralZone', 'district_EasternZone',
                                                                        'district_WesternZone', 'district_SouthernZone', 'treatment_status']

# Having a reference category for each att:
to_remove = ['att_1_Eliminate2070', 'att_2_NothingSoc', 'att_3_NothingEco', 'att_4_GovAlone', 'att_5_NoInterference']
explanatory_vars = [x for x in explanatory_vars if x not in to_remove]


X = df_total[explanatory_vars].astype(int)
y = df_total[outcome].astype(int)

X = sm.add_constant(X)
model = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups':df_total['ID']})

model.summary()

0,1,2,3
Dep. Variable:,support,R-squared:,0.021
Model:,OLS,Adj. R-squared:,0.02
Method:,Least Squares,F-statistic:,610.1
Date:,"Mon, 11 Sep 2023",Prob (F-statistic):,0.0
Time:,11:09:35,Log-Likelihood:,-10822.0
No. Observations:,17112,AIC:,21680.0
Df Residuals:,17094,BIC:,21820.0
Df Model:,17,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.4449,0.023,19.097,0.000,0.399,0.491
att_1_Eliminate2050,0.0596,0.009,6.574,0.000,0.042,0.077
att_1_Reduce2030,0.0718,0.010,7.472,0.000,0.053,0.091
att_2_EnergyAccess,0.0634,0.010,6.351,0.000,0.044,0.083
att_2_InsureWorkers,0.0725,0.010,7.078,0.000,0.052,0.093
att_2_LowPrices,0.0752,0.010,7.509,0.000,0.056,0.095
att_3_IdentityCoalRegions,0.0342,0.011,3.239,0.001,0.014,0.055
att_3_IndustryCoalRegions,0.0624,0.010,6.093,0.000,0.042,0.082
att_3_WorkersCoalRegion,0.0619,0.011,5.749,0.000,0.041,0.083

0,1,2,3
Omnibus:,21005.399,Durbin-Watson:,1.312
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2997.322
Skew:,-0.822,Prob(JB):,0.0
Kurtosis:,1.774,Cond. No.,2.89e+17


#### regression 3

In [31]:
outcome = 'support'
explanatory_vars = [col for col in df_total.columns if "att" in col]

# Having a reference category for each att:
to_remove = ['att_1_Eliminate2070', 'att_2_NothingSoc', 'att_3_NothingEco', 'att_4_GovAlone', 'att_5_NoInterference']
explanatory_vars = [x for x in explanatory_vars if x not in to_remove]


X = df_total[explanatory_vars].astype(int)
y = df_total[outcome].astype(int)

X = sm.add_constant(X)
model = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups':df_total['ID']})

model.summary()

0,1,2,3
Dep. Variable:,support,R-squared:,0.018
Model:,OLS,Adj. R-squared:,0.017
Method:,Least Squares,F-statistic:,16.35
Date:,"Mon, 11 Sep 2023",Prob (F-statistic):,3.11e-42
Time:,11:11:08,Log-Likelihood:,-10846.0
No. Observations:,17112,AIC:,21730.0
Df Residuals:,17095,BIC:,21860.0
Df Model:,16,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.5002,0.018,28.002,0.000,0.465,0.535
att_1_Eliminate2050,0.0586,0.009,6.433,0.000,0.041,0.076
att_1_Reduce2030,0.0713,0.010,7.421,0.000,0.052,0.090
att_2_EnergyAccess,0.0633,0.010,6.312,0.000,0.044,0.083
att_2_InsureWorkers,0.0722,0.010,7.041,0.000,0.052,0.092
att_2_LowPrices,0.0755,0.010,7.524,0.000,0.056,0.095
att_3_IdentityCoalRegions,0.0337,0.011,3.193,0.001,0.013,0.054
att_3_IndustryCoalRegions,0.0609,0.010,5.937,0.000,0.041,0.081
att_3_WorkersCoalRegion,0.0608,0.011,5.647,0.000,0.040,0.082

0,1,2,3
Omnibus:,22318.467,Durbin-Watson:,1.3
Prob(Omnibus):,0.0,Jarque-Bera (JB):,3028.544
Skew:,-0.826,Prob(JB):,0.0
Kurtosis:,1.768,Cond. No.,9.29


#### regression 3_c

In [64]:
outcome = 'support'
explanatory_vars = [col for col in df_total.columns if "att" in col] +  ['ageFilter', 'genderFilter', 'urban', 'district_NorthernZone', 
                                                                        'district_NorthEasternZone', 'district_CentralZone', 'district_EasternZone',
                                                                        'district_WesternZone', 'district_SouthernZone', 'treatment_status', 'trust_ID',
                                                                        'aware']

# Having a reference category for each att:
to_remove = ['att_1_Eliminate2070', 'att_2_NothingSoc', 'att_3_NothingEco', 'att_4_GovAlone', 'att_5_NoInterference']
explanatory_vars = [x for x in explanatory_vars if x not in to_remove]


X = df_total[explanatory_vars].astype(int)
y = df_total[outcome].astype(int)

X = sm.add_constant(X)
model = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups':df_total['ID']})

model.summary()

0,1,2,3
Dep. Variable:,support,R-squared:,0.063
Model:,OLS,Adj. R-squared:,0.061
Method:,Least Squares,F-statistic:,503.0
Date:,"Mon, 11 Sep 2023",Prob (F-statistic):,0.0
Time:,13:31:24,Log-Likelihood:,-10449.0
No. Observations:,17112,AIC:,20950.0
Df Residuals:,17084,BIC:,21170.0
Df Model:,27,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.2264,0.027,8.321,0.000,0.173,0.280
att_1_Eliminate2050,0.0585,0.009,6.585,0.000,0.041,0.076
att_1_Reduce2030,0.0714,0.009,7.557,0.000,0.053,0.090
att_2_EnergyAccess,0.0651,0.010,6.596,0.000,0.046,0.084
att_2_InsureWorkers,0.0787,0.010,7.871,0.000,0.059,0.098
att_2_LowPrices,0.0771,0.010,7.887,0.000,0.058,0.096
att_3_IdentityCoalRegions,0.0360,0.010,3.484,0.000,0.016,0.056
att_3_IndustryCoalRegions,0.0623,0.010,6.255,0.000,0.043,0.082
att_3_WorkersCoalRegion,0.0607,0.011,5.765,0.000,0.040,0.081

0,1,2,3
Omnibus:,7536.002,Durbin-Watson:,1.35
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2598.51
Skew:,-0.788,Prob(JB):,0.0
Kurtosis:,1.922,Cond. No.,1.2e+17


In [68]:
model.rsquared

0.06277283959006974

In [58]:
def plot_regression(model, data_info, width=1.0, plot_title="Fig 2: on support for policy attributes"):

    order = data_info['order']
    att_1_levels = order['att_1']
    att_2_levels = order['att_2']
    att_3_levels = order['att_3']
    att_4_levels = order['att_4']
    att_5_levels = order['att_5']

    att_levels = [att_5_levels, att_4_levels, att_3_levels, att_2_levels, att_1_levels]

    att_colors = data_info['colors']

    fig = go.Figure()

    total_levels = sum(len(levels) for levels in att_levels) +  5

    # Loop through each attribute group and add the data to the plot
    for i, levels in enumerate(att_levels):
        att_coefficients = [model.params[f'att_{5-i}_{level}'] for level in levels] + [0] 
        att_standard_errors = [model.bse[f'att_{5-i}_{level}']*1.97 for level in levels] + [0]

        reference = ['Eliminate2070', 'NothingSoc', 'NothingEco', 'GovAlone', 'NoInterference']

        levels = levels + [reference[5-i-1]] 

        fig.add_trace(go.Scatter(
            x=att_coefficients,
            y=levels,
            mode='markers',
            error_x=dict(type='data', array=att_standard_errors, color=att_colors[i], thickness=1.5),
            marker=dict(color='#36454F', size=10),
            orientation='h',
            showlegend = False,
        ))

        fig.add_shape(
            type="rect",
            x0=-width,  # Set a fixed value for x0, which is left side of the plot
            x1=width,  # Set the width of the shape to 1000 (right side of the plot)
            y0=total_levels - sum(len(l) + 1 for l in att_levels[i:]),  # Set y0 to the starting level index
            y1=total_levels - sum(len(l) + 1 for l in att_levels[i:]) + len(levels) -1,  # Set y1 to the ending level index
            fillcolor=att_colors[i],
            opacity=0.1,  # Set the opacity for a light transparent effect
            layer="below",  # Place the rectangle below the scatter plot markers
        )
    
    att_1_levels = att_1_levels + [reference[0]]
    att_2_levels = att_2_levels + [reference[1]]
    att_3_levels = att_3_levels + [reference[2]] 
    att_4_levels = att_4_levels + [reference[3]] 
    att_5_levels = att_5_levels + [reference[4]]

    # Add a vertical line at x=0 for reference
    fig.add_shape(type="line", x0=0, x1=0, y0=att_5_levels[0], y1=att_1_levels[-1], line=dict(color="gray", width=1, dash='dash'))

    # Update the layout of the error bar plot
    fig.update_layout(
        title={
            'text': plot_title,
            'x': 0.0,
            'xanchor': 'center',
            'font': {'family': 'Computer Modern'}
        },
        xaxis_title='AMCE on support (0-1)',
        yaxis_title='Attribute Levels',
        yaxis=dict(categoryorder='array', categoryarray=att_5_levels),  # Set the categoryorder for y-axis based on att_1_levels
        xaxis=dict(tickformat='.2f', zeroline=False, range=[-0.3,0.3]),  # Remove x-axis zeroline
        showlegend=True,  # Show legend with attribute names
        margin=dict(l=80, r=30, b=40, t=80),
        height=800,  # Set the height of the plot to 600 pixels
        width=1000,
        title_x=0.50,
        paper_bgcolor="#EADDCA",
        plot_bgcolor='rgba(0,0,0,0)',
    ) 

    # Show the interactive error bar plot
    return fig

data_info = read_yaml(r"C:\Users\sjurl\OneDrive\Desktop\MasterThesis\Analysis\conjoint\developer\final\plot_specs.yaml")

fig = plot_regression(model, data_info, width=1.0)
fig.show()

In [8]:
print("Regression results for Attribute A:")
print(model.summary())

Regression results for Attribute A:
                            OLS Regression Results                            
Dep. Variable:                utility   R-squared:                       0.198
Model:                            OLS   Adj. R-squared:                  0.045
Method:                 Least Squares   F-statistic:                    0.5149
Date:                Sat, 12 Aug 2023   Prob (F-statistic):              0.845
Time:                        12:53:42   Log-Likelihood:                -137.13
No. Observations:                 132   AIC:                             318.3
Df Residuals:                     110   BIC:                             381.7
Df Model:                          21                                         
Covariance Type:              cluster                                         
                                             coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------



## Descriptives


In [16]:
clean_data = pd.read_csv(OUT / "data" / "data_clean.csv")

import pandas as pd
import plotly.graph_objs as go

def attribute_support(df, attribute):
    df = df.copy()

    df = df[[attribute, 'support']]
    df['support'] = df['support'].astype(int)

    categories = df[attribute].unique()

    support = {"Attribute Level": [], "Value": [], "CI_lower": [], "CI_upper": []}
    
    for cat in categories[::-1]:
        group = df[df[attribute] == cat]
        mean = group['support'].mean()
        std_dev = group['support'].std()
        n = len(group)
        confidence_interval = 1.96 * (std_dev / (n**0.5))  # 95% confidence interval

        support["Attribute Level"].append(cat.replace('&', '<br>'))
        support["Value"].append(mean.round(2))
        support["CI_lower"].append((mean - confidence_interval).round(2))
        support["CI_upper"].append((mean + confidence_interval).round(2))

    df = pd.DataFrame(support)

    color_scale = ["rgb(173, 221, 142)", "rgb(127, 188, 65)", "rgb(78, 139, 37)", "rgb(45, 82, 21)"]

    fig = go.Figure()

    for i, row in df.iterrows():
        fig.add_trace(go.Bar(
            x=[row["Attribute Level"]],
            y=[row["Value"]],
            error_y=dict(
                type='data',
                array=[row["CI_upper"] - row["Value"]],
                arrayminus=[row["Value"] - row["CI_lower"]],
                visible=True
            ),
            marker_color=color_scale[i],
            name=row["Attribute Level"]
        ))

    # Set y-axis range from 0 to 1
    fig.update_layout(yaxis_range=[0, 1], width=600, height=500)

    # Add a horizontal line at y=0.5
    
    fig.add_hline(y=0.5, line_dash="dash")


    fig.update_layout(barmode="group", bargap=0.6, bargroupgap=0.1)
    fig.update_layout(
        title={
            'text': "Fig 1: Support of the different phase-out strategies",
            'x': 0.5,
            'xanchor': 'center',
            'font': {'family': 'Computer Modern'}
        },
        margin=dict(l=20, r=20, t=45, b=5),
        paper_bgcolor="#EADDCA",
        plot_bgcolor='rgba(0,0,0,0)',
        showlegend=False,  # Show legend for different Attribute Levels
        xaxis_showticklabels=True,
        xaxis_title=None,
    )

    return df



fig = attribute_support(clean_data, "att_1")

#fig.show()




KeyError: "None of [Index(['att_1', 'support'], dtype='object')] are in the [columns]"

In [3]:
fig

Unnamed: 0,Attribute Level,Value,CI_lower,CI_upper
0,Reduce<br>IncreaseRenewables,0.7,0.66,0.75
1,Eliminate<br>UseAllOther,0.76,0.72,0.81
2,Reduce<br>IncreaseAllOther,0.72,0.68,0.77
3,Eliminate<br>UseRenewables,0.77,0.73,0.82


## Plots

##### 1.1 Relative support plot AMCE (Intra)

In [40]:
model.params[0]['att_6_SignificantEffort']

0.3932151622585344

In [13]:
import plotly.graph_objects as go

list1 = read_yaml(r"C:\Users\sjurl\OneDrive\Desktop\MasterThesis\Analysis\conjoint\developer\final\plot_specs.yaml")


# Assuming 'model' is the variable that holds your regression results
# Extract the coefficients and standard errors for each attribute level
att_1_levels = list1['order']['att_1']
att_2_levels = list1['order']['att_2']
att_3_levels = list1['order']['att_3']
att_4_levels = list1['order']['att_4']
att_5_levels = list1['order']['att_5']
att_6_levels = list1['order']['att_6']



#Remember to add att_6
att_levels = [att_6_levels, att_5_levels, att_4_levels, att_3_levels, att_2_levels, att_1_levels]

att_colors = ['gray', 'red', 'blue', 'green', 'orange', 'purple']  # Colors for each attribute group

fig = go.Figure()

total_levels = sum(len(levels) for levels in att_levels)

# Loop through each attribute group and add the data to the plot
for i, levels in enumerate(att_levels):
    att_coefficients = [model.params[f'att_{6-i}_{level}'] for level in levels]
    att_standard_errors = [model.bse[f'att_{6-i}_{level}'] for level in levels]

    fig.add_trace(go.Scatter(
        x=att_coefficients,
        y=levels,
        mode='markers',
        error_x=dict(type='data', array=att_standard_errors, color=att_colors[i], thickness=1.5),
        marker=dict(color='darkgray', size=10),
        orientation='h',
        showlegend = False,
    ))

    fig.add_shape(
        type="rect",
        x0=-1.5,  # Set a fixed value for x0, which is left side of the plot
        x1=1.5,  # Set the width of the shape to 1000 (right side of the plot)
        y0=total_levels - sum(len(l) for l in att_levels[i:]),  # Set y0 to the starting level index
        y1=total_levels - sum(len(l) for l in att_levels[i:]) + len(levels) - 1,  # Set y1 to the ending level index
        fillcolor=att_colors[i],
        opacity=0.1,  # Set the opacity for a light transparent effect
        layer="below",  # Place the rectangle below the scatter plot markers
    )

# Add a vertical line at x=0 for reference
fig.add_shape(type="line", x0=0, x1=0, y0=att_6_levels[0], y1=att_1_levels[-1], line=dict(color="gray", width=1, dash='dash'))

# Update the layout of the error bar plot
fig.update_layout(
    title='Relative Rating Differences',
    xaxis_title='',
    yaxis_title='Attribute Levels',
    yaxis=dict(categoryorder='array', categoryarray=att_6_levels),  # Set the categoryorder for y-axis based on att_1_levels
    xaxis=dict(tickformat='.2f', zeroline=False),  # Remove x-axis zeroline
    showlegend=True,  # Show legend with attribute names
    margin=dict(l=80, r=30, b=40, t=80),
    height=600,  # Set the height of the plot to 600 pixels
    width=1000,
    title_x=0.62,
)

# Show the interactive error bar plot
fig.show()


Grouped:

In [10]:
import plotly.graph_objects as go

# Assuming 'model' is the variable that holds your regression results
# Extract the coefficients and standard errors for each attribute level
att_1_levels = ['PhaseOut', 'Stop&Reduce', 'Stop&Maintain', 'StatusQuo']
att_2_levels = ['HighInvestment&Int', 'HighInvestment&Int&Consideration', 'LowInvestment&LowConsideration', 'LowInvestment']
att_3_levels = ['HealthEdu', 'EnergyAccess', 'LowPrices', 'Transfers', 'NothingSoc']
att_4_levels = ['CreateJobs', 'EarlyPension', 'JobGuarantee', 'Retrain', 'NothingEco']
att_5_levels = ['CivilNGO', 'EnergySector', 'LabourUnion', 'LocalGov', 'Media', 'Researchers', 'CentralGov']



#Remember to add att_6
att_levels = [att_5_levels, att_4_levels, att_3_levels, att_2_levels, att_1_levels]

att_colors_control = ['red', 'blue', 'green', 'orange', 'purple']  # Colors for each attribute group

fig = go.Figure()

total_levels = sum(len(levels) for levels in att_levels)

# Loop through each attribute group and add the data to the plot
fig = go.Figure()

# Loop through each attribute group and add the data for 'control' to the plot
for i, levels in enumerate(att_levels):
    att_coefficients = [model_control.params[f'att_{5-i}_{level}'] for level in levels]
    att_standard_errors = [model_control.bse[f'att_{5-i}_{level}'] for level in levels]

    relative_differences = [coeff - att_coefficients[-1] for coeff in att_coefficients]

    fig.add_trace(go.Scatter(
        x=relative_differences,
        y=levels,
        mode='markers',
        error_x=dict(type='data', array=att_standard_errors, color=att_colors[i], thickness=1.5),
        marker=dict(color='darkgray', size=10),
        orientation='h',
        showlegend=False,
        name='Control',  # Add a legend name for the control group
    ))

# Loop through each attribute group and add the data for 'treated' to the plot
for i, levels in enumerate(att_levels):
    att_coefficients = [model_treated.params[f'att_{5-i}_{level}'] for level in levels]
    att_standard_errors = [model_treated.bse[f'att_{5-i}_{level}'] for level in levels]

    relative_differences = [coeff - att_coefficients[-1] for coeff in att_coefficients]

    fig.add_trace(go.Scatter(
        x=relative_differences,
        y=levels,
        mode='markers',
        error_x=dict(type='data', array=att_standard_errors, color=att_colors[i], thickness=1.5),
        marker=dict(color=att_colors[i], size=10),  # Use different colors for treated group
        orientation='h',
        showlegend=False,
        name='Treated',  # Add a legend name for the treated group
    ))

    fig.add_shape(
        type="rect",
        x0=-1.5,  # Set a fixed value for x0, which is left side of the plot
        x1=1.5,  # Set the width of the shape to 1000 (right side of the plot)
        y0=total_levels - sum(len(l) for l in att_levels[i:]),  # Set y0 to the starting level index
        y1=total_levels - sum(len(l) for l in att_levels[i:]) + len(levels) - 1,  # Set y1 to the ending level index
        fillcolor=att_colors[i],
        opacity=0.1,  # Set the opacity for a light transparent effect
        layer="below",  # Place the rectangle below the scatter plot markers
    )

# Add a vertical line at x=0 for reference
fig.add_shape(type="line", x0=0, x1=0, y0=att_5_levels[0], y1=att_1_levels[-1], line=dict(color="gray", width=1, dash='dash'))

# Update the layout of the error bar plot
fig.update_layout(
    title='Relative Rating Differences',
    xaxis_title='',
    yaxis_title='Attribute Levels',
    yaxis=dict(categoryorder='array', categoryarray=att_5_levels),  # Set the categoryorder for y-axis based on att_1_levels
    xaxis=dict(tickformat='.2f', zeroline=False),  # Remove x-axis zeroline
    showlegend=True,  # Show legend with attribute names
    margin=dict(l=80, r=30, b=40, t=80),
    height=600,  # Set the height of the plot to 600 pixels
    width=1000,
    title_x=0.62,
)

# Show the interactive error bar plot
fig.show()


In [15]:
def plot_relative_differences_grouped(model_control, model_treated, data_info, group1, group2, width=1.0, plot_title="Marginal Means Treated / Control"):

    nobs_light = model_control.nobs / 12
    nobs_dark = model_treated.nobs / 12

    order = data_info['order']
    att_1_levels = order['att_1']
    att_2_levels = order['att_2']
    att_3_levels = order['att_3']
    att_4_levels = order['att_4']
    att_5_levels = order['att_5']
    att_6_levels = order['att_6']

    att_levels = [att_6_levels, att_5_levels, att_4_levels, att_3_levels, att_2_levels, att_1_levels]

    att_colors_control = data_info['colors_control']
    att_colors_treated = data_info['colors_treated']

    fig = go.Figure()

    total_levels = sum(len(levels) for levels in att_levels)

    # Loop through each attribute group and add the data for 'control' to the plot
    for i, levels in enumerate(att_levels):

        att_coefficients = [model_treated.params[f'att_{6-i}_{level}'] for level in levels]
        att_standard_errors = [model_treated.bse[f'att_{6-i}_{level}'] for level in levels]

        fig.add_trace(go.Scatter(
            x=att_coefficients,
            y=levels,
            mode='markers',
            error_x=dict(type='data', array=att_standard_errors, color=att_colors_treated[i], thickness=1.5),
            marker=dict(color=att_colors_treated[i], size=10),  # Use different colors for treated group
            orientation='h',
            showlegend=False,
            name='Treated',  # Add a legend name for the treated group
        ))
        

    # Loop through each attribute group and add the data for 'treated' to the plot
    for i, levels in enumerate(att_levels):

        att_coefficients = [model_control.params[f'att_{6-i}_{level}'] for level in levels]
        att_standard_errors = [model_control.bse[f'att_{6-i}_{level}'] for level in levels]

        fig.add_trace(go.Scatter(
            x=att_coefficients,
            y=levels,
            mode='markers',
            error_x=dict(type='data', array=att_standard_errors, color=att_colors_control[i], thickness=1.5),
            marker=dict(color=att_colors_control[i], size=10),
            orientation='h',
            showlegend=False,
            name='Control',  # Add a legend name for the control group
        ))

        fig.add_shape(
            type="rect",
            x0=-1.5,  # Set a fixed value for x0, which is left side of the plot
            x1=1.5,  # Set the width of the shape to 1000 (right side of the plot)
            y0=total_levels - sum(len(l) for l in att_levels[i:]),  # Set y0 to the starting level index
            y1=total_levels - sum(len(l) for l in att_levels[i:]) + len(levels) - 1,  # Set y1 to the ending level index
            fillcolor=att_colors_treated[i],
            opacity=0.1,  # Set the opacity for a light transparent effect
            layer="below",  # Place the rectangle below the scatter plot markers
        )

    # Add a vertical line at x=0 for reference
    fig.add_shape(type="line", x0=0, x1=0, y0=att_6_levels[0], y1=att_1_levels[-1], line=dict(color="gray", width=1, dash='dash'))

    fig.add_annotation(
                x=-0.02,  # X-coordinate for the annotation (adjust as needed)
                y=25.0,  # Y-coordinate for the annotation (above the plot)
                text=f"{group1} (lighter): n={(int(nobs_light))}",
                showarrow=False,
                font=dict(
                    family='Computer Modern',
                    size=12,
                    ),
            )
    fig.add_annotation(
                x=-0.02,  # X-coordinate for the annotation (adjust as needed)
                y=26.0,  # Y-coordinate for the annotation (above the plot)
                text=f"{group2} (darker): n={(int(nobs_dark))}",
                showarrow=False,
                font=dict(
                    family='Computer Modern',
                    size=12,
                    ),
            )

    # Update the layout of the error bar plot
    fig.update_layout(
        title={
            'text': plot_title,
            'x': 0.5,
            'font': {'family': 'Computer Modern'}
        },
        xaxis_title='',
        yaxis_title='Attribute Levels',
        yaxis=dict(categoryorder='array', categoryarray=att_6_levels),  # Set the categoryorder for y-axis based on att_1_levels
        xaxis=dict(tickformat='.2f', zeroline=False, range=[-0.15,0.2]),  # Remove x-axis zeroline
        showlegend=True,  # Show legend with attribute names
        margin=dict(l=80, r=30, b=40, t=80),
        height=600,  # Set the height of the plot to 600 pixels
        width=1000,
        paper_bgcolor="#EADDCA",
        plot_bgcolor='rgba(0,0,0,0)',
    )

    # Show the interactive error bar plot
    return fig

specs = read_yaml(r"C:\Users\sjurl\OneDrive\Desktop\MasterThesis\Analysis\conjoint\developer\final\plot_specs.yaml")

model_control = load_model(OUT / "models" / "model_non_coal_region.pickle")
model_treated = load_model(OUT / "models" / "model_coal_region.pickle")



fig = plot_relative_differences_grouped(model_control, model_treated, specs, group1="NonCoal", group2="Coal", width=1.0, plot_title="Marginal Means Coal / NonCoal region")
fig.show()

1.2 Marginal Means

In [11]:
import plotly.graph_objects as go

# Assuming 'model' is the variable that holds your regression results
# Extract the coefficients and standard errors for each attribute level
att_1_levels = ['PhaseOut', 'Stop&Reduce', 'Stop&Maintain', 'StatusQuo']
att_2_levels = ['HighInvestment&Int', 'HighInvestment&Int&Consideration', 'LowInvestment&LowConsideration', 'LowInvestment']
att_3_levels = ['HealthEdu', 'EnergyAccess', 'LowPrices', 'Transfers', 'NothingSoc']
att_4_levels = ['CreateJobs', 'EarlyPension', 'JobGuarantee', 'Retrain', 'NothingEco']
att_5_levels = ['CivilNGO', 'EnergySector', 'LabourUnion', 'LocalGov', 'Media', 'Researchers', 'CentralGov']



#Remember to add att_6
att_levels = [att_5_levels, att_4_levels, att_3_levels, att_2_levels, att_1_levels]

att_colors = ['red', 'blue', 'green', 'orange', 'purple']  # Colors for each attribute group

fig = go.Figure()

total_levels = sum(len(levels) for levels in att_levels)

# Loop through each attribute group and add the data to the plot
for i, levels in enumerate(att_levels):
    att_coefficients = [model.params[f'att_{5-i}_{level}'] for level in levels]
    att_standard_errors = [model.bse[f'att_{5-i}_{level}'] for level in levels]

    fig.add_trace(go.Scatter(
        x=att_coefficients,
        y=levels,
        mode='markers',
        error_x=dict(type='data', array=att_standard_errors, color=att_colors[i], thickness=1.5),
        marker=dict(color='darkgray', size=10),
        orientation='h',
        showlegend = False,
    ))

    fig.add_shape(
        type="rect",
        x0=-1.5,  # Set a fixed value for x0, which is left side of the plot
        x1=1.5,  # Set the width of the shape to 1000 (right side of the plot)
        y0=total_levels - sum(len(l) for l in att_levels[i:]),  # Set y0 to the starting level index
        y1=total_levels - sum(len(l) for l in att_levels[i:]) + len(levels) - 1,  # Set y1 to the ending level index
        fillcolor=att_colors[i],
        opacity=0.1,  # Set the opacity for a light transparent effect
        layer="below",  # Place the rectangle below the scatter plot markers
    )

# Add a vertical line at x=0 for reference
fig.add_shape(type="line", x0=0, x1=0, y0=att_5_levels[0], y1=att_1_levels[-1], line=dict(color="gray", width=1, dash='dash'))

# Update the layout of the error bar plot
fig.update_layout(
    title='Marginal Means',
    xaxis_title='',
    yaxis_title='Attribute Levels',
    yaxis=dict(categoryorder='array', categoryarray=att_5_levels),  # Set the categoryorder for y-axis based on att_1_levels
    xaxis=dict(tickformat='.2f', zeroline=False),  # Remove x-axis zeroline
    showlegend=True,  # Show legend with attribute names
    margin=dict(l=80, r=30, b=40, t=80),
    height=600,  # Set the height of the plot to 600 pixels
    width=1000,
    title_x=0.62,
)

# Show the interactive error bar plot
fig.show()


Grouped

In [12]:
import plotly.graph_objects as go

# Assuming 'model' is the variable that holds your regression results
# Extract the coefficients and standard errors for each attribute level
att_1_levels = ['PhaseOut', 'Stop&Reduce', 'Stop&Maintain', 'StatusQuo']
att_2_levels = ['HighInvestment&Int', 'HighInvestment&Int&Consideration', 'LowInvestment&LowConsideration', 'LowInvestment']
att_3_levels = ['HealthEdu', 'EnergyAccess', 'LowPrices', 'Transfers', 'NothingSoc']
att_4_levels = ['CreateJobs', 'EarlyPension', 'JobGuarantee', 'Retrain', 'NothingEco']
att_5_levels = ['CivilNGO', 'EnergySector', 'LabourUnion', 'LocalGov', 'Media', 'Researchers', 'CentralGov']



#Remember to add att_6
att_levels = [att_5_levels, att_4_levels, att_3_levels, att_2_levels, att_1_levels]

att_colors_control = ['red', 'blue', 'green', 'orange', 'purple']  # Colors for each attribute group

fig = go.Figure()

total_levels = sum(len(levels) for levels in att_levels)

# Loop through each attribute group and add the data to the plot
fig = go.Figure()

# Loop through each attribute group and add the data for 'control' to the plot
for i, levels in enumerate(att_levels):
    att_coefficients = [model_control.params[f'att_{5-i}_{level}'] for level in levels]
    att_standard_errors = [model_control.bse[f'att_{5-i}_{level}'] for level in levels]


    fig.add_trace(go.Scatter(
        x=att_coefficients,
        y=levels,
        mode='markers',
        error_x=dict(type='data', array=att_standard_errors, color=att_colors[i], thickness=1.5),
        marker=dict(color='darkgray', size=10),
        orientation='h',
        showlegend=False,
        name='Control',  # Add a legend name for the control group
    ))

# Loop through each attribute group and add the data for 'treated' to the plot
for i, levels in enumerate(att_levels):
    att_coefficients = [model_treated.params[f'att_{5-i}_{level}'] for level in levels]
    att_standard_errors = [model_treated.bse[f'att_{5-i}_{level}'] for level in levels]

    fig.add_trace(go.Scatter(
        x=att_coefficients,
        y=levels,
        mode='markers',
        error_x=dict(type='data', array=att_standard_errors, color=att_colors[i], thickness=1.5),
        marker=dict(color=att_colors[i], size=10),  # Use different colors for treated group
        orientation='h',
        showlegend=False,
        name='Treated',  # Add a legend name for the treated group
    ))

    fig.add_shape(
        type="rect",
        x0=-1.5,  # Set a fixed value for x0, which is left side of the plot
        x1=1.5,  # Set the width of the shape to 1000 (right side of the plot)
        y0=total_levels - sum(len(l) for l in att_levels[i:]),  # Set y0 to the starting level index
        y1=total_levels - sum(len(l) for l in att_levels[i:]) + len(levels) - 1,  # Set y1 to the ending level index
        fillcolor=att_colors[i],
        opacity=0.1,  # Set the opacity for a light transparent effect
        layer="below",  # Place the rectangle below the scatter plot markers
    )

# Add a vertical line at x=0 for reference
fig.add_shape(type="line", x0=0, x1=0, y0=att_5_levels[0], y1=att_1_levels[-1], line=dict(color="gray", width=1, dash='dash'))

# Update the layout of the error bar plot
fig.update_layout(
    title='Marginal Means Treatment/Control',
    xaxis_title='',
    yaxis_title='Attribute Levels',
    yaxis=dict(categoryorder='array', categoryarray=att_5_levels),  # Set the categoryorder for y-axis based on att_1_levels
    xaxis=dict(tickformat='.2f', zeroline=False),  # Remove x-axis zeroline
    showlegend=True,  # Show legend with attribute names
    margin=dict(l=80, r=30, b=40, t=80),
    height=600,  # Set the height of the plot to 600 pixels
    width=1000,
    title_x=0.62,
)

# Show the interactive error bar plot
fig.show()


##### 1.2 Rating attributes with a Normalization Method.

*Normalizing the coefficients involves transforming them to a common scale, typically between 0 and 1. This makes it easier to compare the relative importance of different attributes.*


To aggregate the importance scores for different levels of the same attribute and obtain an overall importance score for each attribute, you can calculate a weighted average or sum of the importance scores of its individual levels. Here's how you can do it:

Calculate Normalized Importance Scores (NIS) for Attribute Levels:

Follow the normalization method as described earlier to calculate the normalized importance scores (NIS) for each attribute level.
Aggregate Importance Scores for Each Attribute:

a. Weighted Average Method:

Calculate the weighted average importance score for each attribute by taking the sum of the products of each level's NIS and its corresponding weight (frequency or probability of that level's occurrence in the choice sets).
This method considers both the relative importance of each level and its likelihood of being chosen in the experiment.
b. Simple Sum Method:

Sum up the normalized importance scores (NIS) of all levels within an attribute.
This method treats all levels equally in terms of their contribution to the overall importance score.
Attribute Importance Ranking:

Rank the attributes based on the aggregated importance scores. Higher scores indicate greater importance.
Example using the Weighted Average Method:

Let's consider an example with attribute "att_1" from your regression results. You have four levels: "PhaseOut," "StatusQuo," "Stop&Maintain," and "Stop&Reduce." You've already calculated the normalized importance scores (NIS) for each level as follows:

NIS(PhaseOut) = 0.8810 / 0.8810 = 1.0000
NIS(StatusQuo) = 0.2979 / 0.8810 = 0.3379
NIS(Stop&Maintain) = 0.1471 / 0.8810 = 0.1668
NIS(Stop&Reduce) = 0.9077 / 0.8810 = 1.0302
Let's assume that the frequency (or probability) of each level's occurrence in the choice sets is as follows:

PhaseOut: 30%
StatusQuo: 20%
Stop&Maintain: 25%
Stop&Reduce: 25%
Now, calculate the weighted average importance score for "att_1":
Weighted Average Importance Score for att_1 = (NIS(PhaseOut) * 0.30) + (NIS(StatusQuo) * 0.20) + (NIS(Stop&Maintain) * 0.25) + (NIS(Stop&Reduce) * 0.25)
Weighted Average Importance Score for att_1 = (1.0000 * 0.30) + (0.3379 * 0.20) + (0.1668 * 0.25) + (1.0302 * 0.25) ≈ 0.5623

Repeat this process for each attribute to obtain aggregated importance scores, and then rank the attributes based on these scores.

Remember that the choice of the weighting scheme (equal weights, frequency-based weights, or other relevant weights) depends on your specific context and research design.

##### Plot freq

In [25]:
freq = pd.read_csv(OUT / 'data' / 'data_freq.csv')

NIS

##### 1.3 MarginalMeans

In [13]:
df_total = pd.read_csv(OUT / "models" / "model_aware.csv")
df_total.iloc[2,1]


IndexError: index 3 is out of bounds for axis 0 with size 3

In [4]:
df_total = pd.read_csv(OUT / "data" / "data_regression.csv")

import pandas as pd
import numpy as np

def _calculate_conditional_probability(df, column_x, column_y):

    nobs = len(df)
    # Step 1: Count occurrences of X=1 and Y=1 simultaneously
    xy_count = ((df[column_x] == True) & (df[column_y] == True)).sum()
    
    # Step 2: Count occurrences of X=1
    x_count = (df[column_x] == True).sum()
    
    # Step 3: Calculate P(Y=1|X=1)
    if x_count > 0:
        probability_y_given_x = xy_count / x_count
        probability_y_given_x = probability_y_given_x.round(4)
    else:
        probability_y_given_x = np.nan
    
    # Step 4: Calculate standard deviation
    variance_y_given_x = (probability_y_given_x * (1 - probability_y_given_x)) / x_count
    std_deviation = np.sqrt(variance_y_given_x).round(4)
    
    return probability_y_given_x, std_deviation, nobs

def marginal_means(df): 
    attributes_levels = df.columns[df.columns.str.startswith('att')]

    outcome = 'support'

    marginal_means ={}

    for att_level in attributes_levels:
        results = _calculate_conditional_probability(df, att_level, outcome)
        marginal_means[f'{att_level}_MM'] = []
        marginal_means[f'{att_level}_MM'].append(results[0])
        marginal_means[f'{att_level}_MM'].append(results[1])
        marginal_means[f'{att_level}_MM'].append(results[2])

    return pd.DataFrame(marginal_means)

marginal_means = marginal_means(df_total)




In [34]:
data_info = read_yaml(r"C:\Users\sjurl\OneDrive\Desktop\MasterThesis\Analysis\conjoint\developer\final\plot_specs.yaml")


def plot_MM(MM_data, data_info, width=1.0, plot_title="Fig 3: Marginal Means on support for policy attributes"):

    order = data_info['order']
    att_1_levels = order['att_1']
    att_2_levels = order['att_2']
    att_3_levels = order['att_3']
    att_4_levels = order['att_4']
    att_5_levels = order['att_5']

    att_levels = [att_5_levels, att_4_levels, att_3_levels, att_2_levels, att_1_levels]

    att_colors = data_info['colors']

    fig = go.Figure()

    total_levels = sum(len(levels) for levels in att_levels)

    # Loop through each attribute group and add the data to the plot
    for i, levels in enumerate(att_levels):
        att_coefficients = [MM_data.iloc[0][f'att_{5-i}_{level}_MM'] for level in levels]
        att_standard_errors = [MM_data.iloc[1][f'att_{5-i}_{level}_MM']*1.97 for level in levels]

        fig.add_trace(go.Scatter(
            x=att_coefficients,
            y=levels,
            mode='markers',
            error_x=dict(type='data', array=att_standard_errors, color=att_colors[i], thickness=1.5),
            marker=dict(color='#36454F', size=10),
            orientation='h',
            showlegend = False,
        ))

        fig.add_shape(
            type="rect",
            x0=-width,  # Set a fixed value for x0, which is left side of the plot
            x1=width,  # Set the width of the shape to 1000 (right side of the plot)
            y0=total_levels - sum(len(l) for l in att_levels[i:]),  # Set y0 to the starting level index
            y1=total_levels - sum(len(l) for l in att_levels[i:]) + len(levels) - 1,  # Set y1 to the ending level index
            fillcolor=att_colors[i],
            opacity=0.1,  # Set the opacity for a light transparent effect
            layer="below",  # Place the rectangle below the scatter plot markers
        )

    # Add a vertical line at x=0 for reference
    fig.add_shape(type="line", x0=0.5, x1=0.5, y0=att_5_levels[0], y1=att_1_levels[-1], line=dict(color="gray", width=1, dash='dash'))

    # Update the layout of the error bar plot
    fig.update_layout(
        title={
            'text': plot_title,
            'x': 0.0,
            'xanchor': 'center',
            'font': {'family': 'Computer Modern'}
        },
        xaxis_title='AMCE on support (0-1)',
        yaxis_title='Attribute Levels',
        yaxis=dict(categoryorder='array', categoryarray=att_5_levels),  # Set the categoryorder for y-axis based on att_1_levels
        xaxis=dict(tickformat='.2f', zeroline=False, range=[0,1.0]),  # Remove x-axis zeroline
        showlegend=True,  # Show legend with attribute names
        margin=dict(l=80, r=30, b=40, t=80),
        height=800,  # Set the height of the plot to 600 pixels
        width=1000,
        title_x=0.50,
        paper_bgcolor="#EADDCA",
        plot_bgcolor='rgba(0,0,0,0)',
    ) 

    # Show the interactive error bar plot
    return fig

plot_MM(marginal_means, data_info)

##### 1.4 Maybe: PCA Analysis:

Data Preparation:

Prepare your DCE data matrix, where each row represents a respondent's choice set, and columns represent different attribute levels.
Standardization:

Standardize the data by subtracting the mean and dividing by the standard deviation for each attribute. This ensures that all attributes are on similar scales and prevents attributes with larger variances from dominating the PCA.
Perform PCA:

Apply PCA to the standardized data matrix. The output of PCA will include the principal components and their associated eigenvalues.
Interpretation:

Examine the explained variance for each principal component. This helps you understand how much of the total variance in the data each component explains.
Look at the loadings (weights) of the original attributes on each principal component. These loadings indicate the strength and direction of the relationship between the attribute and the principal component.
Attribute Relationships:

PCA can provide insights into how attributes are related to each other. For example, attributes that have high loadings on the same principal component are positively correlated, while those with opposite loadings are negatively correlated.
Decision Support:

While PCA itself may not directly provide attribute importance scores for policy package choices, the derived principal components can help you identify patterns or relationships that might influence choices. These insights can then be used in conjunction with other methods to understand attribute importance.