In [7]:
pip install openpyxl

Note: you may need to restart the kernel to use updated packages.


In [26]:
import pandas as pd
import numpy as np

# --- SECTION 1: LOAD AND PREPARE DATA ---
file_path = 'Survey_input.xlsx'

# Load the sheets
df_survey = pd.read_excel(file_path, sheet_name='Survey Results')
df_pw_raw = pd.read_excel(file_path, sheet_name='Part worths', skiprows=1).dropna(axis=1, how='all')
df_design = pd.read_excel(file_path, sheet_name='Design matrix')

# Extract Part-Worths
part_worths = {'Cut width': {}, 'Engine Disp': {}, 'Price': {}, 'None': {}}
for i in range(len(df_pw_raw)):
    if pd.notna(df_pw_raw.iloc[i, 0]):
        part_worths['Cut width'][str(df_pw_raw.iloc[i, 0]).replace('"', '').strip()] = float(df_pw_raw.iloc[i, 1])
    if pd.notna(df_pw_raw.iloc[i, 2]):
        part_worths['Engine Disp'][str(df_pw_raw.iloc[i, 2]).strip()] = float(df_pw_raw.iloc[i, 3])
    if pd.notna(df_pw_raw.iloc[i, 4]):
        part_worths['Price'][str(df_pw_raw.iloc[i, 4]).strip()] = float(df_pw_raw.iloc[i, 5])
        
part_worths['None']['Bnone'] = float(df_pw_raw.iloc[0, 7])

# Lists for index mapping
cut_levels = list(part_worths['Cut width'].keys())
eng_levels = list(part_worths['Engine Disp'].keys())
price_levels = list(part_worths['Price'].keys())

# Clean Design Matrix
df_design['Task'] = df_design['Task'].fillna(method='ffill')
df_design = df_design.dropna(subset=['Concept']).copy()

# --- SECTION 2: CALCULATE ---
task_columns = [col for col in df_survey.columns if 'Task' in col]
choice_counts = {}

for task_idx, col in enumerate(task_columns, start=1):
    counts = df_survey[col].value_counts().to_dict()
    for concept_choice, count in counts.items():
        # Handle 4 as 'None', others as integer strings
        concept_key = 'None' if concept_choice == 4 else str(int(float(concept_choice)))
        choice_counts[(task_idx, concept_key)] = count

def clean_concept(x):
    if str(x).lower() == 'none' or pd.isna(x):
        return 'None'
    return str(int(float(x)))

df_design['Summation'] = df_design.apply(
    lambda row: choice_counts.get((int(float(row['Task'])), clean_concept(row['Concept'])), 0), axis=1
)

def get_utility(row):
    concept = str(row['Concept']).lower()
    if concept == 'none' or concept == 'nan':
        return part_worths['None']['Bnone']
    u_cut = part_worths['Cut width'][cut_levels[int(float(row['Cut width'])) - 1]]
    u_eng = part_worths['Engine Disp'][eng_levels[int(float(row['Engine disp'])) - 1]]
    u_price = part_worths['Price'][price_levels[int(float(row['Price'])) - 1]]
    return u_cut + u_eng + u_price

df_design['Total V'] = df_design.apply(get_utility, axis=1)
df_design['exp(Vi)'] = np.exp(df_design['Total V'])

# P(i) and Log Likelihood
task_exp_sums = df_design.groupby('Task')['exp(Vi)'].transform('sum')
df_design['P(i)'] = df_design['exp(Vi)'] / task_exp_sums
df_design['Log L components'] = df_design['Summation'] * np.log(df_design['P(i)'] + 1e-10)

total_log_likelihood = df_design['Log L components'].sum()
print(f"Total Log Likelihood: {total_log_likelihood}")

Total Log Likelihood: -10933.177032652435


  df_design['Task'] = df_design['Task'].fillna(method='ffill')


In [22]:
import os
print(os.getcwd())

E:\NCSU Spring '26\Python
