In [7]:
pip install openpyxl

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import numpy as np

# --- SECTION 1: LOAD AND PREPARE DATA ---
file_path = 'Survey_input.xlsx'

# Load only the necessary sheets
df_survey = pd.read_excel(file_path, sheet_name='Survey Results')
df_design = pd.read_excel(file_path, sheet_name='Design matrix')

# Clean Design Matrix
df_design['Task'] = df_design['Task'].ffill()
df_design = df_design.dropna(subset=['Concept']).copy()

# --- SECTION 2: CALCULATE SUMMATIONS ---
task_columns = [col for col in df_survey.columns if 'Task' in col]
choice_counts = {}

# Tally up the survey choices
for task_idx, col in enumerate(task_columns, start=1):
    counts = df_survey[col].value_counts().to_dict()
    for concept_choice, count in counts.items():
        # Handle 4 as 'None', others as integer strings
        concept_key = 'None' if concept_choice == 4 else str(int(float(concept_choice)))
        choice_counts[(task_idx, concept_key)] = count

def clean_concept(x):
    if str(x).lower() == 'none' or pd.isna(x):
        return 'None'
    return str(int(float(x)))

# Map the survey counts to the design matrix
df_design['Summation'] = df_design.apply(
    lambda row: choice_counts.get((int(float(row['Task'])), clean_concept(row['Concept'])), 0), axis=1
)

# --- SECTION 3: NULL MODEL CALCULATION ---
# For a null model, all attribute part-worths (betas) are explicitly 0.
# Therefore, the total utility (Total V) is exactly 0.0 for every option.
df_design['Total V'] = 0.0

# Calculate exponent, probabilities, and Log Likelihood
df_design['exp(Vi)'] = np.exp(df_design['Total V']) # This equals 1.0 for all rows

# P(i) calculations
task_exp_sums = df_design.groupby('Task')['exp(Vi)'].transform('sum')
df_design['P(i)'] = df_design['exp(Vi)'] / task_exp_sums

# Log Likelihood components
df_design['Log L components'] = df_design['Summation'] * np.log(df_design['P(i)'] + 1e-10)

null_log_likelihood = df_design['Log L components'].sum()
print(f"Null Log Likelihood: {null_log_likelihood}")

Null Log Likelihood: -801.98697050872
