In [7]:
pip install openpyxl

Note: you may need to restart the kernel to use updated packages.


In [11]:
import pandas as pd
import numpy as np

# Positional layout of the 'Part worths' sheet once empty spacer columns are
# dropped: (attribute name, level column, beta column, strip '"' from labels).
_PART_WORTH_COLUMNS = (
    ('Cut width', 0, 1, True),
    ('Engine Disp', 2, 3, False),
    ('Price', 4, 5, False),
)
# Column pair holding the single 'None' (no-purchase) parameter.
_NONE_COLUMNS = (6, 7)
_MIN_PART_WORTH_COLUMNS = 8


def _format_level(level, strip_quotes=False):
    """Return a clean string label for one level cell of the part-worths sheet."""
    # pandas stores integer cells as floats when the same column also contains
    # blanks, so 170 can arrive as 170.0. Render integral floats without the
    # trailing '.0' so the dictionary keys do not depend on column padding.
    if isinstance(level, float) and level.is_integer():
        level = int(level)
    label = str(level)
    if strip_quotes:
        label = label.replace('"', '')
    return label.strip()


def _extract_levels(df_pw, level_col, beta_col, strip_quotes=False):
    """Build a {level label: beta} dict from one (level, beta) column pair."""
    levels = {}
    for level, beta in zip(df_pw.iloc[:, level_col], df_pw.iloc[:, beta_col]):
        # Rows without a level label are padding below shorter attributes.
        if pd.notna(level):
            levels[_format_level(level, strip_quotes)] = float(beta)
    return levels


def load_conjoint_data(file_path):
    """Load the survey responses and initial part-worths from a conjoint workbook.

    Parameters
    ----------
    file_path : str or path-like
        Excel workbook containing a 'Survey Results' sheet and a 'Part worths'
        sheet. The part-worths sheet is read with its first row skipped and is
        expected to hold, after empty columns are dropped, four side-by-side
        (level, beta) column pairs: Cut width, Engine Disp, Price, None.

    Returns
    -------
    tuple
        ``(df_survey, part_worths)`` where ``df_survey`` is the 'Survey Results'
        sheet as a DataFrame and ``part_worths`` maps each of 'Cut width',
        'Engine Disp', 'Price' and 'None' to a ``{level label: beta}`` dict.

    Raises
    ------
    IndexError
        If the part-worths sheet has fewer than eight non-empty columns.
    """
    df_survey = pd.read_excel(file_path, sheet_name='Survey Results')

    # skiprows=1 skips the first row of the sheet; dropping all-empty columns
    # removes spacer columns so the positional layout above lines up.
    df_pw = pd.read_excel(file_path, sheet_name='Part worths', skiprows=1)
    df_pw = df_pw.dropna(axis=1, how='all')

    if df_pw.shape[1] < _MIN_PART_WORTH_COLUMNS:
        raise IndexError(
            f"'Part worths' sheet has {df_pw.shape[1]} non-empty column(s); "
            f"expected at least {_MIN_PART_WORTH_COLUMNS} "
            "(level/beta pairs for Cut width, Engine Disp, Price and None)"
        )

    part_worths = {
        name: _extract_levels(df_pw, level_col, beta_col, strip_quotes)
        for name, level_col, beta_col, strip_quotes in _PART_WORTH_COLUMNS
    }

    # Only the first data row is consulted for the 'None' parameter.
    none_level_col, none_beta_col = _NONE_COLUMNS
    part_worths['None'] = _extract_levels(df_pw.iloc[:1], none_level_col, none_beta_col)

    return df_survey, part_worths

# Load the workbook; the relative path is looked up from the working directory.
file_path = 'Survey_input.xlsx'
survey_data, initial_part_worths = load_conjoint_data(file_path)

# Quick sanity check of what was loaded.
print("Survey Data Shape:", survey_data.shape)
print("\nExtracted Part-Worths:")
for attribute in initial_part_worths:
    levels = initial_part_worths[attribute]
    print(f"{attribute}: {levels}")

Survey Data Shape: (79, 11)

Extracted Part-Worths:
Cut width: {'18': 0.0, '20': 0.1, '22': 0.2}
Engine Disp: {'140cc': 0.0, '165cc': 1.0, '190cc': 2.0}
Price: {'170': 0.0, '225': 10.0, '280': 20.0}
None: {'Bnone': -5.0}


In [3]:
# Print the kernel's current working directory; useful for checking where a
# relative path such as 'Survey_input.xlsx' will be looked up.
import os
print(os.getcwd())

E:\NCSU Spring '26\Python
