In [None]:
import pandas as pd

In [12]:
def transform_file(input_file, output_file):
    """Copy the per-aspect columns of an Excel workbook into a new workbook.

    The workbook at ``input_file`` is read with ``pd.read_excel``. For every
    row, the four identifying columns are kept, followed by the review,
    aspect and utility-flag columns for aspects ``1..Input.maxAspects`` of
    that row. Aspect columns absent from the workbook are filled with
    ``None``. The result is written to ``output_file`` without the index and
    a confirmation line is printed.

    Args:
        input_file: Path of the workbook to read.
        output_file: Path of the workbook to write.

    Raises:
        KeyError: If one of the four identifying columns is missing.
        ValueError: If ``Input.maxAspects`` cannot be converted to ``int``.
    """
    # Columns copied verbatim from every row, in output order.
    fixed_columns = (
        'WorkerId',
        'Input.profile',
        'Input.businessId',
        'Input.maxAspects',
    )
    # Per-aspect column name templates; `{}` is the 1-based aspect number.
    aspect_templates = (
        'Input.review{}',
        'Input.aspect{}',
        'Answer.utilityValue{}.High',
        'Answer.utilityValue{}.Low',
        'Answer.utilityValue{}.Medium',
        'Answer.utilityValue{}.None',
    )

    source = pd.read_excel(input_file)

    records = []
    for _, entry in source.iterrows():
        # Each row declares how many of its aspect slots are in use.
        aspect_count = int(entry['Input.maxAspects'])
        record = {name: entry[name] for name in fixed_columns}

        for number in range(1, aspect_count + 1):
            for template in aspect_templates:
                column = template.format(number)
                record[column] = entry.get(column, None)

        records.append(record)

    pd.DataFrame(records).to_excel(output_file, index=False)
    print(f"Transformed file saved to {output_file}")

# Workbook to read and the destination for the column-trimmed copy.
# NOTE(review): both are absolute, machine-specific paths; this cell only runs
# where this directory layout exists. Consider a configurable base directory.
input_file_path = "/Users/innerpiece92/Desktop/NLP_Workspace/AArec/mturk/mturk-marketplace-ready/test/results_evaluation/hair_salons/Worker_3.xlsx"
output_file_path = "/Users/innerpiece92/Desktop/NLP_Workspace/AArec/mturk/mturk-marketplace-ready/test/results_evaluation/hair_salons/Worker_3_filtered_results_F.xlsx"

# Reads input_file_path, writes output_file_path, and prints a confirmation.
transform_file(input_file_path, output_file_path)

Transformed file saved to /Users/innerpiece92/Desktop/NLP_Workspace/AArec/mturk/mturk-marketplace-ready/test/results_evaluation/hair_salons/Worker_3_filtered_results_F.xlsx


In [13]:
def _is_checked(value):
    """Return True when a utility-flag cell holds an affirmative value.

    Missing cells (``None``, ``NaN``, ``pd.NA``) count as unchecked. Plain
    truthiness cannot be used for them because ``bool(float('nan'))`` is
    ``True``.
    """
    if isinstance(value, str):
        # NOTE(review): assumes textual flags spell "false"/"0"/"no"/"off" for
        # unchecked; any other non-empty text is still treated as checked,
        # as before — confirm against the real export format.
        return value.strip().lower() not in {'', 'false', '0', 'no', 'off'}
    if pd.isna(value):
        return False
    return bool(value)


def transform_input_to_output(input_df):
    """Reshape wide per-aspect rows into one output row per rated aspect.

    For each input row, aspects ``1..Input.maxAspects`` are inspected. An
    aspect produces an output row only when its review and aspect columns
    exist and are non-null, and at least one of its utility flags
    (``Answer.utilityValue{i}.High/Medium/Low/None``) is checked. When
    several flags are checked the first in the order High, Medium, Low,
    None wins.

    Args:
        input_df: DataFrame with ``WorkerId``, ``Input.profile``,
            ``Input.businessId``, ``Input.maxAspects`` and the numbered
            per-aspect columns.

    Returns:
        DataFrame with columns ``WorkerId``, ``Input.profile``,
        ``Input.businessId``, ``Input.maxAspects``, ``Input.review``,
        ``Input.aspect`` and ``Answer.utilityValue``. The columns are present
        even when no row qualifies.

    Raises:
        KeyError: If one of the four identifying columns is missing.
        ValueError: If ``Input.maxAspects`` cannot be converted to ``int``.
    """
    output_columns = [
        'WorkerId',
        'Input.profile',
        'Input.businessId',
        'Input.maxAspects',
        'Input.review',
        'Input.aspect',
        'Answer.utilityValue',
    ]
    # Order matters: it is the precedence used when more than one flag is set.
    utility_levels = ('High', 'Medium', 'Low', 'None')

    transformed_rows = []

    for _, row in input_df.iterrows():
        workerid = row['WorkerId']
        profile = row['Input.profile']
        business_id = row['Input.businessId']
        max_aspects = int(row['Input.maxAspects'])

        for i in range(1, max_aspects + 1):  # Iterate only up to `Input.maxAspects`
            review_col = f'Input.review{i}'
            aspect_col = f'Input.aspect{i}'

            # Skip aspects whose review/aspect columns are absent or empty.
            if review_col not in row or aspect_col not in row:
                continue
            if pd.isna(row[review_col]) or pd.isna(row[aspect_col]):
                continue

            # First checked flag in precedence order; a missing flag column
            # yields None from `.get`, which counts as unchecked.
            utility_value = next(
                (
                    level
                    for level in utility_levels
                    if _is_checked(row.get(f'Answer.utilityValue{i}.{level}'))
                ),
                None,
            )
            if utility_value is None:
                continue  # No rating given for this aspect.

            transformed_rows.append({
                'WorkerId': workerid,
                'Input.profile': profile,
                'Input.businessId': business_id,
                'Input.maxAspects': max_aspects,
                'Input.review': row[review_col],
                'Input.aspect': row[aspect_col],
                'Answer.utilityValue': utility_value
            })

    # Passing `columns` keeps the schema stable for an empty result.
    return pd.DataFrame(transformed_rows, columns=output_columns)

# Input is the workbook written to output_file_path by the transform_file cell;
# the output is the long-format workbook built from it.
# NOTE(review): absolute, machine-specific paths, and these two names are
# re-bound here to different files than in the earlier cell.
input_file_path = "/Users/innerpiece92/Desktop/NLP_Workspace/AArec/mturk/mturk-marketplace-ready/test/results_evaluation/hair_salons/Worker_3_filtered_results_F.xlsx"
output_file_path = "/Users/innerpiece92/Desktop/NLP_Workspace/AArec/mturk/mturk-marketplace-ready/test/results_evaluation/hair_salons/Worker_3_batch_matching_ready_results_F.xlsx"

# Load, reshape to one row per rated aspect, and save without the index.
input2_df = pd.read_excel(input_file_path)
output2_df = transform_input_to_output(input2_df)
output2_df.to_excel(output_file_path, index=False)
print(f"Transformed file saved to {output_file_path}")

Transformed file saved to /Users/innerpiece92/Desktop/NLP_Workspace/AArec/mturk/mturk-marketplace-ready/test/results_evaluation/hair_salons/Worker_3_batch_matching_ready_results_F.xlsx
