In [6]:
import pandas as pd

# Load the CSV file to inspect the data
file_path = 'data/Human_Evaluation.csv'
data = pd.read_csv(file_path)

# Display the first few rows and the structure of the data
data.head()

Unnamed: 0,Question_1,Unnamed: 1,Unnamed: 2,Unnamed: 3,Question_2,Unnamed: 5,Unnamed: 6,Unnamed: 7,Question_3,Unnamed: 9,...,Unnamed: 30,Unnamed: 31,Question_9,Unnamed: 33,Unnamed: 34,Unnamed: 35,Question_10,Unnamed: 37,Unnamed: 38,Unnamed: 39
0,Model_A,Model_B,Model_C,Model_D,Model_A,Model_B,Model_C,Model_D,Model_A,Model_B,...,Model_C,Model_D,Model_A,Model_B,Model_C,Model_D,Model_A,Model_B,Model_C,Model_D
1,4th (Bad),3rd,2nd,1st (Best),4th (Bad),1st (Best),3rd,2nd,2nd,1st (Best),...,4th (Bad),2nd,1st (Best),4th (Bad),2nd,3rd,1st (Best),4th (Bad),3rd,2nd
2,3rd,1st (Best),2nd,4th (Bad),4th (Bad),2nd,3rd,1st (Best),4th (Bad),3rd,...,2nd,1st (Best),4th (Bad),1st (Best),3rd,2nd,4th (Bad),1st (Best),3rd,2nd
3,4th (Bad),3rd,2nd,1st (Best),4th (Bad),2nd,3rd,1st (Best),4th (Bad),3rd,...,4th (Bad),3rd,2nd,4th (Bad),3rd,1st (Best),1st (Best),4th (Bad),3rd,2nd
4,4th (Bad),2nd,3rd,1st (Best),4th (Bad),2nd,3rd,1st (Best),4th (Bad),2nd,...,4th (Bad),3rd,2nd,4th (Bad),3rd,1st (Best),4th (Bad),3rd,2nd,1st (Best)


In [8]:
# Strip the data frame from any redundant columns and rows (like headers repeated)
data_cleaned = data.iloc[1:, :].copy()  # Ignore the first row which repeats the model names

# Rename columns based on the content: each question block has 4 columns for 4 models
columns = []
for i in range(1, 11):  # Assuming there are 10 questions from the data head
    for model in ['Model_A', 'Model_B', 'Model_C', 'Model_D']:
        columns.append(f"Question_{i}_{model}")

data_cleaned.columns = columns

# Replace ordinal rankings with numeric scores
rank_mapping = {'1st (Best)': 1, '2nd': 2, '3rd': 3, '4th (Bad)': 4}
data_cleaned.replace(rank_mapping, inplace=True)

# Display the cleaned data to verify transformations
data_cleaned = data_cleaned.dropna()

data_cleaned.insert(0, "humen number", [1, 2, 3, 4, 5], True)

data_cleaned

Unnamed: 0,humen number,Question_1_Model_A,Question_1_Model_B,Question_1_Model_C,Question_1_Model_D,Question_2_Model_A,Question_2_Model_B,Question_2_Model_C,Question_2_Model_D,Question_3_Model_A,...,Question_8_Model_C,Question_8_Model_D,Question_9_Model_A,Question_9_Model_B,Question_9_Model_C,Question_9_Model_D,Question_10_Model_A,Question_10_Model_B,Question_10_Model_C,Question_10_Model_D
1,1,4.0,3.0,2.0,1.0,4.0,1.0,3.0,2.0,2.0,...,4.0,2.0,1.0,4.0,2.0,3.0,1.0,4.0,3.0,2.0
2,2,3.0,1.0,2.0,4.0,4.0,2.0,3.0,1.0,4.0,...,2.0,1.0,4.0,1.0,3.0,2.0,4.0,1.0,3.0,2.0
3,3,4.0,3.0,2.0,1.0,4.0,2.0,3.0,1.0,4.0,...,4.0,3.0,2.0,4.0,3.0,1.0,1.0,4.0,3.0,2.0
4,4,4.0,2.0,3.0,1.0,4.0,2.0,3.0,1.0,4.0,...,4.0,3.0,2.0,4.0,3.0,1.0,4.0,3.0,2.0,1.0
5,5,4.0,3.0,2.0,1.0,4.0,3.0,2.0,1.0,4.0,...,2.0,1.0,4.0,3.0,2.0,1.0,4.0,3.0,2.0,1.0


In [9]:
data_cleaned.to_csv('data/Human_Evaluation_Cleaned.csv', index=False)

In [13]:
from sklearn.preprocessing import LabelEncoder

# Reshape the data from wide to long format
long_format = pd.melt(data_cleaned, id_vars=['humen number'], var_name='question_model', value_name='rank')

# Split the 'question_model' column into separate 'question' and 'model' columns
long_format[['question', 'model']] = long_format['question_model'].str.rsplit('_', n=1, expand=True)
long_format.drop(columns='question_model', inplace=True)

# Convert 'question' and 'model' to categorical codes
label_encoder_question = LabelEncoder()
label_encoder_model = LabelEncoder()
long_format['question'] = label_encoder_question.fit_transform(long_format['question'])
long_format['model'] = label_encoder_model.fit_transform(long_format['model'])

# View the restructured data
long_format.to_csv('data/Human_Evaluation_Long.csv', index=False)
# long_format

In [38]:
# Load your data
data = pd.read_csv('data/Human_Evaluation_Long.csv')

print(data)

     humen number  rank  question  model
0               1   4.0         1      0
1               2   3.0         1      0
2               3   4.0         1      0
3               4   4.0         1      0
4               5   4.0         1      0
..            ...   ...       ...    ...
195             1   2.0         0      3
196             2   2.0         0      3
197             3   2.0         0      3
198             4   1.0         0      3
199             5   1.0         0      3

[200 rows x 4 columns]


In [39]:
# Calculate average rank for each model
average_rank_per_model_level_1 = data[0:80].groupby('model')['rank'].mean().sort_values()

average_rank_per_model_level_1

model
3    1.40
2    2.30
1    2.45
0    3.85
Name: rank, dtype: float64

In [40]:
# Calculate average rank for each model
average_rank_per_model_level_2 = data[80:160].groupby('model')['rank'].mean().sort_values()

average_rank_per_model_level_2

model
3    1.55
2    2.30
1    2.65
0    3.50
Name: rank, dtype: float64

In [41]:
# Calculate average rank for each model
average_rank_per_model_level_3 = data[160:180].groupby('model')['rank'].mean().sort_values()

average_rank_per_model_level_3

model
3    1.6
0    2.6
2    2.6
1    3.2
Name: rank, dtype: float64

In [42]:
# Calculate average rank for each model
average_rank_per_model_level_4 = data[180:200].groupby('model')['rank'].mean().sort_values()

average_rank_per_model_level_4

model
3    1.6
2    2.6
0    2.8
1    3.0
Name: rank, dtype: float64