In [12]:
import pandas as pd
import numpy as np
import math

# Selects which set of input paths the loading cell uses: "Sky" picks the
# repo-relative paths; any other value falls through to Varinia's local paths.
env = "Sky" # enter Sky or Varinia

In [13]:
# Choose the two input CSVs for the selected environment. Any value of `env`
# other than "Sky" uses Varinia's local Windows paths.
data_path_1, data_path_2 = (
    (
        '../Expert_response/all_HIT_answer.csv',
        '../Expert_response/master_all_responses_May-14-2023_to_May-18-2023_Dan.csv',
    )
    if env == "Sky"
    else (
        r'C:\Users\varin\Desktop\UBC\Lameness_project\all_HIT_answer.csv',
        r'C:\Users\varin\Desktop\UBC\Lameness_project\master_all_responses_Dan.csv',
    )
)

# df_1: one row per question shown (cows, question number, HIT).
# df_2: one row per submitted response set (columns q1..q12 plus HIT).
df_1 = pd.read_csv(data_path_1)
df_2 = pd.read_csv(data_path_2)

In [14]:
def process_data(df_1, df_2):
    """Prepare the question table and the response table for merging.

    Parameters
    ----------
    df_1 : pandas.DataFrame
        Question table. Must contain the columns ``question_type``, ``cow_L``,
        ``cow_R``, ``question_num`` (strings with a one-character prefix,
        e.g. ``"q7"``) and ``HIT``.
    df_2 : pandas.DataFrame
        Response table. Must contain the columns ``q1`` .. ``q12`` and ``HIT``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``all_q2``: columns ``cow_L``, ``cow_R``, ``question_num`` (integer)
        and ``HIT``. It holds every question that is not an attention check,
        followed by the first ``pos_attention_easy`` row (if any), on a fresh
        0..n-1 index.
        ``response``: a copy of ``df_2`` restricted to ``q1`` .. ``q12`` and
        ``HIT``.

    Raises
    ------
    KeyError
        If a required column is missing from either input.
    """
    # Drop every attention check, then add back a single copy of the positive one.
    attention_types = ["neg_attention", "pos_attention_easy"]
    all_test_q = df_1[~df_1['question_type'].isin(attention_types)]
    pos_attention_q = df_1[df_1['question_type'] == "pos_attention_easy"].head(1)

    # ignore_index=True already yields a clean 0..n-1 index.
    all_q = pd.concat([all_test_q, pos_attention_q], ignore_index=True)

    all_q2 = all_q[['cow_L', 'cow_R', 'question_num', 'HIT']].copy()
    # Replace the whole column rather than assigning through .loc[:, ...]:
    # on pandas >= 2.0 the .loc form writes into the existing column in place,
    # so it would keep its original (string/object) dtype instead of becoming int.
    all_q2['question_num'] = all_q2['question_num'].str[1:].astype(int)

    # Response columns q1..q12 plus the HIT identifier.
    response_columns = [f"q{i}" for i in range(1, 13)] + ['HIT']
    response = df_2[response_columns].copy()

    return all_q2, response


In [15]:
def reshape_and_remove_nan(df):
    """Convert the wide response table to long format and drop missing rows.

    ``df`` must contain ``HIT`` and the columns ``q1`` .. ``q12``. The result
    has one row per (HIT, question) with columns ``HIT``, ``question_num``
    (the integer after the leading character of the column name) and
    ``response``; rows with a missing value in any column are removed.
    """
    question_columns = [f"q{k}" for k in range(1, 13)]
    long_df = pd.melt(
        df,
        id_vars='HIT',
        value_vars=question_columns,
        var_name='question_num',
        value_name='response',
    )
    # "q7" -> 7, so the key lines up with the integer question numbers.
    long_df['question_num'] = long_df['question_num'].str[1:].astype(int)
    return long_df.dropna()


In [16]:
def process_and_merge(df_1, df_2):
    """Join each recorded response to the pair of cows it refers to.

    Runs ``process_data`` and ``reshape_and_remove_nan`` and inner-joins their
    results on ``HIT`` and ``question_num``. Returns a frame with the columns
    ``cow_L``, ``cow_R`` and ``response``.
    """
    questions, wide_responses = process_data(df_1, df_2)
    long_responses = reshape_and_remove_nan(wide_responses)
    joined = pd.merge(
        questions,
        long_responses,
        how='inner',
        on=['HIT', 'question_num'],
    )
    return joined[['cow_L', 'cow_R', 'response']]


In [17]:
def create_winner_loser_degree_df(final_df):
    """Convert signed left/right responses into winner/loser/degree rows.

    Parameters
    ----------
    final_df : pandas.DataFrame
        Must contain ``cow_L``, ``cow_R`` and a numeric ``response`` column.

    Returns
    -------
    pandas.DataFrame
        One row per input row, on a fresh 0..n-1 index, with columns:
        ``winner`` - ``cow_L`` when ``response <= 0`` (a response of 0 is
        recorded with the left cow as winner), otherwise ``cow_R``;
        ``loser``  - the other cow;
        ``degree`` - ``abs(response)``.

    Notes
    -----
    The computation is vectorised instead of looping with ``iterrows``:
    ``iterrows`` builds one Series per row, which upcasts integer cow IDs to
    float whenever ``response`` is a float column. Column-wise selection keeps
    each column's own dtype.
    """
    left_wins = final_df['response'] <= 0

    result = pd.DataFrame({
        'winner': final_df['cow_L'].where(left_wins, final_df['cow_R']),
        'loser': final_df['cow_R'].where(left_wins, final_df['cow_L']),
        'degree': final_df['response'].abs(),
    })

    # The row-by-row version always returned a default RangeIndex.
    return result.reset_index(drop=True)


In [18]:
# Run the merge pipeline on the loaded tables and convert the signed responses
# into winner/loser/degree rows.
final_df = process_and_merge(df_1, df_2)
winner_loser_df = create_winner_loser_degree_df(final_df)

# Save the result. This output path is repo-relative and is used regardless of
# the `env` setting.
winner_loser_df.to_csv(
    '../Expert_response/processed_result/winner_loser_Dan.csv',
    index=False,
)

In [25]:
# Wide winner-by-loser table of degrees; pairs that never occur are filled with 0.
# NOTE(review): DataFrame.pivot raises ValueError if the same (winner, loser)
# pair appears more than once - confirm pairs are unique, or aggregate first.
matrix = (
    winner_loser_df
    .pivot(index='winner', columns='loser', values='degree')
    .fillna(0)
)

In [4]:
def direct_responses(df_1,df_2):
    """Pair every non-missing response with the two cows of its question.

    Parameters
    ----------
    df_1 : pandas.DataFrame
        Question table with columns ``question_num``, ``HIT``, ``cow_L`` and
        ``cow_R``.
    df_2 : pandas.DataFrame
        Response table. Each column name is looked up in
        ``df_1['question_num']``, and the row at position ``j`` is matched
        against ``df_1['HIT'] == j``.

    Returns
    -------
    pandas.DataFrame
        Columns ``cow_L``, ``cow_R``, ``response``; one row per non-missing
        response, ordered by response row and then by column. An empty frame
        with these three columns is returned when nothing is kept.

    Raises
    ------
    ValueError
        If more than one row of ``df_1`` matches a (HIT, question) pair.
    """
    out_columns = ['cow_L', 'cow_R', 'response']

    # Pull each response column out once; slicing the arrays below keeps the
    # column's own dtype for the value that ends up in the row.
    response_columns = [
        (name, df_2.iloc[:, pos].to_numpy())
        for pos, name in enumerate(df_2.columns)
    ]

    rows = []
    for hit in range(df_2.shape[0]):
        # Filter the question table once per HIT instead of twice per cell.
        hit_questions = df_1[df_1['HIT'] == hit]
        for question, values in response_columns:
            response = values[hit:hit + 1]
            if pd.isna(response[0]):
                continue
            match = hit_questions[hit_questions['question_num'] == question]
            if match.empty:
                # This column is not a question of this HIT; emitting a row
                # here would produce a short, malformed record.
                continue
            if len(match) > 1:
                raise ValueError(
                    f"{len(match)} questions match HIT {hit!r}, question {question!r}; expected 1"
                )
            row = np.concatenate(
                (match['cow_L'].to_numpy(), match['cow_R'].to_numpy(), response)
            )
            rows.append(row.tolist())

    # Passing `columns` here keeps the empty case valid, whereas assigning
    # three names to a column-less frame afterwards raises.
    return pd.DataFrame(rows, columns=out_columns)


In [5]:
#responses=direct_responses(df_1,df_2)
#display(responses)

Unnamed: 0,cow_L,cow_R,response
0,6029.0,7138.0,-1.0
1,5087.0,4008.0,-3.0
2,6092.0,7114.0,0.0
3,6053.0,6088.0,1.0
4,4035.0,7060.0,-1.0
...,...,...,...
537,5087.0,5064.0,-1.0
538,6046.0,4035.0,2.0
539,5087.0,6068.0,-2.0
540,7109.0,7153.0,-1.0


In [11]:
def simple_responses(df_1,df_2):
    """Pair every non-missing response with its cows, keeping only the sign.

    Parameters
    ----------
    df_1 : pandas.DataFrame
        Question table with columns ``question_num``, ``HIT``, ``cow_L`` and
        ``cow_R``.
    df_2 : pandas.DataFrame
        Response table. Each column name is looked up in
        ``df_1['question_num']``, and the row at position ``j`` is matched
        against ``df_1['HIT'] == j``.

    Returns
    -------
    pandas.DataFrame
        Columns ``cow_L``, ``cow_R``, ``response``, where ``response`` is
        ``-1`` for a negative answer, ``0`` for zero and ``1`` otherwise. An
        empty frame with these three columns is returned when nothing is kept.

    Raises
    ------
    ValueError
        If more than one row of ``df_1`` matches a (HIT, question) pair.
    """
    out_columns = ['cow_L', 'cow_R', 'response']

    # Pull each response column out once as an array.
    response_columns = [
        (name, df_2.iloc[:, pos].to_numpy())
        for pos, name in enumerate(df_2.columns)
    ]

    rows = []
    for hit in range(df_2.shape[0]):
        # Filter the question table once per HIT instead of twice per cell.
        hit_questions = df_1[df_1['HIT'] == hit]
        for question, values in response_columns:
            value = values[hit]
            if pd.isna(value):
                continue
            match = hit_questions[hit_questions['question_num'] == question]
            if match.empty:
                # This column is not a question of this HIT; emitting a row
                # here would produce a short, malformed record.
                continue
            if len(match) > 1:
                raise ValueError(
                    f"{len(match)} questions match HIT {hit!r}, question {question!r}; expected 1"
                )
            if value == 0:
                sign = 0
            elif value < 0:
                sign = -1
            else:
                sign = 1
            row = np.concatenate(
                (match['cow_L'].to_numpy(), match['cow_R'].to_numpy(), [sign])
            )
            rows.append(row.tolist())

    # Passing `columns` here keeps the empty case valid, whereas assigning
    # three names to a column-less frame afterwards raises.
    return pd.DataFrame(rows, columns=out_columns)

In [12]:
#responses=simple_responses(df_1,df_2)
#print(responses)

     cow_L  cow_R  response
0     6029   7138        -1
1     5087   4008        -1
2     6092   7114         0
3     6053   6088         1
4     4035   7060        -1
..     ...    ...       ...
537   5087   5064        -1
538   6046   4035         1
539   5087   6068        -1
540   7109   7153        -1
541   6068   5118        -1

[542 rows x 3 columns]


In [15]:
def winner_loser_degree(df_1,df_2):
    """Build winner/loser/degree rows from the direct responses.

    Calls ``direct_responses(df_1, df_2)`` and, for each returned row:

    * response <= 0: winner is ``cow_L`` and loser is ``cow_R`` (a response
      of 0 is recorded with the left cow as winner and degree 0);
    * response > 0: winner is ``cow_R`` and loser is ``cow_L``;
    * degree is ``abs(int(response))``, i.e. the response truncated toward
      zero before taking the magnitude.

    Returns
    -------
    pandas.DataFrame
        Columns ``winner``, ``loser``, ``degree``. An empty frame with these
        three columns is returned when there are no responses.
    """
    direct_response = direct_responses(df_1, df_2)

    # Extract the columns once instead of building a one-row frame per row.
    left = direct_response['cow_L'].to_numpy()
    right = direct_response['cow_R'].to_numpy()
    responses = direct_response['response'].to_numpy()

    rows = []
    for j in range(direct_response.shape[0]):
        # One-element slices keep each column's dtype in the concatenation.
        cow_L, cow_R = left[j:j + 1], right[j:j + 1]
        response = responses[j]
        if response == 0:
            row = np.concatenate((cow_L, cow_R, [0]))
        elif response < 0:
            row = np.concatenate((cow_L, cow_R, [abs(int(response))]))
        else:
            row = np.concatenate((cow_R, cow_L, [abs(int(response))]))
        rows.append(row.tolist())

    # Passing `columns` here keeps the empty case valid, whereas assigning
    # three names to a column-less frame afterwards raises.
    return pd.DataFrame(rows, columns=['winner', 'loser', 'degree'])


In [16]:
# Winner/loser/degree table built through direct_responses.
# NOTE(review): direct_responses looks every df_2 column name up in
# df_1['question_num'] and matches df_1['HIT'] against the df_2 row position,
# so df_1/df_2 presumably need to be in that shape here - confirm.
responses = winner_loser_degree(df_1, df_2)
print(responses)

     winner   loser  degree
0    6029.0  7138.0     1.0
1    5087.0  4008.0     3.0
2    6092.0  7114.0     0.0
3    6088.0  6053.0     1.0
4    4035.0  7060.0     1.0
..      ...     ...     ...
537  5087.0  5064.0     1.0
538  4035.0  6046.0     2.0
539  5087.0  6068.0     2.0
540  7109.0  7153.0     1.0
541  6068.0  5118.0     1.0

[542 rows x 3 columns]
