#### Packages

In [274]:
import pandas as pd
import numpy as np

#### Input

In [275]:
df_SIN = pd.read_csv("data_CNCF_SIN.csv")
df_TWN = pd.read_csv("data_CNCF_TWN.csv")



### 資料處理

In [276]:
y_values = [110, 120, 130, 140, 150, 160, 170, 180, 190]
x_values = [1, 2, 4, 8]

#### SIN

In [277]:
# Initialize an empty DataFrame to store the transformed data
transformed_data = {}

# Iterate over each row to transform the data as specified
for index, row in df_SIN.iterrows():
    participant_id = row['participant']
    a = row['a']
    b = row['b']
    c = row['c']  # Value of 'a + b' which is also equal to 'c' as given
    d = row['d']  # Range from 1 to 10 (switch point)
    
    # Determine the timeframe type and construct column names based on conditions
    if b == 0 and a in [1, 2, 4, 8]:
        # Immediate timeframe
        for y in y_values:
            col_name = f"immediate_week{a}_{y}"
            # Check the value of 'd' against the tens digit of 'y'
            if d <= ((y-100) // 10) :
                value = "future"
            else:
                value = "now"
            
            # Add or update the entry in the transformed_data dictionary
            transformed_data.setdefault(participant_id, {})[col_name] = value
    
    elif b == 4 and a in [1, 2, 4, 8]:
        # Delayed timeframe
        for y in y_values:
            col_name = f"delayed_week{a}_{y}"
            # Check the value of 'd' against the tens digit of 'y'
            if d <= ((y-100) // 10) :
                value = "future"
            else:
                value = "now"
            
            # Add or update the entry in the transformed_data dictionary
            transformed_data.setdefault(participant_id, {})[col_name] = value

In [278]:
# Convert the transformed data dictionary into a DataFrame
transformed_SIN = pd.DataFrame.from_dict(transformed_data, orient='index').reset_index()
transformed_SIN.rename(columns={'index': 'ID'}, inplace=True)

##### 欄位順序

In [279]:
# Generate ordered columns
ordered_columns = ['ID']  # Start with 'ID' column
# Add immediate and delayed columns in the specified order
for timeframe in ["immediate", "delayed"]:
    for x in x_values:
        for y in y_values:
            ordered_columns.append(f"{timeframe}_week{x}_{y}")

# Reindex the DataFrame with the ordered columns (filling missing columns with NaN if necessary)
transformed_SIN = transformed_SIN.reindex(columns=ordered_columns)

##### 問卷欄位

In [280]:
transformed_SIN = transformed_SIN.merge(
    df_SIN[['participant', 'treatment', 'gender', 'race', 'econ', 'course', 'immi', 'identity', 'r1', 'l1', 'pro1', 'pro2', 'often1', 'often2']],
    left_on='ID', right_on='participant', how='left'
)

# Set the values for each column based on the merged columns
transformed_SIN['treatment'] = transformed_SIN['treatment'].apply(lambda x: "will" if x == "CF" else "no will")
transformed_SIN['race'] = transformed_SIN['race'].str.rstrip(';')
transformed_SIN['major'] = transformed_SIN['econ'].apply(lambda x: "Econ" if x == "Yes" else "Business")
transformed_SIN['year'] = transformed_SIN['course']
transformed_SIN['immigration_status'] = transformed_SIN['immi']
transformed_SIN['identity'] = transformed_SIN.apply(lambda row: row['r1'] if row['identity'] == "Others" else row['identity'], axis=1)
transformed_SIN['language'] = transformed_SIN['l1']
transformed_SIN['proficiency_English'] = transformed_SIN['pro1']
transformed_SIN['proficiency_Mandarin'] = transformed_SIN['pro2']
transformed_SIN['frequency_English'] = transformed_SIN['often1']
transformed_SIN['frequency_Mandarin'] = transformed_SIN['often2']

# Drop the redundant columns used for merging
transformed_SIN = transformed_SIN.drop(columns=['participant', 'econ', 'course', 'immi', 'r1', 'l1', 'pro1', 'pro2', 'often1', 'often2'])
transformed_SIN = transformed_SIN.drop_duplicates()
transformed_SIN['location'] = "SIN"


In [281]:
# Add columns about rational/irrational 
# 因為實驗設計是要求受試者選switch point, 所以單週的選擇一定是理性的
for week in ['week1', 'week2', 'week4', 'week8']:
    transformed_SIN[f'rational_immediate_{week}'] = "rational"
    transformed_SIN[f'rational_delayed_{week}'] = "rational"

# Helper function to determine rationality across weeks for each participant
def get_rationality_mapping(df, timeframe_label):
    sorted_df = df.sort_values(by=['participant', 'a'])
    rationality_mapping = sorted_df.groupby('participant').apply(
        lambda group: "rational" if group['d'].is_monotonic_increasing else "irrational"
    ).reset_index()
    rationality_mapping.columns = ['participant', f'rational_cross_period_{timeframe_label}']
    return rationality_mapping

# Create rationality mappings for immediate and delayed timeframes
rationality_mapping_immi = get_rationality_mapping(df_SIN[df_SIN['b'] == 0], "immediate")
rationality_mapping_del = get_rationality_mapping(df_SIN[df_SIN['b'] == 4], "delayed")

# Merge rationality mappings into transformed_TWN
transformed_SIN = (
    transformed_SIN
    .merge(rationality_mapping_immi, left_on='ID', right_on='participant', how='left')
    .merge(rationality_mapping_del, left_on='ID', right_on='participant', how='left')
    .drop(columns=['participant_x', 'participant_y'])  # Drop redundant columns
)

  rationality_mapping = sorted_df.groupby('participant').apply(
  rationality_mapping = sorted_df.groupby('participant').apply(


##### Output

In [282]:
print(transformed_SIN.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159 entries, 0 to 158
Data columns (total 96 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   ID                               159 non-null    int64  
 1   immediate_week1_110              159 non-null    object 
 2   immediate_week1_120              159 non-null    object 
 3   immediate_week1_130              159 non-null    object 
 4   immediate_week1_140              159 non-null    object 
 5   immediate_week1_150              159 non-null    object 
 6   immediate_week1_160              159 non-null    object 
 7   immediate_week1_170              159 non-null    object 
 8   immediate_week1_180              159 non-null    object 
 9   immediate_week1_190              159 non-null    object 
 10  immediate_week2_110              159 non-null    object 
 11  immediate_week2_120              159 non-null    object 
 12  immediate_week2_130   

In [283]:
transformed_SIN.to_excel("LSH_SIN.xlsx", index=False)

#### TWN

In [284]:
# Initialize an empty DataFrame to store the transformed data
transformed_data = {}

# Iterate over each row to transform the data as specified
for index, row in df_TWN.iterrows():
    participant_id = row['Participant']
    a = row['a']
    b = row['b']
    c = row['c']  # Value of 'a + b' which is also equal to 'c' as given
    d = row['d']  # Range from 1 to 10 (switch point)
     
    # Determine the timeframe type and construct column names based on conditions
    if b == 0 and a in [1, 2, 4, 8]:
        # Immediate timeframe
        for y in y_values:
            col_name = f"immediate_week{a}_{y}"
            # Check the value of 'd' against the tens digit of 'y'
            if d <= ((y-100) // 10) :
                value = "future"
            else:
                value = "now"
            
            # Add or update the entry in the transformed_data dictionary
            transformed_data.setdefault(participant_id, {})[col_name] = value
    
    elif b == 4 and a in [1, 2, 4, 8]:
        # Delayed timeframe
        for y in y_values:
            col_name = f"delayed_week{a}_{y}"
            # Check the value of 'd' against the tens digit of 'y'
            if d <= ((y-100) // 10) :
                value = "future"
            else:
                value = "now"
            
            # Add or update the entry in the transformed_data dictionary
            transformed_data.setdefault(participant_id, {})[col_name] = value

In [285]:
# Convert the transformed data dictionary into a DataFrame
transformed_TWN = pd.DataFrame.from_dict(transformed_data, orient='index').reset_index()
transformed_TWN.rename(columns={'index': 'ID'}, inplace=True)

In [286]:
# Generate ordered columns
ordered_columns = ['ID']  # Start with 'ID' column
# Add immediate and delayed columns in the specified order
for timeframe in ["immediate", "delayed"]:
    for x in x_values:
        for y in y_values:
            ordered_columns.append(f"{timeframe}_week{x}_{y}")

# Reindex the DataFrame with the ordered columns (filling missing columns with NaN if necessary)
transformed_TWN = transformed_TWN.reindex(columns=ordered_columns)

In [287]:
transformed_TWN = transformed_TWN.merge(
    df_TWN[['Participant', 'Treatment', 'Gender', 'Taiwan', 'Econ', 'Course', 'laugnauge', 'otherLanguage', 'WhichLanguage', 'EngCountry', 'WhichCountry']],
    left_on='ID', right_on='Participant', how='left'
)

# Set the values for each column based on the merged columns
transformed_TWN['treatment'] = transformed_TWN['Treatment'].apply(lambda x: "will" if x == "CF" else "no will")
transformed_TWN['gender'] = transformed_TWN['Gender']
transformed_TWN['race'] = transformed_TWN['Taiwan']
transformed_TWN['major'] = transformed_TWN['Econ'].apply(lambda x: "Econ" if x == "是" else "Others")
transformed_TWN['year'] = transformed_TWN['Course']
transformed_TWN['language'] = transformed_TWN.apply(lambda row: "中文" if row['otherLanguage'] == "否" else row['WhichLanguage'], axis=1)
transformed_TWN['english_speaking_country'] = transformed_TWN.apply(lambda row: row['EngCountry'] if row['EngCountry'] == "否" else row['WhichCountry'], axis=1)

# Drop the redundant columns used for merging
transformed_TWN = transformed_TWN.drop(columns=['Participant', 'Treatment', 'Gender', 'Taiwan', 'Econ', 'Course', 'laugnauge', 'otherLanguage', 'WhichLanguage', 'EngCountry', 'WhichCountry'])
transformed_TWN = transformed_TWN.drop_duplicates()

In [288]:
# Add columns about rational/irrational 
# 因為實驗設計是要求受試者選switch point, 所以單週的選擇一定是理性的
for week in ['week1', 'week2', 'week4', 'week8']:
    transformed_TWN[f'rational_immediate_{week}'] = "rational"
    transformed_TWN[f'rational_delayed_{week}'] = "rational"

# Helper function to determine rationality across weeks for each participant
def get_rationality_mapping(df, timeframe_label):
    sorted_df = df.sort_values(by=['Participant', 'a'])
    rationality_mapping = sorted_df.groupby('Participant').apply(
        lambda group: "rational" if group['d'].is_monotonic_increasing else "irrational"
    ).reset_index()
    rationality_mapping.columns = ['Participant', f'rational_cross_period_{timeframe_label}']
    return rationality_mapping

# Create rationality mappings for immediate and delayed timeframes
rationality_mapping_immi = get_rationality_mapping(df_TWN[df_TWN['b'] == 0], "immediate")
rationality_mapping_del = get_rationality_mapping(df_TWN[df_TWN['b'] == 4], "delayed")

# Merge rationality mappings into transformed_TWN
transformed_TWN = (
    transformed_TWN
    .merge(rationality_mapping_immi, left_on='ID', right_on='Participant', how='left')
    .merge(rationality_mapping_del, left_on='ID', right_on='Participant', how='left')
    .drop(columns=['Participant_x', 'Participant_y'])  # Drop redundant columns
)

  rationality_mapping = sorted_df.groupby('Participant').apply(
  rationality_mapping = sorted_df.groupby('Participant').apply(


In [289]:
transformed_TWN['location'] = "TWN"

Now I want to add a column named "rational_cross_period_immediate".
for each ID in immediate time frame (['b']=0 in df_TWN), 
if its switch point (['d'] in df_TWN) increases when week (['a'] in df_TWN) increase, 
which represents that the subject is rational during the game (a.k.a. transitivity).
Then its transformed_TWN['rational_cross_period_immediate'] should be "rational" o.w. "irrational"


In [290]:
print(transformed_SIN.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159 entries, 0 to 158
Data columns (total 96 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   ID                               159 non-null    int64  
 1   immediate_week1_110              159 non-null    object 
 2   immediate_week1_120              159 non-null    object 
 3   immediate_week1_130              159 non-null    object 
 4   immediate_week1_140              159 non-null    object 
 5   immediate_week1_150              159 non-null    object 
 6   immediate_week1_160              159 non-null    object 
 7   immediate_week1_170              159 non-null    object 
 8   immediate_week1_180              159 non-null    object 
 9   immediate_week1_190              159 non-null    object 
 10  immediate_week2_110              159 non-null    object 
 11  immediate_week2_120              159 non-null    object 
 12  immediate_week2_130   

In [291]:
print(transformed_TWN.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155 entries, 0 to 154
Data columns (total 91 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   ID                               155 non-null    int64 
 1   immediate_week1_110              155 non-null    object
 2   immediate_week1_120              155 non-null    object
 3   immediate_week1_130              155 non-null    object
 4   immediate_week1_140              155 non-null    object
 5   immediate_week1_150              155 non-null    object
 6   immediate_week1_160              155 non-null    object
 7   immediate_week1_170              155 non-null    object
 8   immediate_week1_180              155 non-null    object
 9   immediate_week1_190              155 non-null    object
 10  immediate_week2_110              155 non-null    object
 11  immediate_week2_120              155 non-null    object
 12  immediate_week2_130              155

In [292]:
transformed_TWN.to_excel("LSH_TWN.xlsx", index=False)

#### 合併SIN, TWN

In [301]:
combined_df = pd.concat([transformed_SIN, transformed_TWN], ignore_index=True, join='outer')

# Extract columns starting with "rational_immediate_" and "rational_delayed_"
immediate_columns = [col for col in combined_df.columns if col.startswith("rational_immediate_")]
delayed_columns = [col for col in combined_df.columns if col.startswith("rational_delayed_")]
cross_columns = [col for col in combined_df.columns if col.startswith("rational_cross_")]
# Ensure "location" is at the end
other_columns = [col for col in combined_df.columns if col not in immediate_columns + delayed_columns + cross_columns + ["location"]]
end_columns = ["location"]

# Define the new column order
new_column_order = other_columns + immediate_columns + delayed_columns + cross_columns + end_columns
# Reorder combined_df based on new_column_order
combined_df = combined_df[new_column_order]

file_path = "TimePreference_FutureTense_歐陽萱.csv"
combined_df.to_csv(file_path, index=False)

In [302]:
combined_df

Unnamed: 0,ID,immediate_week1_110,immediate_week1_120,immediate_week1_130,immediate_week1_140,immediate_week1_150,immediate_week1_160,immediate_week1_170,immediate_week1_180,immediate_week1_190,...,rational_immediate_week2,rational_immediate_week4,rational_immediate_week8,rational_delayed_week1,rational_delayed_week2,rational_delayed_week4,rational_delayed_week8,rational_cross_period_immediate,rational_cross_period_delayed,location
0,1,now,now,now,now,now,now,now,now,future,...,rational,rational,rational,rational,rational,rational,rational,irrational,irrational,SIN
1,2,now,now,now,now,now,now,now,now,future,...,rational,rational,rational,rational,rational,rational,rational,rational,rational,SIN
2,3,now,now,now,now,now,now,now,now,future,...,rational,rational,rational,rational,rational,rational,rational,rational,rational,SIN
3,4,now,now,now,now,future,future,future,future,future,...,rational,rational,rational,rational,rational,rational,rational,irrational,irrational,SIN
4,5,now,now,now,now,future,future,future,future,future,...,rational,rational,rational,rational,rational,rational,rational,irrational,irrational,SIN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309,151,future,future,future,future,future,future,future,future,future,...,rational,rational,rational,rational,rational,rational,rational,irrational,irrational,TWN
310,152,now,now,now,now,now,now,future,future,future,...,rational,rational,rational,rational,rational,rational,rational,irrational,irrational,TWN
311,153,now,now,now,now,now,future,future,future,future,...,rational,rational,rational,rational,rational,rational,rational,rational,irrational,TWN
312,154,future,future,future,future,future,future,future,future,future,...,rational,rational,rational,rational,rational,rational,rational,rational,rational,TWN
