In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols


In [2]:
df = pd.read_csv("/content/combined_output.csv")

In [3]:
df.head()

Unnamed: 0,Module,Target/Check,Condition,Flanker,Response,Correct,RT,Participant_ID,"How positive or negative does this emoji feel to you?\n(1 = Very Negative, 5 = Neutral, 9 = Very Positive)","How strong or calm does this emoji feel?\n(1 = Very Calming, 9 = Very Exciting)",...,"How familiar are you with this emoji?\n(1 = Not familiar at all, 9 = Very familiar).5","How often do you use this emoji in daily conversations?\n(1 = Never use, 9 = Use very frequently).5","When someone else is feeling excited, I tend to get excited too.",It upsets me to see someone being treated disrespectfully.,I enjoy making other people feel better.,"I have tender, concerned feelings for people less fortunate than me.",I can tell when others are sad even when they do not say anything.,I become irritated when someone cries.,I get a strong urge to help when I see someone who is upset.,I am not really interested in how other people feel.
0,Letter Module,D,neutral,X,RIGHT,True,2.499346,farheen,5.0,7.0,...,9.0,4.0,3.0,4.0,4.0,4.0,4.0,2.0,4.0,0.0
1,Letter Module,C,congruent,D,RIGHT,True,1.133785,farheen,,,...,,,,,,,,,,
2,Letter Module,D,neutral,Y,RIGHT,True,0.872719,farheen,,,...,,,,,,,,,,
3,Letter Module,B,congruent,A,LEFT,True,1.126394,farheen,,,...,,,,,,,,,,
4,Letter Module,B,congruent,B,LEFT,True,0.654276,farheen,,,...,,,,,,,,,,


In [4]:
# Keep only required columns
cols_to_keep = ['Module', 'Target/Check', 'Condition', 'Flanker', 'Response', 'Correct', 'RT', 'Participant_ID']
df = df[cols_to_keep]

In [5]:
df.head()

Unnamed: 0,Module,Target/Check,Condition,Flanker,Response,Correct,RT,Participant_ID
0,Letter Module,D,neutral,X,RIGHT,True,2.499346,farheen
1,Letter Module,C,congruent,D,RIGHT,True,1.133785,farheen
2,Letter Module,D,neutral,Y,RIGHT,True,0.872719,farheen
3,Letter Module,B,congruent,A,LEFT,True,1.126394,farheen
4,Letter Module,B,congruent,B,LEFT,True,0.654276,farheen


In [6]:
df['Module'] = df['Module'].str.strip()
df['Condition'] = df['Condition'].str.lower()

In [7]:
agg_df = df.groupby(['Participant_ID', 'Module', 'Condition']).agg(
    accuracy=('Correct', 'mean'),
    mean_rt=('RT', 'mean')
).reset_index()

# Accuracy comparision

In [8]:
print("\n=== Accuracy by Module ===")
print(agg_df.groupby('Module')['accuracy'].describe())


=== Accuracy by Module ===
                     count      mean       std  min   25%  50%  75%  max
Module                                                                  
Emoji Module         123.0  0.898374  0.202032  0.0  0.90  1.0  1.0  1.0
Letter Module        123.0  0.939024  0.167251  0.0  0.95  1.0  1.0  1.0
Letter+Emoji Module  123.0  0.914634  0.181380  0.0  0.90  1.0  1.0  1.0
Shape Module         123.0  0.889431  0.225364  0.0  0.90  1.0  1.0  1.0


In [16]:
condition_summary = (
    df.groupby('Condition')['Correct']
    .mean()
    .reset_index()
    .rename(columns={'Correct': 'accuracy'})
)

# Add a column for inaccuracy (1 - accuracy)
condition_summary['inaccuracy'] = 1 - condition_summary['accuracy']

# Sort by inaccuracy descending
condition_summary = condition_summary.sort_values('inaccuracy', ascending=False)

print("\n=== Inaccuracy by Condition ===")
print(condition_summary)

# Identify the condition with most inaccurate responses
most_inaccurate = condition_summary.iloc[0]
print(f"\n❗ Most inaccurate condition: '{most_inaccurate['Condition']}' "
      f"with {most_inaccurate['inaccuracy']*100:.2f}% incorrect responses")


=== Inaccuracy by Condition ===
     Condition  accuracy  inaccuracy
0    congruent  0.906707    0.093293
1  incongruent  0.907927    0.092073
2      neutral  0.916463    0.083537

❗ Most inaccurate condition: 'congruent' with 9.33% incorrect responses


In [17]:
print("\n=== RT by Module ===")
print(agg_df.groupby('Module')['mean_rt'].describe())

anova_rt_mod = ols('mean_rt ~ C(Module)', data=agg_df).fit()
anova_result_rt_mod = sm.stats.anova_lm(anova_rt_mod, typ=2)
print("\nANOVA: RT across Modules")
print(anova_result_rt_mod)


=== RT by Module ===
                     count      mean       std       min       25%       50%  \
Module                                                                         
Emoji Module         123.0  0.778165  0.188748  0.465317  0.660032  0.748273   
Letter Module        123.0  0.760166  0.223044  0.223557  0.615467  0.716793   
Letter+Emoji Module  123.0  0.748513  0.215103  0.287557  0.617508  0.718701   
Shape Module         123.0  0.859134  0.257687  0.316518  0.674282  0.797348   

                          75%       max  
Module                                   
Emoji Module         0.849011  1.344357  
Letter Module        0.881246  1.593376  
Letter+Emoji Module  0.842337  1.924965  
Shape Module         1.006269  1.655900  

ANOVA: RT across Modules
              sum_sq     df         F    PR(>F)
C(Module)   0.920243    3.0  6.195478  0.000389
Residual   24.161623  488.0       NaN       NaN


In [18]:
emoji_vs_combo = agg_df[agg_df['Module'].isin(['Emoji Module', 'Letter+Emoji Module'])]
print("\nANOVA: Accuracy (Emoji vs Letter+Emoji)")
anova_em_acc = ols('accuracy ~ C(Module)', data=emoji_vs_combo).fit()
print(sm.stats.anova_lm(anova_em_acc, typ=2))

print("\nANOVA: RT (Emoji vs Letter+Emoji)")
anova_em_rt = ols('mean_rt ~ C(Module)', data=emoji_vs_combo).fit()
print(sm.stats.anova_lm(anova_em_rt, typ=2))


ANOVA: Accuracy (Emoji vs Letter+Emoji)
             sum_sq     df         F    PR(>F)
C(Module)  0.016260    1.0  0.441158  0.507191
Residual   8.993333  244.0       NaN       NaN

ANOVA: RT (Emoji vs Letter+Emoji)
             sum_sq     df        F    PR(>F)
C(Module)  0.054072    1.0  1.32053  0.251622
Residual   9.991182  244.0      NaN       NaN


In [19]:
results = []

# Iterate over each module
for module, subdf in df.groupby('Module'):
    # Get mean RTs per condition
    means = subdf.groupby('Condition')['RT'].mean()

    congruent_rt = means.get('congruent', float('nan'))
    incongruent_rt = means.get('incongruent', float('nan'))
    neutral_rt = means.get('neutral', float('nan'))

    # Compute effect size (Incongruent - Congruent)
    effect_size = incongruent_rt - congruent_rt

    # Perform t-test (independent samples)
    if ('congruent' in subdf['Condition'].unique()) and ('incongruent' in subdf['Condition'].unique()):
        t_stat, p_val = stats.ttest_ind(
            subdf.loc[subdf['Condition'] == 'congruent', 'RT'],
            subdf.loc[subdf['Condition'] == 'incongruent', 'RT'],
            equal_var=False
        )
    else:
        p_val = float('nan')

    results.append({
        'Module': module,
        'Congruent RT (ms)': round(congruent_rt * 1000, 1) if pd.notna(congruent_rt) else None,
        'Incongruent RT (ms)': round(incongruent_rt * 1000, 1) if pd.notna(incongruent_rt) else None,
        'Neutral RT (ms)': round(neutral_rt * 1000, 1) if pd.notna(neutral_rt) else None,
        'Effect Size (ms)': round(effect_size * 1000, 1) if pd.notna(effect_size) else None,
        'p-value': round(p_val, 3) if pd.notna(p_val) else None
    })

# Create summary table
summary_df = pd.DataFrame(results)

print("\n=== Reaction Time Summary Table (Only Correct Trials) ===")
print(summary_df)


=== Reaction Time Summary Table (Only Correct Trials) ===
                Module  Congruent RT (ms)  Incongruent RT (ms)  \
0         Emoji Module              762.5                785.2   
1        Letter Module              724.5                782.6   
2  Letter+Emoji Module              743.5                779.8   
3         Shape Module              843.8                840.8   

   Neutral RT (ms)  Effect Size (ms)  p-value  
0            786.8              22.7    0.377  
1            773.4              58.1    0.025  
2            722.3              36.3    0.273  
3            892.7              -3.0    0.934  


In [61]:
from tabulate import tabulate

print("\n=== Reaction Time Summary Table (Only Correct Trials) ===")
print(tabulate(summary_df, headers="keys", tablefmt="fancy_grid", showindex=False))


=== Reaction Time Summary Table (Only Correct Trials) ===
╒═════════════════════╤═════════════════════╤═══════════════════════╤═══════════════════╤════════════════════╤═══════════╕
│ Module              │   Congruent RT (ms) │   Incongruent RT (ms) │   Neutral RT (ms) │   Effect Size (ms) │   p-value │
╞═════════════════════╪═════════════════════╪═══════════════════════╪═══════════════════╪════════════════════╪═══════════╡
│ Emoji Module        │               762.5 │                 785.2 │             786.8 │               22.7 │     0.377 │
├─────────────────────┼─────────────────────┼───────────────────────┼───────────────────┼────────────────────┼───────────┤
│ Letter Module       │               724.5 │                 782.6 │             773.4 │               58.1 │     0.025 │
├─────────────────────┼─────────────────────┼───────────────────────┼───────────────────┼────────────────────┼───────────┤
│ Letter+Emoji Module │               743.5 │                 779.8 │           

In [None]:
Only the Letter Module shows significant interference.
Suggests cognitive control demands are highest when processing purely symbolic, linguistic stimuli (letters).

In [20]:
import pandas as pd

df_correct = df[df['Correct'] == 1]

# Compute median RT per Participant, Module, and Condition
agg = (
    df_correct.groupby(['Participant_ID', 'Module', 'Condition'])['RT']
    .median()
    .reset_index()
)

# Pivot to get separate columns for each Module_Condition
pivot = agg.pivot_table(
    index='Participant_ID',
    columns=['Module', 'Condition'],
    values='RT'
)

# Flatten the multi-level columns
pivot.columns = [f"{mod}_{cond}" for mod, cond in pivot.columns]

# Reset index to make Participant_ID a normal column
pivot = pivot.reset_index()

# Sort columns for readability (optional)
pivot = pivot[['Participant_ID'] + sorted([c for c in pivot.columns if c != 'Participant_ID'])]

pivot.head()


Unnamed: 0,Participant_ID,Emoji Module_congruent,Emoji Module_incongruent,Emoji Module_neutral,Letter Module_congruent,Letter Module_incongruent,Letter Module_neutral,Letter+Emoji Module_congruent,Letter+Emoji Module_incongruent,Letter+Emoji Module_neutral,Shape Module_congruent,Shape Module_incongruent,Shape Module_neutral
0,1,0.721665,0.617804,0.711046,0.615432,0.923931,0.861228,0.548059,0.781784,0.670934,0.867453,0.811205,0.938632
1,2,0.694327,0.578557,0.713278,0.639593,0.545234,0.676201,0.647025,0.640338,0.600211,0.579294,0.595324,0.626852
2,3,0.902752,0.905624,0.966169,0.96402,1.134235,0.845822,0.94896,1.222164,1.000121,0.73935,0.992517,0.954837
3,4,0.620137,0.657916,0.614436,0.544675,0.491399,0.596904,0.646675,0.774611,0.715166,0.690134,0.648806,0.764358
4,5,0.506693,0.750013,0.427161,0.305518,0.26469,0.285664,0.314367,0.175798,0.195716,0.302287,0.261118,0.250925


In [21]:
# Save the resulting table to a CSV file
pivot.to_csv("participant_median_rt_by_module.csv", index=False)


In [38]:
df1 = pd.read_csv("/content/participant_median_rt_by_module.csv")

In [29]:
df2= pd.read_csv("/content/CGS401_Participant (Responses) - Form Responses 1.csv")

In [30]:
df2.head()

Unnamed: 0,Timestamp,Name,Participant ID,"How positive or negative does this emoji feel to you?\n(1 = Very Negative, 5 = Neutral, 9 = Very Positive)","How strong or calm does this emoji feel?\n(1 = Very Calming, 9 = Very Exciting)","How familiar are you with this emoji?\n(1 = Not familiar at all, 9 = Very familiar)","How often do you use this emoji in daily conversations?\n(1 = Never use, 9 = Use very frequently)","How positive or negative does this emoji feel to you?\n(1 = Very Negative, 5 = Neutral, 9 = Very Positive).1","How strong or calm does this emoji feel?\n(1 = Very Calming, 5= Neutral, 9 = Very Exciting)","How familiar are you with this emoji?\n(1 = Not familiar at all, 9 = Very familiar).1",...,"How often do you use this emoji in daily conversations?\n(1 = Never use, 9 = Use very frequently).5","When someone else is feeling excited, I tend to get excited too.",It upsets me to see someone being treated disrespectfully.,I enjoy making other people feel better.,"I have tender, concerned feelings for people less fortunate than me.",I can tell when others are sad even when they do not say anything.,I become irritated when someone cries.,I get a strong urge to help when I see someone who is upset.,I am not really interested in how other people feel.,How did you find the task overall?
0,11/7/2025 11:13:47,Eeshwari Sunkersett,eeshwari,6,1,9,5,9,7,8,...,8.0,2,4.0,4,3,3.0,2,3,1,Good very nice
1,11/7/2025 11:26:28,Priyanka,priyanka,5,1,3,2,9,8,9,...,6.0,3,4.0,4,4,3.0,1,4,0,Cool
2,11/7/2025 11:39:09,surbhi yadav,surbhi,7,7,9,8,9,5,9,...,4.0,4,4.0,3,4,2.0,2,4,2,fun
3,11/7/2025 11:50:23,Farheen Parvez,farheen,5,7,9,8,9,6,9,...,4.0,3,4.0,4,4,4.0,2,4,0,"fun, easy to understand"
4,11/7/2025 14:57:34,Anisha Nanda,2,6,3,9,4,8,6,9,...,1.0,3,4.0,4,4,3.0,0,4,1,


In [31]:
import pandas as pd

# Assuming df2 is already loaded

# 1️⃣ Drop all "positive/negative" and "strong/calm" columns
df2 = df2.drop(
    columns=[col for col in df2.columns
             if "How positive or negative does this emoji feel to you?" in col
             or "How strong or calm does this emoji feel?" in col],
    errors="ignore"
)

# 2️⃣ Select all "familiarity" columns
familiarity_cols = [col for col in df2.columns if "How familiar are you with this emoji?" in col]

# 3️⃣ Select all "usage" columns
usage_cols = [col for col in df2.columns if "How often do you use this emoji in daily conversations?" in col]

# 4️⃣ Compute average familiarity and usage per row
df2["Familiarity"] = df2[familiarity_cols].mean(axis=1, skipna=True)
df2["Usage"] = df2[usage_cols].mean(axis=1, skipna=True)

# 5️⃣ Optionally, drop the original emoji-level familiarity and usage columns to keep dataset clean
df2 = df2.drop(columns=familiarity_cols + usage_cols, errors="ignore")

# ✅ Final cleaned dataframe
df2.head()


Unnamed: 0,Timestamp,Name,Participant ID,"When someone else is feeling excited, I tend to get excited too.",It upsets me to see someone being treated disrespectfully.,I enjoy making other people feel better.,"I have tender, concerned feelings for people less fortunate than me.",I can tell when others are sad even when they do not say anything.,I become irritated when someone cries.,I get a strong urge to help when I see someone who is upset.,I am not really interested in how other people feel.,How did you find the task overall?,Familiarity,Usage
0,11/7/2025 11:13:47,Eeshwari Sunkersett,eeshwari,2,4.0,4,3,3.0,2,3,1,Good very nice,8.0,6.0
1,11/7/2025 11:26:28,Priyanka,priyanka,3,4.0,4,4,3.0,1,4,0,Cool,6.0,4.666667
2,11/7/2025 11:39:09,surbhi yadav,surbhi,4,4.0,3,4,2.0,2,4,2,fun,7.666667,5.333333
3,11/7/2025 11:50:23,Farheen Parvez,farheen,3,4.0,4,4,4.0,2,4,0,"fun, easy to understand",9.0,4.666667
4,11/7/2025 14:57:34,Anisha Nanda,2,3,4.0,4,4,3.0,0,4,1,,9.0,3.166667


In [33]:
df2.columns

Index(['Timestamp', 'Name', 'Participant ID',
       'When someone else is feeling excited, I tend to get excited too.',
       'It upsets me to see someone being treated disrespectfully.',
       'I enjoy making other people feel better.',
       'I have tender, concerned feelings for people less fortunate than me.',
       'I can tell when others are sad even when they do not say anything.',
       'I become irritated when someone cries.',
       'I get a strong urge to help when I see someone who is upset.',
       ' I am not really interested in how other people feel.', 'Familiarity',
       'Usage'],
      dtype='object')

In [36]:
import pandas as pd

df2 = df2.drop(columns=[col for col in df2.columns if "How did you find the task overall?" in col], errors="ignore")

# 2️⃣ Identify empathy item columns (based on your text)
empathy_cols = [
    "When someone else is feeling excited, I tend to get excited too.",
    "It upsets me to see someone being treated disrespectfully.",
    "I enjoy making other people feel better.",
    "I have tender, concerned feelings for people less fortunate than me.",
    "I can tell when others are sad even when they do not say anything.",
    "I become irritated when someone cries.",
    "I get a strong urge to help when I see someone who is upset.",
    " I am not really interested in how other people feel."
]

# 3️⃣ Make sure all columns exist
empathy_cols = [c for c in empathy_cols if c in df2.columns]

# 4️⃣ Reverse score the two reverse-coded items (6 & 8)
df2["I become irritated when someone cries."] = 10 - df2["I become irritated when someone cries."]
df2[" I am not really interested in how other people feel."] = 10 - df2[" I am not really interested in how other people feel."]

# 5️⃣ Compute the empathy score (average of all items)
df2["Empathy_Score"] = df2[empathy_cols].mean(axis=1, skipna=True)

# ✅ Final check
df2[["Participant ID", "Empathy_Score"]].head()


Unnamed: 0,Participant ID,Empathy_Score
0,eeshwari,3.75
1,priyanka,4.125
2,surbhi,3.875
3,farheen,4.375
4,2,3.875


In [37]:
df2.head()

Unnamed: 0,Timestamp,Name,Participant ID,"When someone else is feeling excited, I tend to get excited too.",It upsets me to see someone being treated disrespectfully.,I enjoy making other people feel better.,"I have tender, concerned feelings for people less fortunate than me.",I can tell when others are sad even when they do not say anything.,I become irritated when someone cries.,I get a strong urge to help when I see someone who is upset.,I am not really interested in how other people feel.,Familiarity,Usage,Empathy_Score
0,11/7/2025 11:13:47,Eeshwari Sunkersett,eeshwari,2,4.0,4,3,3.0,2,3,9,8.0,6.0,3.75
1,11/7/2025 11:26:28,Priyanka,priyanka,3,4.0,4,4,3.0,1,4,10,6.0,4.666667,4.125
2,11/7/2025 11:39:09,surbhi yadav,surbhi,4,4.0,3,4,2.0,2,4,8,7.666667,5.333333,3.875
3,11/7/2025 11:50:23,Farheen Parvez,farheen,3,4.0,4,4,4.0,2,4,10,9.0,4.666667,4.375
4,11/7/2025 14:57:34,Anisha Nanda,2,3,4.0,4,4,3.0,0,4,9,9.0,3.166667,3.875


In [40]:
# Clean up column names first (optional but recommended)
df2.columns = df2.columns.str.strip()

# Keep only the required columns
df2 = df2[["Participant ID", "Empathy_Score", "Familiarity", "Usage"]]

# ✅ Final check
df2.head()


Unnamed: 0,Participant ID,Empathy_Score,Familiarity,Usage
0,eeshwari,3.75,8.0,6.0
1,priyanka,4.125,6.0,4.666667
2,surbhi,3.875,7.666667,5.333333
3,farheen,4.375,9.0,4.666667
4,2,3.875,9.0,3.166667


In [39]:
df1.head()

Unnamed: 0,Participant_ID,Emoji Module_congruent,Emoji Module_incongruent,Emoji Module_neutral,Letter Module_congruent,Letter Module_incongruent,Letter Module_neutral,Letter+Emoji Module_congruent,Letter+Emoji Module_incongruent,Letter+Emoji Module_neutral,Shape Module_congruent,Shape Module_incongruent,Shape Module_neutral
0,1,0.721665,0.617804,0.711046,0.615432,0.923931,0.861228,0.548059,0.781784,0.670934,0.867453,0.811205,0.938632
1,2,0.694327,0.578557,0.713278,0.639593,0.545234,0.676201,0.647025,0.640338,0.600211,0.579294,0.595324,0.626852
2,3,0.902752,0.905624,0.966169,0.96402,1.134235,0.845822,0.94896,1.222164,1.000121,0.73935,0.992517,0.954837
3,4,0.620137,0.657916,0.614436,0.544675,0.491399,0.596904,0.646675,0.774611,0.715166,0.690134,0.648806,0.764358
4,5,0.506693,0.750013,0.427161,0.305518,0.26469,0.285664,0.314367,0.175798,0.195716,0.302287,0.261118,0.250925


In [41]:
# First, clean up column names in both dataframes (important!)
df1.columns = df1.columns.str.strip()
df2.columns = df2.columns.str.strip()

# Standardize column name just in case (capitalization differences, etc.)
df1 = df1.rename(columns={'Participant_ID': 'Participant ID'})

# Perform the merge
merged_df = pd.merge(df1, df2, on="Participant ID", how="left")

# ✅ Preview result
merged_df.head()


Unnamed: 0,Participant ID,Emoji Module_congruent,Emoji Module_incongruent,Emoji Module_neutral,Letter Module_congruent,Letter Module_incongruent,Letter Module_neutral,Letter+Emoji Module_congruent,Letter+Emoji Module_incongruent,Letter+Emoji Module_neutral,Shape Module_congruent,Shape Module_incongruent,Shape Module_neutral,Empathy_Score,Familiarity,Usage
0,1,0.721665,0.617804,0.711046,0.615432,0.923931,0.861228,0.548059,0.781784,0.670934,0.867453,0.811205,0.938632,3.0,6.6,5.333333
1,2,0.694327,0.578557,0.713278,0.639593,0.545234,0.676201,0.647025,0.640338,0.600211,0.579294,0.595324,0.626852,3.875,9.0,3.166667
2,3,0.902752,0.905624,0.966169,0.96402,1.134235,0.845822,0.94896,1.222164,1.000121,0.73935,0.992517,0.954837,4.375,8.333333,6.833333
3,4,0.620137,0.657916,0.614436,0.544675,0.491399,0.596904,0.646675,0.774611,0.715166,0.690134,0.648806,0.764358,4.125,7.666667,4.0
4,5,0.506693,0.750013,0.427161,0.305518,0.26469,0.285664,0.314367,0.175798,0.195716,0.302287,0.261118,0.250925,3.75,6.666667,4.333333


In [42]:
# Clean up column names (in case of spaces or hidden characters)
merged_df.columns = merged_df.columns.str.strip()

# Keep only desired columns
keep_cols = ["Participant ID", "Empathy_Score", "Familiarity", "Usage"] + \
            [col for col in merged_df.columns if col.startswith("Emoji Module")] + \
            [col for col in merged_df.columns if col.startswith("Letter+Emoji Module")]

filtered_df = merged_df[keep_cols]

# ✅ Final check
filtered_df.head()


Unnamed: 0,Participant ID,Empathy_Score,Familiarity,Usage,Emoji Module_congruent,Emoji Module_incongruent,Emoji Module_neutral,Letter+Emoji Module_congruent,Letter+Emoji Module_incongruent,Letter+Emoji Module_neutral
0,1,3.0,6.6,5.333333,0.721665,0.617804,0.711046,0.548059,0.781784,0.670934
1,2,3.875,9.0,3.166667,0.694327,0.578557,0.713278,0.647025,0.640338,0.600211
2,3,4.375,8.333333,6.833333,0.902752,0.905624,0.966169,0.94896,1.222164,1.000121
3,4,4.125,7.666667,4.0,0.620137,0.657916,0.614436,0.646675,0.774611,0.715166
4,5,3.75,6.666667,4.333333,0.506693,0.750013,0.427161,0.314367,0.175798,0.195716


In [43]:
import pandas as pd

# Compute average RTs
filtered_df["EmojiModule_AvgRT"] = filtered_df[
    ["Emoji Module_congruent", "Emoji Module_incongruent", "Emoji Module_neutral"]
].mean(axis=1)

filtered_df["LetterEmojiModule_AvgRT"] = filtered_df[
    ["Letter+Emoji Module_congruent", "Letter+Emoji Module_incongruent", "Letter+Emoji Module_neutral"]
].mean(axis=1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["EmojiModule_AvgRT"] = filtered_df[
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df["LetterEmojiModule_AvgRT"] = filtered_df[


In [47]:
filtered_df.head()

Unnamed: 0,Participant ID,Empathy_Score,Familiarity,Usage,Emoji Module_congruent,Emoji Module_incongruent,Emoji Module_neutral,Letter+Emoji Module_congruent,Letter+Emoji Module_incongruent,Letter+Emoji Module_neutral,EmojiModule_AvgRT,LetterEmojiModule_AvgRT
0,1,3.0,6.6,5.333333,0.721665,0.617804,0.711046,0.548059,0.781784,0.670934,0.683505,0.666925
1,2,3.875,9.0,3.166667,0.694327,0.578557,0.713278,0.647025,0.640338,0.600211,0.662054,0.629191
2,3,4.375,8.333333,6.833333,0.902752,0.905624,0.966169,0.94896,1.222164,1.000121,0.924848,1.057082
3,4,4.125,7.666667,4.0,0.620137,0.657916,0.614436,0.646675,0.774611,0.715166,0.63083,0.712151
4,5,3.75,6.666667,4.333333,0.506693,0.750013,0.427161,0.314367,0.175798,0.195716,0.561289,0.228627


In [59]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr, zscore

# 1️⃣ Drop rows with missing values in any key columns
filtered_df_clean = filtered_df.dropna(subset=[
    "Familiarity", "Usage", "EmojiModule_AvgRT",
    "Empathy_Score", "LetterEmojiModule_AvgRT"
]).copy()

# 2️⃣ Remove RT outliers (|z| > 3)
filtered_df_clean["z_EmojiRT"] = zscore(filtered_df_clean["EmojiModule_AvgRT"])
filtered_df_clean["z_LetterEmojiRT"] = zscore(filtered_df_clean["LetterEmojiModule_AvgRT"])

filtered_df_clean = filtered_df_clean[
    (filtered_df_clean["z_EmojiRT"].abs() <= 3) &
    (filtered_df_clean["z_LetterEmojiRT"].abs() <= 3)
]

# 3️⃣ Subsets
emoji_subset = filtered_df_clean.dropna(subset=["Familiarity", "Usage", "EmojiModule_AvgRT"])
letteremoji_subset = filtered_df_clean.dropna(subset=["Empathy_Score", "LetterEmojiModule_AvgRT"])

# 4️⃣ Run correlations again
r_fam, p_fam = pearsonr(emoji_subset["Familiarity"], emoji_subset["EmojiModule_AvgRT"])
r_use, p_use = pearsonr(emoji_subset["Usage"], emoji_subset["EmojiModule_AvgRT"])
r_emp, p_emp = pearsonr(letteremoji_subset["Empathy_Score"], letteremoji_subset["LetterEmojiModule_AvgRT"])

# 5️⃣ Combine results
summary = pd.DataFrame({
    "Comparison": [
        "Familiarity vs Emoji RT",
        "Usage vs Emoji RT",
        "Empathy vs Letter+Emoji RT"
    ],
    "Correlation (r)": [r_fam, r_use, r_emp],
    "p-value": [p_fam, p_use, p_emp]
})

# 6️⃣ Display results
print(summary)


                   Comparison  Correlation (r)   p-value
0     Familiarity vs Emoji RT         0.009870  0.953101
1           Usage vs Emoji RT        -0.100150  0.549674
2  Empathy vs Letter+Emoji RT         0.200947  0.226383


In [60]:
import pandas as pd
from scipy.stats import ttest_ind

# ✅ Drop missing values for relevant columns
filtered_df = filtered_df.dropna(subset=[
    "Familiarity", "Usage", "Empathy_Score",
    "EmojiModule_AvgRT", "LetterEmojiModule_AvgRT"
])

# --- 1️⃣ Familiarity vs Emoji RT ---
median_fam = filtered_df["Familiarity"].median()
high_fam = filtered_df[filtered_df["Familiarity"] > median_fam]["EmojiModule_AvgRT"]
low_fam = filtered_df[filtered_df["Familiarity"] <= median_fam]["EmojiModule_AvgRT"]

t_fam, p_fam = ttest_ind(high_fam, low_fam, equal_var=False)

# --- 2️⃣ Usage vs Emoji RT ---
median_use = filtered_df["Usage"].median()
high_use = filtered_df[filtered_df["Usage"] > median_use]["EmojiModule_AvgRT"]
low_use = filtered_df[filtered_df["Usage"] <= median_use]["EmojiModule_AvgRT"]

t_use, p_use = ttest_ind(high_use, low_use, equal_var=False)

# --- 3️⃣ Empathy vs Letter+Emoji RT ---
median_emp = filtered_df["Empathy_Score"].median()
high_emp = filtered_df[filtered_df["Empathy_Score"] > median_emp]["LetterEmojiModule_AvgRT"]
low_emp = filtered_df[filtered_df["Empathy_Score"] <= median_emp]["LetterEmojiModule_AvgRT"]

t_emp, p_emp = ttest_ind(high_emp, low_emp, equal_var=False)

# ✅ Summarize results
group_summary = pd.DataFrame({
    "Comparison": [
        "Familiarity (High vs Low) - Emoji RT",
        "Usage (High vs Low) - Emoji RT",
        "Empathy (High vs Low) - Letter+Emoji RT"
    ],
    "High Group Mean RT (ms)": [
        high_fam.mean(), high_use.mean(), high_emp.mean()
    ],
    "Low Group Mean RT (ms)": [
        low_fam.mean(), low_use.mean(), low_emp.mean()
    ],
    "t-value": [t_fam, t_use, t_emp],
    "p-value": [p_fam, p_use, p_emp]
})

print("\n=== Group Comparison Results ===")
print(group_summary.round(3))



=== Group Comparison Results ===
                                Comparison  High Group Mean RT (ms)  \
0     Familiarity (High vs Low) - Emoji RT                    0.674   
1           Usage (High vs Low) - Emoji RT                    0.691   
2  Empathy (High vs Low) - Letter+Emoji RT                    0.715   

   Low Group Mean RT (ms)  t-value  p-value  
0                   0.759   -1.692    0.101  
1                   0.744   -1.013    0.319  
2                   0.646    1.244    0.227  
