## Participants / Behavioral Data Analysis

### Import

In [41]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.cluster import KMeans

### Data Load

In [27]:
# Load the tab-separated participants table.
participants_data = pd.read_csv("./data/participants.tsv", sep="\t")
# read_csv already returns a DataFrame; this second handle is kept under its
# original name (presumably for later cells -- verify before removing).
participants_data_df = pd.DataFrame(participants_data)

# Descriptive (non-score) columns.
participants_info = [
    "participant_id",
    "age",
    "sex",
    "task_order",
    "task_rule",
    "AoA",
]

# Reaction-time columns, one per language/condition.
Language_RT = [
    "RT_L1S",
    "RT_L1NS",
    "RT_L2S",
    "RT_L2NS",
]
# Matching ER_* columns (presumably error rates -- TODO confirm).
Language_ER = [
    "ER_L1S",
    "ER_L1NS",
    "ER_L2S",
    "ER_L2NS",
]

# Skill columns, four per language.
chinese_skills = [
    "Chinese_writing",
    "Chinese_listening",
    "Chinese_speaking",
    "Chinese_reading",
]
english_skills = [
    "English_writing",
    "English_listening",
    "English_speaking",
    "English_reading",
]

# Single-column groups are kept as lists so they can be combined with the
# other column lists.
raven_score = ["raven_score"]
cet_4_score = ["CET_4_score"]

In [29]:
# Columns entering the correlation matrix: RT and ER columns, Chinese and
# English skill columns, Raven and CET-4 scores, and AoA.
cor_columns = [
    *Language_RT,
    *Language_ER,
    *chinese_skills,
    *english_skills,
    *raven_score,
    *cet_4_score,
    "AoA",
]

# DataFrame.corr() with default arguments computes pairwise Pearson correlations.
correlation_matrix = participants_data.loc[:, cor_columns].corr()

In [30]:
def _corr_with(anchor, others):
    """Return {"<anchor> & <other>": correlation} for each column in *others*.

    Values are looked up in the module-level ``correlation_matrix``.
    """
    return {
        f"{anchor} & {other}": correlation_matrix.loc[anchor, other]
        for other in others
    }


# 1. Chinese skills: every unordered pair (4 choose 2)
chinese_combinations = list(itertools.combinations(chinese_skills, 2))
chinese_correlations = {
    f"{first} & {second}": correlation_matrix.loc[first, second]
    for first, second in chinese_combinations
}

# 2. English skills: every unordered pair (4 choose 2)
english_combinations = list(itertools.combinations(english_skills, 2))
english_correlations = {
    f"{first} & {second}": correlation_matrix.loc[first, second]
    for first, second in english_combinations
}

# 3. Raven score vs each English skill
raven_english_correlations = _corr_with("raven_score", english_skills)

# 4. CET 4 score vs each English skill
cet_4_correlations = _corr_with("CET_4_score", english_skills)

# 5. Raven score vs CET 4 score (single pair)
raven_cet4_correlation = _corr_with("raven_score", ["CET_4_score"])

# 6. Age of Acquisition vs the English skills plus the Raven and CET 4 scores
AoA_correlations = _corr_with(
    "AoA", [*english_skills, *raven_score, *cet_4_score]
)

# Print results, one labelled dict per line, in the order computed above.
for _title, _table in (
    ("Chinese Skill Correlations:", chinese_correlations),
    ("English Skill Correlations:", english_correlations),
    ("Raven Score & English Skills Correlations:", raven_english_correlations),
    ("CET 4 Score Correlations:", cet_4_correlations),
    ("Raven Score & CET 4 Score Correlation:", raven_cet4_correlation),
    ("AoA & English skills Correlation:", AoA_correlations),
):
    print(_title, _table)

Chinese Skill Correlations: {'Chinese_writing & Chinese_listening': 0.5369323700415589, 'Chinese_writing & Chinese_speaking': 0.7195877994803657, 'Chinese_writing & Chinese_reading': 0.709667557920316, 'Chinese_listening & Chinese_speaking': 0.6017857599917049, 'Chinese_listening & Chinese_reading': 0.6662341313355201, 'Chinese_speaking & Chinese_reading': 0.6292861708254254}
English Skill Correlations: {'English_writing & English_listening': 0.5324854562438731, 'English_writing & English_speaking': 0.7007945245035324, 'English_writing & English_reading': 0.6179659680475528, 'English_listening & English_speaking': 0.7771198068341004, 'English_listening & English_reading': 0.43228624775876157, 'English_speaking & English_reading': 0.3864251667459408}
Raven Score & English Skills Correlations: {'raven_score & English_writing': 0.15650960709089898, 'raven_score & English_listening': 0.1244779961150536, 'raven_score & English_speaking': 0.06814679023872558, 'raven_score & English_reading':

In [33]:
def _corr_with(anchor, others):
    """Return {"<anchor> & <other>": correlation} for each column in *others*.

    Values are looked up in the module-level ``correlation_matrix``.
    """
    return {
        f"{anchor} & {other}": correlation_matrix.loc[anchor, other]
        for other in others
    }


# L1 reaction times vs Chinese skills
RTL1S_Chinese_correlation = _corr_with("RT_L1S", chinese_skills)
RTL1NS_Chinese_correlation = _corr_with("RT_L1NS", chinese_skills)

# L2 reaction times vs Chinese skills (cross-language comparison)
RTL2S_Chinese_correlation = _corr_with("RT_L2S", chinese_skills)
RTL2NS_Chinese_correlation = _corr_with("RT_L2NS", chinese_skills)

# L2 reaction times vs English skills
RTL2S_English_correlation = _corr_with("RT_L2S", english_skills)
RTL2NS_English_correlation = _corr_with("RT_L2NS", english_skills)

# L1 reaction times vs English skills (cross-language comparison)
RTL1S_English_correlation = _corr_with("RT_L1S", english_skills)
RTL1NS_English_correlation = _corr_with("RT_L1NS", english_skills)

# AoA vs every RT and ER column
AoA_RT_correlations = _corr_with("AoA", Language_RT)
AoA_ER_correlations = _corr_with("AoA", Language_ER)

# Raven score vs every RT and ER column
raven_RT_correlations = _corr_with("raven_score", Language_RT)
raven_ER_correlations = _corr_with("raven_score", Language_ER)

# CET 4 score vs every RT and ER column
cet_4_RT_correlations = _corr_with("CET_4_score", Language_RT)
cet_4_ER_correlations = _corr_with("CET_4_score", Language_ER)


# First group of results: same-language RT/skill pairs and the score-based
# correlations.
for _title, _table in (
    ("RTL1S_Chinese_correlation:", RTL1S_Chinese_correlation),
    ("RTL1NS_Chinese_correlation: ", RTL1NS_Chinese_correlation),
    ("RTL2S_English_correlation:", RTL2S_English_correlation),
    ("RTL2NS_English_correlation:", RTL2NS_English_correlation),
    ("AoA_RT_correlations:", AoA_RT_correlations),
    ("AoA_ER_correlations:", AoA_ER_correlations),
    ("raven_RT_correlations:", raven_RT_correlations),
    ("raven_ER_correlations:", raven_ER_correlations),
    ("cet_4_RT_correlations:", cet_4_RT_correlations),
    ("cet_4_ER_correlations:", cet_4_ER_correlations),
):
    print(_title, _table)

# Two blank lines separate the groups in the output.
print()
print()

# Second group of results: cross-language RT/skill pairs.
for _title, _table in (
    ("RTL2S_Chinese_correlation:", RTL2S_Chinese_correlation),
    ("RTL2NS_Chinese_correlation:", RTL2NS_Chinese_correlation),
    ("RTL1S_English_correlation:", RTL1S_English_correlation),
    ("RTL1NS_English_correlation:", RTL1NS_English_correlation),
):
    print(_title, _table)

RTL1S_Chinese_correlation: {'RT_L1S & Chinese_writing': -0.17181157875905817, 'RT_L1S & Chinese_listening': -0.13034266083903212, 'RT_L1S & Chinese_speaking': -0.19216220392790564, 'RT_L1S & Chinese_reading': -0.049387453437041456}
RTL1NS_Chinese_correlation:  {'RT_L1NS & Chinese_writing': -0.14431948221685217, 'RT_L1NS & Chinese_listening': -0.10066014422042956, 'RT_L1NS & Chinese_speaking': -0.13552622556975719, 'RT_L1NS & Chinese_reading': -0.05409748262747915}
RTL2S_English_correlation: {'RT_L2S & English_writing': -0.15608745106689872, 'RT_L2S & English_listening': -0.26285325959694766, 'RT_L2S & English_speaking': -0.249225000279765, 'RT_L2S & English_reading': -0.07517967683065574}
RTL2NS_English_correlation: {'RT_L2NS & English_writing': -0.13785607761363033, 'RT_L2NS & English_listening': -0.22679407408979357, 'RT_L2NS & English_speaking': -0.23930748130690666, 'RT_L2NS & English_reading': -0.0905329915511562}
AoA_RT_correlations: {'AoA & RT_L1S': -0.07303418491468348, 'AoA & 