## Data Inspection

### Library

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Data Load

In [2]:
# fMRI task events: subject 001, CognitiveControl task, run 01 (tab-separated).
# pd.read_csv already returns a DataFrame, so re-wrapping it in pd.DataFrame(...)
# is unnecessary. Both names stay bound so cells that use either one still work;
# note they now refer to the same object.
sub01_events_path = "./data/sub-001/func/sub-001_task-CognitiveControl_run-01_events.tsv"
fMRI_func_data_sub01_event = pd.read_csv(sub01_events_path, sep='\t')
fMRI_func_data_sub01_event_df = fMRI_func_data_sub01_event
# Preview the first five trials.
fMRI_func_data_sub01_event_df.head(5)

Unnamed: 0,onset,duration,trial_type,stimulus,accuracy,response_time
0,8.5,1,ICS,BlueLeft,1,0.553
1,14.0,1,ICNS,BlueLeft,1,0.497
2,18.5,1,ICNS,BlueRight,1,0.636
3,23.0,1,CS,RedRight,1,0.582
4,27.5,1,CNS,RedLeft,1,0.521


In [45]:
# Participant-level table (tab-separated), one row per participant.
# pd.read_csv already returns a DataFrame, so no pd.DataFrame(...) re-wrap is needed;
# the *_df name is kept as an alias of the same object for the cells that use it.
participants_data = pd.read_csv("./data/participants.tsv", sep='\t')
participants_data_df = participants_data
# Preview every column up to and including ER_L2NS, i.e. excluding the columns
# related to self-reported scores. A label slice (inclusive of its end label)
# replaces the positional 0:16 so the cut-off stays correct if columns move.
participants_data_df.loc[:, :"ER_L2NS"].head(5)

Unnamed: 0,participant_id,age,sex,task_order,task_rule,raven_score,AoA,CET_4_score,RT_L1S,RT_L1NS,RT_L2S,RT_L2NS,ER_L1S,ER_L1NS,ER_L2S,ER_L2NS
0,sub-001,20,F,Lan,Red,56,12,455,1068,948,1059,1027,0.0,0.0,0.05,0.05
1,sub-002,24,F,Lan,Red,55,13,576,918,832,897,823,0.025,0.0,0.025,0.025
2,sub-003,24,M,Lan,Red,60,7,543,951,859,883,841,0.0,0.0,0.0,0.0
3,sub-004,21,F,Lan,Red,47,8,564,1101,1023,1000,935,0.05,0.075,0.05,0.025
4,sub-005,24,M,Lan,Blue,59,13,539,891,766,760,784,0.025,0.025,0.025,0.0


In [41]:
# List every column label in the participants table.
participants_data_df.columns

Index(['participant_id', 'age', 'sex', 'task_order', 'task_rule',
       'raven_score', 'AoA', 'CET_4_score', 'RT_L1S', 'RT_L1NS', 'RT_L2S',
       'RT_L2NS', 'ER_L1S', 'ER_L1NS', 'ER_L2S', 'ER_L2NS', 'Chinese_reading',
       'Chinese_writing', 'Chinese_speaking', 'Chinese_listening',
       'English_reading', 'English_writing', 'English_speaking',
       'English_listening'],
      dtype='object')

In [46]:
# Number of participants at each age; value_counts orders by frequency, highest first.
age_counts = participants_data_df["age"].value_counts()
print(age_counts)

age
20    15
23    13
21    12
22    11
24     9
25     6
19     5
26     3
18     1
30     1
27     1
Name: count, dtype: int64


In [34]:
# Print the score distribution of each self-rated language column,
# Chinese skills first, then English skills.
self_rating_columns = (
    "Chinese_reading",
    "Chinese_writing",
    "Chinese_speaking",
    "Chinese_listening",
    "English_reading",
    "English_writing",
    "English_speaking",
    "English_listening",
)
for rating_column in self_rating_columns:
    print(participants_data_df[rating_column].value_counts())

Chinese_reading
10    25
8     23
9     21
7      3
5      3
6      2
Name: count, dtype: int64
Chinese_writing
8     19
7     17
10    17
9     12
6      6
5      6
Name: count, dtype: int64
Chinese_speaking
8     23
9     21
10    18
7      7
6      5
5      3
Name: count, dtype: int64
Chinese_listening
8     25
10    21
9     17
7      8
6      4
5      2
Name: count, dtype: int64
English_reading
7    22
6    20
8    13
5    12
4     4
9     4
3     2
Name: count, dtype: int64
English_writing
6     26
4     16
7     16
5     11
8      4
3      3
10     1
Name: count, dtype: int64
English_speaking
6    17
5    17
4    15
7    12
3     7
8     5
2     3
1     1
Name: count, dtype: int64
English_listening
5    21
6    15
4    12
7    10
3     7
8     5
9     3
2     3
1     1
Name: count, dtype: int64


### Data Info

#### Participants

participant_id : participant identifier

age : Age in years at the initial session

sex : Self-rated by participant, M for male/F for female

task_order : Lan means participants did the language control task first; Con means participants did the cognitive control task first

task_rule : Red means the red color corresponds to 'naming in L1' in the language control task and 'pressing the same direction' in the cognitive control task; Blue means the blue color corresponds to 'naming in L1' in the language control task and 'pressing the same direction' in the cognitive control task

raven_score : The score of Raven test, full score = 60

AoA : Age of Acquisition, i.e. the age at which the participant started learning English

CET_4_score : The score of College English Test Band 4, full score = 710

RT_L1S : The reaction time (ms) for the L1S condition (first language, switch) of the language control task, which was re-collected in the behavioral laboratory

RT_L1NS : The reaction time (ms) for the L1NS condition (first language, non-switch) of the language control task, which was re-collected in the behavioral laboratory

RT_L2S : The reaction time (ms) for the L2S condition (second language, switch) of the language control task, which was re-collected in the behavioral laboratory

RT_L2NS : The reaction time (ms) for the L2NS condition (second language, non-switch) of the language control task, which was re-collected in the behavioral laboratory

ER_L1S : The error rate for the L1S condition (first language, switch) of the language control task, which was re-collected in the behavioral laboratory

ER_L1NS : The error rate for the L1NS condition (first language, non-switch) of the language control task, which was re-collected in the behavioral laboratory

ER_L2S : The error rate for the L2S condition (second language, switch) of the language control task, which was re-collected in the behavioral laboratory

ER_L2NS : The error rate for the L2NS condition (second language, non-switch) of the language control task, which was re-collected in the behavioral laboratory

Chinese/English reading, writing, speaking, listening : The self-rated scores for each language skill (reading, writing, speaking, listening) in Chinese and English, on a 10-point scale


#### Cog Control
{
  
  "AcquisitionMatrixPE": 64,

  "AcquisitionNumber": 1,

  "BandwidthPerPixelPhaseEncode": 33.967,

  "BaseResolution": 64,
  
  "CogAtlasID": "http://www.cognitiveatlas.org/task/id/TODO",

  "CoilCombinationMethod": "Sum of Squares",

  "CoilString": "C:HEA;HEP",

  "ConversionSoftware": "dcm2niix",

  "ConversionSoftwareVersion": "v1.0.20220720",

  "DerivedVendorReportedEchoSpacing": 0.000460005,

  "DeviceSerialNumber": "35193",

  "DwellTime": 3.1e-06,

  "EchoTime": 0.03,

  "EffectiveEchoSpacing": 0.000460005,

  "FlipAngle": 90,

  "HeudiconvVersion": "1.1.6",
  
  "ImageType": [
    "ORIGINAL",
    "PRIMARY",
    "M",
    "ND",
    "MOSAIC"
],

  "ImagingFrequency": 123.254,

  "InPlanePhaseEncodingDirectionDICOM": "COL",

  "InstitutionAddress": "Xinjiekouwai Street No 19,Beijing,Haidian District,CN,100875",

  "InstitutionalDepartmentName": "Department",

  "MRAcquisitionType": "2D",

  "MagneticFieldStrength": 3,

  "Manufacturer": "Siemens",

  "ManufacturersModelName": "TrioTim",

  "MatrixCoilMode": "SENSE",

  "Modality": "MR",

  "NonlinearGradientCorrection": false,

  "PartialFourier": 1,

  "PatientPosition": "HFS",

  "PercentPhaseFOV": 100,

  "PercentSampling": 100,

  "PhaseEncodingDirection": "j-",

  "PhaseEncodingSteps": 64,

  "PhaseResolution": 1,

  "PixelBandwidth": 2520,

  "ProtocolName": "ge_func_3x3x4_164",

  "PulseSequenceDetails": "%SiemensSeq%\\ep2d_bold",

  "ReceiveCoilName": "HeadMatrix",

  "ReconMatrixPE": 64,

  "RepetitionTime": 2,

  "ScanOptions": "FS",

  "ScanningSequence": "EP",

  "SequenceName": "*epfid2d1_64",

  "SequenceVariant": "SK",

  "SeriesDescription": "ge_func_3x3x4_164",

  "SliceThickness": 4,

  "SoftwareVersions": "syngo MR B17",

  "SpacingBetweenSlices": 4.6,

  "StationName": "MRC35193",

  "TaskName": "CognitiveControl",

  "TotalReadoutTime": 0.0289803,

  "dcmmeta_reorient_transform": [

    [0.0, -1.0, 0.0, 63.0],
    [1.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 1.0, 0.0],
    [0.0, 0.0, 0.0, 1.0]
],

  "dcmmeta_shape": [64, 64, 33, 164],

  "dcmmeta_slice_dim": 2,

  "dcmmeta_version": 0.6}


#### Lang Control
{

  "AcquisitionMatrixPE": 64,

  "AcquisitionNumber": 1,

  "BandwidthPerPixelPhaseEncode": 33.967,

  "BaseResolution": 64,

  "CogAtlasID": "http://www.cognitiveatlas.org/task/id/TODO",

  "CoilCombinationMethod": "Sum of Squares",

  "CoilString": "C:HEA;HEP",

  "ConversionSoftware": "dcm2niix",

  "ConversionSoftwareVersion": "v1.0.20220720",

  "DerivedVendorReportedEchoSpacing": 0.000460005,

  "DeviceSerialNumber": "35193",

  "DwellTime": 3.1e-06,

  "EchoTime": 0.03,

  "EffectiveEchoSpacing": 0.000460005,

  "FlipAngle": 90,

  "HeudiconvVersion": "1.1.6",

  "ImageType": [
    "ORIGINAL",
    "PRIMARY",
    "M",
    "ND",
    "MOSAIC"
],

  "ImagingFrequency": 123.254,

  "InPlanePhaseEncodingDirectionDICOM": "COL",

  "InstitutionAddress": "Xinjiekouwai Street No 19,Beijing,Haidian District,CN,100875",

  "InstitutionalDepartmentName": "Department",

  "MRAcquisitionType": "2D",

  "MagneticFieldStrength": 3,

  "Manufacturer": "Siemens",

  "ManufacturersModelName": "TrioTim",

  "MatrixCoilMode": "SENSE",

  "Modality": "MR",

  "NonlinearGradientCorrection": false,

  "PartialFourier": 1,

  "PatientPosition": "HFS",

  "PercentPhaseFOV": 100,

  "PercentSampling": 100,

  "PhaseEncodingDirection": "j-",

  "PhaseEncodingSteps": 64,

  "PhaseResolution": 1,

  "PixelBandwidth": 2520,

  "ProtocolName": "ge_func_3x3x4_164",

  "PulseSequenceDetails": "%SiemensSeq%\\ep2d_bold",

  "ReceiveCoilName": "HeadMatrix",

  "ReconMatrixPE": 64,

  "RepetitionTime": 2,

  "ScanOptions": "FS",

  "ScanningSequence": "EP",

  "SequenceName": "*epfid2d1_64",

  "SequenceVariant": "SK",

  "SeriesDescription": "ge_func_3x3x4_164",

  "SliceThickness": 4,

  "SoftwareVersions": "syngo MR B17",

  "SpacingBetweenSlices": 4.6,

  "StationName": "MRC35193",

  "TaskName": "LanguageControl",

  "TotalReadoutTime": 0.0289803,

  "dcmmeta_reorient_transform": [
    [0.0, -1.0, 0.0, 63.0],
    [1.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 1.0, 0.0],
    [0.0, 0.0, 0.0, 1.0]
],

  "dcmmeta_shape": [64, 64, 33, 164],

  "dcmmeta_slice_dim": 2,

  "dcmmeta_version": 0.6}



#### Scans

filename : Name of the nifti file

acq_time : Acquisition time of the particular scan, destroyed for anonymisation in this dataset

operator : Name of the operator

randstr : md5 hash of UIDs (randomly generated string)


### Data Info

#### Participants

participant_id : participant identifier

age : Age in years at the initial session

sex : Self-rated by participant, M for male/F for female

task_order : Lan means participants did the language control task first; Con means participants did the cognitive control task first

task_rule : Red means the red color corresponds to 'naming in L1' in the language control task and 'pressing the same direction' in the cognitive control task; Blue means the blue color corresponds to 'naming in L1' in the language control task and 'pressing the same direction' in the cognitive control task

raven_score : The score of Raven test, full score = 60

AoA : Age of Acquisition, i.e. the age at which the participant started learning English

CET_4_score : The score of College English Test Band 4, full score = 710

RT_L1S : The reaction time (ms) for the L1S condition (first language, switch) of the language control task, which was re-collected in the behavioral laboratory

RT_L1NS : The reaction time (ms) for the L1NS condition (first language, non-switch) of the language control task, which was re-collected in the behavioral laboratory

RT_L2S : The reaction time (ms) for the L2S condition (second language, switch) of the language control task, which was re-collected in the behavioral laboratory

RT_L2NS : The reaction time (ms) for the L2NS condition (second language, non-switch) of the language control task, which was re-collected in the behavioral laboratory

ER_L1S : The error rate for the L1S condition (first language, switch) of the language control task, which was re-collected in the behavioral laboratory

ER_L1NS : The error rate for the L1NS condition (first language, non-switch) of the language control task, which was re-collected in the behavioral laboratory

ER_L2S : The error rate for the L2S condition (second language, switch) of the language control task, which was re-collected in the behavioral laboratory

ER_L2NS : The error rate for the L2NS condition (second language, non-switch) of the language control task, which was re-collected in the behavioral laboratory

Chinese/English reading, writing, speaking, listening : The self-rated scores for each language skill (reading, writing, speaking, listening) in Chinese and English, on a 10-point scale


#### Cog Control
{
  
  "AcquisitionMatrixPE": 64,

  "AcquisitionNumber": 1,

  "BandwidthPerPixelPhaseEncode": 33.967,

  "BaseResolution": 64,
  
  "CogAtlasID": "http://www.cognitiveatlas.org/task/id/TODO",

  "CoilCombinationMethod": "Sum of Squares",

  "CoilString": "C:HEA;HEP",

  "ConversionSoftware": "dcm2niix",

  "ConversionSoftwareVersion": "v1.0.20220720",

  "DerivedVendorReportedEchoSpacing": 0.000460005,

  "DeviceSerialNumber": "35193",

  "DwellTime": 3.1e-06,

  "EchoTime": 0.03,

  "EffectiveEchoSpacing": 0.000460005,

  "FlipAngle": 90,

  "HeudiconvVersion": "1.1.6",
  
  "ImageType": [
    "ORIGINAL",
    "PRIMARY",
    "M",
    "ND",
    "MOSAIC"
],

  "ImagingFrequency": 123.254,

  "InPlanePhaseEncodingDirectionDICOM": "COL",

  "InstitutionAddress": "Xinjiekouwai Street No 19,Beijing,Haidian District,CN,100875",

  "InstitutionalDepartmentName": "Department",

  "MRAcquisitionType": "2D",

  "MagneticFieldStrength": 3,

  "Manufacturer": "Siemens",

  "ManufacturersModelName": "TrioTim",

  "MatrixCoilMode": "SENSE",

  "Modality": "MR",

  "NonlinearGradientCorrection": false,

  "PartialFourier": 1,

  "PatientPosition": "HFS",

  "PercentPhaseFOV": 100,

  "PercentSampling": 100,

  "PhaseEncodingDirection": "j-",

  "PhaseEncodingSteps": 64,

  "PhaseResolution": 1,

  "PixelBandwidth": 2520,

  "ProtocolName": "ge_func_3x3x4_164",

  "PulseSequenceDetails": "%SiemensSeq%\\ep2d_bold",

  "ReceiveCoilName": "HeadMatrix",

  "ReconMatrixPE": 64,

  "RepetitionTime": 2,

  "ScanOptions": "FS",

  "ScanningSequence": "EP",

  "SequenceName": "*epfid2d1_64",

  "SequenceVariant": "SK",

  "SeriesDescription": "ge_func_3x3x4_164",

  "SliceThickness": 4,

  "SoftwareVersions": "syngo MR B17",

  "SpacingBetweenSlices": 4.6,

  "StationName": "MRC35193",

  "TaskName": "CognitiveControl",

  "TotalReadoutTime": 0.0289803,

  "dcmmeta_reorient_transform": [

    [0.0, -1.0, 0.0, 63.0],
    [1.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 1.0, 0.0],
    [0.0, 0.0, 0.0, 1.0]
],

  "dcmmeta_shape": [64, 64, 33, 164],

  "dcmmeta_slice_dim": 2,

  "dcmmeta_version": 0.6}


#### Lang Control
{

  "AcquisitionMatrixPE": 64,

  "AcquisitionNumber": 1,

  "BandwidthPerPixelPhaseEncode": 33.967,

  "BaseResolution": 64,

  "CogAtlasID": "http://www.cognitiveatlas.org/task/id/TODO",

  "CoilCombinationMethod": "Sum of Squares",

  "CoilString": "C:HEA;HEP",

  "ConversionSoftware": "dcm2niix",

  "ConversionSoftwareVersion": "v1.0.20220720",

  "DerivedVendorReportedEchoSpacing": 0.000460005,

  "DeviceSerialNumber": "35193",

  "DwellTime": 3.1e-06,

  "EchoTime": 0.03,

  "EffectiveEchoSpacing": 0.000460005,

  "FlipAngle": 90,

  "HeudiconvVersion": "1.1.6",

  "ImageType": [
    "ORIGINAL",
    "PRIMARY",
    "M",
    "ND",
    "MOSAIC"
],

  "ImagingFrequency": 123.254,

  "InPlanePhaseEncodingDirectionDICOM": "COL",

  "InstitutionAddress": "Xinjiekouwai Street No 19,Beijing,Haidian District,CN,100875",

  "InstitutionalDepartmentName": "Department",

  "MRAcquisitionType": "2D",

  "MagneticFieldStrength": 3,

  "Manufacturer": "Siemens",

  "ManufacturersModelName": "TrioTim",

  "MatrixCoilMode": "SENSE",

  "Modality": "MR",

  "NonlinearGradientCorrection": false,

  "PartialFourier": 1,

  "PatientPosition": "HFS",

  "PercentPhaseFOV": 100,

  "PercentSampling": 100,

  "PhaseEncodingDirection": "j-",

  "PhaseEncodingSteps": 64,

  "PhaseResolution": 1,

  "PixelBandwidth": 2520,

  "ProtocolName": "ge_func_3x3x4_164",

  "PulseSequenceDetails": "%SiemensSeq%\\ep2d_bold",

  "ReceiveCoilName": "HeadMatrix",

  "ReconMatrixPE": 64,

  "RepetitionTime": 2,

  "ScanOptions": "FS",

  "ScanningSequence": "EP",

  "SequenceName": "*epfid2d1_64",

  "SequenceVariant": "SK",

  "SeriesDescription": "ge_func_3x3x4_164",

  "SliceThickness": 4,

  "SoftwareVersions": "syngo MR B17",

  "SpacingBetweenSlices": 4.6,

  "StationName": "MRC35193",

  "TaskName": "LanguageControl",

  "TotalReadoutTime": 0.0289803,

  "dcmmeta_reorient_transform": [
    [0.0, -1.0, 0.0, 63.0],
    [1.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 1.0, 0.0],
    [0.0, 0.0, 0.0, 1.0]
],

  "dcmmeta_shape": [64, 64, 33, 164],

  "dcmmeta_slice_dim": 2,

  "dcmmeta_version": 0.6}



#### Scans

filename : Name of the nifti file

acq_time : Acquisition time of the particular scan, destroyed for anonymisation in this dataset

operator : Name of the operator

randstr : md5 hash of UIDs (randomly generated string)
