# Hypotheses Test 2

#### $H_0$: There is no significant relationship between age and the adaptation towards specificity in memory representations.
#### $H_A$: Adaptation towards specificity in memory representations increases/decreases with age. 

In [1]:
# Importing Libraries
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import scipy.stats as stats
import os
from scipy.stats import ttest_ind
from scipy.stats import chi2_contingency
from statsmodels.stats.multitest import multipletests

##  Data Processing

In [2]:
e1_sub_ages = pd.read_csv("../data/e1/e1_sub_ages.csv") 
e1_memory = pd.read_csv("../data/e1/e1_memory_data.csv")

# Merge dataframes
memory_merged = pd.merge(e1_memory, e1_sub_ages, on='subject_id')

In [3]:
e1_sub_ages.head()

Unnamed: 0,subject_id,age
0,3768,8.04
1,3770,8.1
2,3855,16.83
3,3853,16.94
4,3898,22.15


In [4]:
e1_memory.head()

Unnamed: 0,subject_id,task_part,block_order,block_condition,category_type,num_stim_rep,image,category,task_block,stim_num,correct_mem_response,mem_response,confidence,rt,mem_acc,conf_number,conf_rating,foil_type,memory_delay
0,3768,memory_trial,1,2,old,6.0,horse7.png,horse,animals,7,old,old,definitely,1866.0,1.0,1.0,4.0,old,7.0
1,3768,memory_trial,1,2,old,6.0,cow5.png,cow,animals,5,old,old,definitely,1792.0,1.0,1.0,4.0,old,7.0
2,3768,memory_trial,1,2,old,3.0,cow8.png,cow,animals,8,old,new,definitely,3074.0,0.0,1.0,1.0,old,7.0
3,3768,memory_trial,1,2,old,6.0,goat6.png,goat,animals,6,old,old,definitely,1411.0,1.0,1.0,4.0,old,7.0
4,3768,memory_trial,1,2,old,3.0,goat5.png,goat,animals,5,old,new,maybe,3348.0,0.0,0.0,2.0,old,7.0


In [5]:
memory_merged.head()

Unnamed: 0,subject_id,task_part,block_order,block_condition,category_type,num_stim_rep,image,category,task_block,stim_num,correct_mem_response,mem_response,confidence,rt,mem_acc,conf_number,conf_rating,foil_type,memory_delay,age
0,3768,memory_trial,1,2,old,6.0,horse7.png,horse,animals,7,old,old,definitely,1866.0,1.0,1.0,4.0,old,7.0,8.04
1,3768,memory_trial,1,2,old,6.0,cow5.png,cow,animals,5,old,old,definitely,1792.0,1.0,1.0,4.0,old,7.0,8.04
2,3768,memory_trial,1,2,old,3.0,cow8.png,cow,animals,8,old,new,definitely,3074.0,0.0,1.0,1.0,old,7.0,8.04
3,3768,memory_trial,1,2,old,6.0,goat6.png,goat,animals,6,old,old,definitely,1411.0,1.0,1.0,4.0,old,7.0,8.04
4,3768,memory_trial,1,2,old,3.0,goat5.png,goat,animals,5,old,new,maybe,3348.0,0.0,0.0,2.0,old,7.0,8.04


In [8]:
# Check for missing values
print("\nMissing values in memory data:")
print(memory_merged.isnull().sum())

# Remove missing values if any
memory_merged.dropna(inplace=True)


Missing values in memory data:
subject_id              0
task_part               0
block_order             0
block_condition         0
category_type           0
num_stim_rep            0
image                   0
category                0
task_block              0
stim_num                0
correct_mem_response    0
mem_response            0
confidence              0
rt                      0
mem_acc                 0
conf_number             0
conf_rating             0
foil_type               0
memory_delay            0
age                     0
dtype: int64


In [9]:
memory_merged['subject_id'].nunique()

151

In [11]:
memory_data = memory_merged[['age', 'block_condition', 'mem_acc']]
memory_data.head()

Unnamed: 0,age,block_condition,mem_acc
0,8.04,2,1.0
1,8.04,2,1.0
2,8.04,2,0.0
3,8.04,2,1.0
4,8.04,2,0.0


In [19]:
memory_data.dtypes

age                 float64
block_condition       int64
mem_acc             float64
age_group          category
dtype: object

In [18]:
# split into age groups
memory_merged['age_group'] = pd.cut(memory_merged['age'], bins=[0, 13, 18, float('inf')], labels=['Children', 'Adolescents', 'Adults'])
memory_merged.head()

Unnamed: 0,subject_id,task_part,block_order,block_condition,category_type,num_stim_rep,image,category,task_block,stim_num,...,mem_response,confidence,rt,mem_acc,conf_number,conf_rating,foil_type,memory_delay,age,age_group
0,3768,memory_trial,1,2,old,6.0,horse7.png,horse,animals,7,...,old,definitely,1866.0,1.0,1.0,4.0,old,7.0,8.04,Children
1,3768,memory_trial,1,2,old,6.0,cow5.png,cow,animals,5,...,old,definitely,1792.0,1.0,1.0,4.0,old,7.0,8.04,Children
2,3768,memory_trial,1,2,old,3.0,cow8.png,cow,animals,8,...,new,definitely,3074.0,0.0,1.0,1.0,old,7.0,8.04,Children
3,3768,memory_trial,1,2,old,6.0,goat6.png,goat,animals,6,...,old,definitely,1411.0,1.0,1.0,4.0,old,7.0,8.04,Children
4,3768,memory_trial,1,2,old,3.0,goat5.png,goat,animals,5,...,new,maybe,3348.0,0.0,0.0,2.0,old,7.0,8.04,Children


## Statistical Tests

We have seen from the previous hypothesis that: 
- There is a statistically significant association between specificity (`block_condition`) and memory representation(`memory_accuracy`).

- There is a highly significant relationship between memory representations (`memory accuracy` and `confidence level` i.e conf_number) and specificity i.e `block_condition`

So now we check if this effect changes with age. 

### Age Vs Memory Accuracy

#### Chi-Square Test of Independence

We already know that there's a significant relationship between `memory_accuracy` and `block_conditon` or `confidence level` and `block_conditon`. We now check if there significant relationship between age group and memory accuracy.

To test the relationship between age and memory accuracy, we'll use the Chi-Square Test of Independence. 

- This test is appropriate because both age group and memory accuracy can be made categorical variables, and we want to assess if there is an association between them.

In [23]:
# Convert 'mem_accuracy' to categorical: 1 for correct, 0 for incorrect
memory_merged['mem_acc'] = np.where(memory_merged['mem_acc'] == 1.0, 'Correct', 'Incorrect')
memory_merged.head()

Unnamed: 0,subject_id,task_part,block_order,block_condition,category_type,num_stim_rep,image,category,task_block,stim_num,...,mem_response,confidence,rt,mem_acc,conf_number,conf_rating,foil_type,memory_delay,age,age_group
0,3768,memory_trial,1,2,old,6.0,horse7.png,horse,animals,7,...,old,definitely,1866.0,Incorrect,1.0,4.0,old,7.0,8.04,Children
1,3768,memory_trial,1,2,old,6.0,cow5.png,cow,animals,5,...,old,definitely,1792.0,Incorrect,1.0,4.0,old,7.0,8.04,Children
2,3768,memory_trial,1,2,old,3.0,cow8.png,cow,animals,8,...,new,definitely,3074.0,Incorrect,1.0,1.0,old,7.0,8.04,Children
3,3768,memory_trial,1,2,old,6.0,goat6.png,goat,animals,6,...,old,definitely,1411.0,Incorrect,1.0,4.0,old,7.0,8.04,Children
4,3768,memory_trial,1,2,old,3.0,goat5.png,goat,animals,5,...,new,maybe,3348.0,Incorrect,0.0,2.0,old,7.0,8.04,Children


In [26]:
contingency_table = pd.crosstab(memory_merged['age_group'], memory_merged['mem_acc'])

# Perform the Chi-Square Test of Independence
chi2, p_value, _, _ = chi2_contingency(contingency_table)

# Print the results
print("Chi-square Test Results for Age vs. Memory Accuracy:")
print(f"Chi-square Statistic: {chi2}")
print(f"P-value: {p_value}")

Chi-square Test Results for Age vs. Memory Accuracy:
Chi-square Statistic: 0.0
P-value: 1.0
