In [134]:
import pandas as pd
# import cupy as cp
import numpy as np
import time

In [135]:
def pearsonr(half1, half2):
    numerator = np.sum((half1 - half1.mean()) * (half2 - half2.mean()))
    denominator = np.sqrt(np.sum((half1 - half1.mean())**2) * np.sum((half2 - half2.mean())**2))
    corr = numerator / denominator
    return corr

In [139]:
# # Set seed for reproducibility
# np.random.seed(42)

# Generate fake data: 100 participants and ratings for 8 emotions (e.g., scale 1-7)
num_participants = 100
num_emotions = 8
fake_data = np.random.randint(1, 8, size=(num_participants, num_emotions))

In [140]:
# Function to calculate split-half reliability
def split_half_reliability(data, num_iterations=1000):
    all_correlations = []
    
    for _ in range(num_iterations):
        # Randomly split data into two halves
        indices = np.random.permutation(data.shape[0])
        half_size = len(indices) // 2
        half1_indices = indices[:half_size]
        half2_indices = indices[half_size:]
        
        # Compute Pearson correlation for each emotion and store the results
        correlations = []
        for i in range(data.shape[1]):
            half1 = data[half1_indices, i]
            half2 = data[half2_indices, i]
            
            # Calculate Pearson correlation between the two halves for current emotion
            correlation = pearsonr(half1, half2)
            correlations.append(correlation)
        
        if len(all_correlations) == 0:
            all_correlations = np.array(correlations)
        else:
            all_correlations = np.vstack((all_correlations, np.array(correlations)))

    # Compute the average Pearson correlation across all iterations for each emotion
    avg_pearson_correlation = np.mean(all_correlations, axis=0)
    
    reliabilities = []
    for avg_emotion_corr in avg_pearson_correlation:
        # Apply Spearman-Brown correction
        reliability = (2 * avg_emotion_corr) / (1 + avg_emotion_corr)
        reliabilities.append(reliability)
    
    return reliabilities

In [141]:
np.random.seed(42)
s = time.time()
reliabilities = split_half_reliability(fake_data)
print(time.time() - s)
print(reliabilities)

0.3421292304992676
[0.01881706390421111, 0.000775227677495702, 0.01694811676721613, 0.004367968006722288, 0.0026885617523933216, -0.02215949663355407, 0.0006191104701670859, -0.012816092800639452]
