# In [None]:
import pandas as pd
import scipy.stats as stats

# Load the input CSV; the path is supplied interactively.
file_path = input("Please enter the input CSV_with_sentence_pairs_and_it's_avg_traits path: ")
df = pd.read_csv(file_path)

# Canonicalise each pair so that ("stimulus1", "stimulus2") and
# ("stimulus2", "stimulus1") map to the same grouping key.
# Zipping the two columns avoids building a Series for every row (which is
# what DataFrame.apply(axis=1) does) and also works when the CSV contains a
# header but no data rows.
df['sorted_pair'] = [
    tuple(sorted(pair)) for pair in zip(df['stimulus1'], df['stimulus2'])
]

def calculate_correlations(group):
    """Correlate ``similarity`` with ``Neuroticism`` within one group of rows.

    Intended to be passed to ``DataFrame.groupby(...).apply``.

    Rows in which either value is missing are excluded before correlating
    (pairwise deletion), so a single missing value does not turn the whole
    group's result into NaN.

    Args:
        group: DataFrame holding the rows of one group. Must contain the
            columns ``similarity`` and ``Neuroticism``.

    Returns:
        A ``pd.Series`` with the entries ``pearson_corr``,
        ``pearson_p_value``, ``spearman_corr``, ``spearman_p_value`` and
        ``count``. ``count`` is the total number of rows in the group,
        including rows with missing values. The four statistics are left
        empty (``None``) when fewer than two complete observations remain or
        when either column is constant, because the correlation is undefined
        in those cases.

    Raises:
        KeyError: If ``group`` lacks one of the required columns.
    """
    result = {
        'pearson_corr': None,
        'pearson_p_value': None,
        'spearman_corr': None,
        'spearman_p_value': None,
        'count': len(group),
    }

    # Only rows with both values present can contribute to a correlation.
    complete = group[['similarity', 'Neuroticism']].dropna()
    similarity = complete['similarity']
    neuroticism = complete['Neuroticism']

    has_enough_rows = len(complete) >= 2
    # A constant column has zero variance, so neither coefficient is defined.
    is_constant = similarity.nunique() <= 1 or neuroticism.nunique() <= 1

    if has_enough_rows and not is_constant:
        result['pearson_corr'], result['pearson_p_value'] = stats.pearsonr(
            similarity, neuroticism
        )
        result['spearman_corr'], result['spearman_p_value'] = stats.spearmanr(
            similarity, neuroticism
        )

    return pd.Series(result)

# Compute one row of correlation statistics per unordered stimulus pair.
# NOTE(review): with as_index=False the result presumably already carries a
# default integer index, in which case reset_index() adds an extra `index`
# column to the saved CSV; confirm whether that column is wanted.
pairs_grouped = df.groupby('sorted_pair', as_index=False)
correlation_results = pairs_grouped.apply(calculate_correlations).reset_index()

# Ask for the destination and write the table without the row index.
output_file_path = input("Please enter the output CSV file path: ")
correlation_results.to_csv(output_file_path, index=False)

# Echo the destination path (displayed as the cell output in a notebook).
output_file_path