## Get the Heritability results from all the previous tests (Antoine)

In [1]:
import os
import re
import subprocess
import pandas as pd

In [2]:
src_path = '/neurospin/dico/adufournet/Runs/'

# Walk every entry of the runs folder (ignoring .txt files) and print the
# name of each heritability result folder it contains.
for path in os.listdir(src_path):
    if path.endswith('.txt'):
        continue
    results_dir = f"{src_path}{path}/Heritability/results"
    for results in os.listdir(results_dir):
        # path[16:] drops the first 16 characters of the folder name
        # (presumably a fixed-width prefix -- TODO confirm).
        print(f'{path[16:]}--results--{results}')
        candidate = f"{results_dir}/{results}"
        if os.path.exists(f"{candidate}/house/polygenic.out"):
            # Keep track of the last result folder holding a polygenic.out
            ROOTDIR = candidate



HCP_Neofac--results--neofac_A
HCP_Neofac--results--neofac_N
HCP_Neofac--results--neofac_O
HCP_Neofac--results--neofac_C
HCP_Neofac--results--neofac_E
HCP_Flanker--results--Flanker_AgeAdj
HCP_Flanker--results--Flanker_Unadj
Right_PCS_HCP--results--binary_pred
Right_PCS_HCP--results--prob_pred
Asymmetry_PCS_HCP--results--binary_asymmetry_diff
Asymmetry_PCS_HCP--results--asymmetry_combines_prob_PCS
Asymmetry_PCS_HCP--results--Left_without_Right_PCS
Asymmetry_PCS_HCP--results--binary_asymmetry_prob
Asymmetry_PCS_HCP--results--Two_PCS
Asymmetry_PCS_HCP--results--Zero_PCS
Asymmetry_PCS_HCP--results--Right_without_Left_PCS
Left_PCS_HCP--results--prob_pred_scaled_on_UKB
Left_PCS_HCP--results--prob_pred_scaled_on_UKB_brain_vol
Left_PCS_HCP--results--prob_pred_scaled_on_UKB_brain_vol_sum
Left_PCS_HCP--results--prob_pred_scaled_on_UKB_2
Left_PCS_HCP--results--binary_pred_scaled_on_UKB
Left_PCS_HCP--results--prob_pred_scaled_on_ACCP
Left_PCS_HCP--results--binary_pred_scaled_on_ACCP
Right_PCS_HCP_d

In [3]:
# Accumulator filled with one dict per parsed polygenic.out file
results_list = list()

# Root folder whose entries are scanned for heritability results
src_path = '/neurospin/dico/adufournet/Runs/'

def extract_heritability_details(file_content):
    """Parse the lines of a ``polygenic.out`` file.

    Three kinds of lines are recognised, tested in this order:

    * a line containing ``"H2r is"``: whitespace-split fields 2 and 5 are
      read as the heritability and its p-value (presumably the line looks
      like ``H2r is <h2r> p = <p> ...`` -- TODO confirm against real files);
    * a line containing ``"H2r Std. Error:"``: field 3 is the standard error;
    * any other line containing ``"(Significant)"``: its first (up to) four
      fields are kept as a covariate entry.

    Parameters:
        file_content: iterable of str, the lines of the file.

    Returns:
        Tuple ``(heritability, std_error, p_value, covariate)``. Each numeric
        item is a float, or None when its line was not found. ``covariate``
        is the space-joined covariate entries, or None when there are none.

    Raises:
        ValueError: if a numeric field cannot be converted to float.
        IndexError: if an ``H2r`` line has fewer fields than expected.
    """
    heritability = None
    std_error = None
    p_value = None
    # Collected in a list so the final string never starts with the text
    # "None" (the previous f-string accumulation interpolated the initial
    # None value into the result).
    significant_covariates = []

    for line in file_content:
        if "H2r is" in line:
            parts = line.split()
            heritability = float(parts[2])
            p_value = float(parts[5])
        elif "H2r Std. Error:" in line:
            parts = line.split()
            std_error = float(parts[3])
        elif "(Significant)" in line:
            parts = line.split()
            significant_covariates.append(" ".join(parts[:4]))

    covariate = " ".join(significant_covariates) if significant_covariates else None

    return heritability, std_error, p_value, covariate

def extract_region_and_side(directory_name):
    """Derive region, side and database labels from a run directory name.

    Parameters:
        directory_name: str, e.g. ``"Left_PCS_HCP"``.

    Returns:
        Tuple ``(region, side, database)``:
        * region: ``"PCS"`` / ``"OtherRegions"`` or None if absent;
        * side: ``"Left"`` / ``"Right"`` / ``"Asymmetry"`` or None if absent;
        * database: ``"HCP"`` / ``"UKB"``, defaulting to ``"HCP"`` if absent.
    """
    def _delimited_token(alternatives):
        # First try the historical pattern (underscore on both sides) so
        # every name that used to match keeps the same result.
        strict = re.search(rf'_({alternatives})_', directory_name)
        if strict:
            return strict.group(1)
        # Otherwise also accept the token at the very start or end of the
        # name; "Left_PCS_UKB" used to fall through to the 'HCP' default.
        loose = re.search(rf'(?:^|_)({alternatives})(?:_|$)', directory_name)
        return loose.group(1) if loose else None

    # The side keyword may appear anywhere in the name
    side_match = re.search(r'(Left|Right|Asymmetry)', directory_name)
    side = side_match.group(0) if side_match else None

    region = _delimited_token('PCS|OtherRegions')  # Adjust 'OtherRegions' as necessary
    database = _delimited_token('HCP|UKB') or 'HCP'

    return region, side, database

def extract_class_type(results):
    """Return 'prob' or 'binary' depending on which keyword occurs first
    in the result folder name; names with neither keyword yield 'prob'."""
    found = re.search(r'(prob|binary)', results)
    if found is None:
        return 'prob'
    return found.group(1)


# Column order of the summary table. Passing it to the DataFrame constructor
# guarantees these columns exist even when no result file was found.
RESULT_COLUMNS = [
    "Label", "Database", "Model", "Output dim", "Region", "Side",
    "Covariates", "Class", "Heritability", "Std. error", "P-value",
]

# Iterate over directories in the source path
for path in os.listdir(src_path):
    if path.endswith('.txt'):
        continue

    results_root = f"{src_path}{path}/Heritability/results"
    # Skip entries without a Heritability/results folder instead of letting
    # os.listdir abort the whole aggregation.
    if not os.path.isdir(results_root):
        continue

    for results in os.listdir(results_root):
        polygenic_out_file = f"{results_root}/{results}/house/polygenic.out"
        if not os.path.exists(polygenic_out_file):
            continue
        ROOTDIR = f"{results_root}/{results}"

        try:
            # Read the file content
            with open(polygenic_out_file, 'r') as file:
                file_content = file.readlines()

            # Extract heritability details
            heritability, std_error, p_value, covariate = extract_heritability_details(file_content)
        except (ValueError, IndexError) as err:
            # A malformed file is reported and skipped; the message keeps the
            # "<ExceptionName>: <detail> for <folder> " layout.
            print(f"{type(err).__name__}: {err} for {ROOTDIR} ")
            continue

        # Extract region, side and database from the run folder name
        # (path[16:] drops the first 16 characters of that name).
        region, side, database = extract_region_and_side(path[16:])
        class_type = extract_class_type(results)

        # Extract information from directory and file names
        model_info = "Barlo Twin"
        # Always an int so the "Output dim" column has a single type
        output_dim = 10 if path[16:].endswith('10') else 256

        # Append the extracted data to the results list
        results_list.append({
            "Label": results,
            "Database": database,
            "Model": model_info,
            "Output dim": output_dim,
            "Region": region,
            "Side": side,
            "Covariates": covariate,
            "Class": class_type,
            "Heritability": heritability,
            "Std. error": std_error,
            "P-value": p_value,
        })

# Convert the results list to a DataFrame
results_df = pd.DataFrame(results_list, columns=RESULT_COLUMNS)

# to_numeric turns missing values (None) into NaN so rounding is always
# applied to a numeric column.
for numeric_column in ("Heritability", "Std. error"):
    results_df[numeric_column] = pd.to_numeric(results_df[numeric_column]).round(2)

# Missing p-values are NaN once stored in a float column, so test with
# pd.notna; an "is not None" check would format them as the string "nan".
results_df["P-value"] = results_df["P-value"].apply(
    lambda x: f"{x:.1e}" if pd.notna(x) else None
)

# Save the DataFrame to a CSV file
results_df.to_csv('heritability_results_summary.csv', index=False)

# Optionally, display the DataFrame
print(results_df)

                                    Label Database       Model Output dim  \
0                                neofac_A      HCP  Barlo Twin        256   
1                                neofac_N      HCP  Barlo Twin        256   
2                                neofac_O      HCP  Barlo Twin        256   
3                                neofac_C      HCP  Barlo Twin        256   
4                                neofac_E      HCP  Barlo Twin        256   
5                          Flanker_AgeAdj      HCP  Barlo Twin        256   
6                           Flanker_Unadj      HCP  Barlo Twin        256   
7                             binary_pred      HCP  Barlo Twin        256   
8                               prob_pred      HCP  Barlo Twin        256   
9                   binary_asymmetry_diff      HCP  Barlo Twin        256   
10            asymmetry_combines_prob_PCS      HCP  Barlo Twin        256   
11                 Left_without_Right_PCS      HCP  Barlo Twin        256   