## Get the Heritability results from all the previous tests (Antoine)

In [1]:
import os
import re
import pandas as pd

In [2]:
src_path = '/neurospin/dico/adufournet/Runs/'

for path in os.listdir(src_path):
    if not path.endswith('.txt'):
        for folder in os.listdir(f"{src_path}{path}"):
            if folder=="Heritability":
                for results in os.listdir(f"{src_path}{path}/Heritability/results"):
                    print(f'{path[16:]}--results--{results}')
                    if os.path.exists(f"{src_path}{path}/Heritability/results/{results}/house/polygenic.out"):
                        ROOTDIR=f"{src_path}{path}/Heritability/results/{results}"    
                        #with open(f"{src_path}{path}/Heritability/results/{results}/house/polygenic.out") as f:
                            #print(f)



HCP_Neofac--results--neofac_A
HCP_Neofac--results--neofac_N
HCP_Neofac--results--neofac_O
HCP_Neofac--results--neofac_C
HCP_Neofac--results--neofac_E
HCP_Flanker--results--Flanker_AgeAdj
HCP_Flanker--results--Flanker_Unadj
Right_PCS_HCP--results--PC8
Right_PCS_HCP--results--PC6
Right_PCS_HCP--results--PC19
Right_PCS_HCP--results--PC14
Right_PCS_HCP--results--PC13
Right_PCS_HCP--results--PC20
Right_PCS_HCP--results--PC15
Right_PCS_HCP--results--PC16
Right_PCS_HCP--results--PC11
Right_PCS_HCP--results--binary_pred
Right_PCS_HCP--results--PC5
Right_PCS_HCP--results--PC18
Right_PCS_HCP--results--PC7
Right_PCS_HCP--results--PC1
Right_PCS_HCP--results--PC2
Right_PCS_HCP--results--PC9
Right_PCS_HCP--results--PC17
Right_PCS_HCP--results--PC10
Right_PCS_HCP--results--prob_pred
Right_PCS_HCP--results--PC12
Right_PCS_HCP--results--PC3
Right_PCS_HCP--results--PC4
Asymmetry_PCS_HCP--results--binary_asymmetry_diff
Asymmetry_PCS_HCP--results--asymmetry_combines_prob_PCS
Asymmetry_PCS_HCP--results--Le

In [3]:
# Initialize an empty list to store the results
results_list = []

# Define the source path
src_path = '/neurospin/dico/adufournet/Runs/'

def extract_heritability_details(file_content):
    heritability = None
    std_error = None
    p_value = None
    covariate = ''

    for line in file_content:
        if "H2r is" in line:
            parts = line.split()
            heritability = float(parts[2])
            p_value = float(parts[5])
        elif "H2r Std. Error:" in line:
            parts = line.split()
            std_error = float(parts[3])
        elif "(Significant)" in line:
            parts = line.split()
            covariate = f"{covariate} {parts[0]} {parts[1]} {parts[2]} {parts[3]}"


    
    return heritability, std_error, p_value, covariate

def extract_region_and_side(directory_name):
    # Use regular expressions to find the side and region
    side_match = re.search(r'(Left|Right|Asymmetry)', directory_name)
    region_match = re.search(r'_(PCS|Orbital|OtherRegions)_', directory_name)  # Adjust 'OtherRegions' as necessary
    database_match = re.search(r'_(HCP|UKB)_', directory_name) 
    
    # Extract the side and region from the matches
    side = side_match.group(0) if side_match else None
    region = region_match.group(1) if region_match else None
    database = database_match.group(1) if database_match else 'HCP'

    return region, side, database

def extract_class_type(results):

    class_type_match = re.search(r'(prob|binary|PC)', results) 
    class_type = class_type_match.group(1) if class_type_match else 'prob'

    return class_type


# Iterate over directories in the source path
for path in os.listdir(src_path):
    if not path.endswith('.txt'):
        for folder in os.listdir(f"{src_path}{path}"):
            if folder=="Heritability":
                for results in os.listdir(f"{src_path}{path}/Heritability/results"):
                    polygenic_out_file = f"{src_path}{path}/Heritability/results/{results}/house/polygenic.out"
                    if os.path.exists(polygenic_out_file):
                        ROOTDIR = f"{src_path}{path}/Heritability/results/{results}"
                        try:
                            # Read the file content
                            with open(polygenic_out_file, 'r') as file:
                                file_content = file.readlines()

                            # Print the file content for debugging
                            #print(f"Debug: file_content = {''.join(file_content)}")

                            # Extract heritability details
                            heritability, std_error, p_value, covariate = extract_heritability_details(file_content)

                            # Extract region and side
                            region, side, database = extract_region_and_side(path[16:])
                            class_type = extract_class_type(results)

                            # Extract information from directory and file names
                            model_info = "Barlo Twin"
                            output_dim = "10" if path[16:].endswith('10') else 256

                            # Append the extracted data to the results list
                            results_list.append({
                                "Label" : results, 
                                "Database": database,
                                "Model": model_info,
                                "Output dim": output_dim,
                                "Region": region,
                                "Side": side,
                                "Covariates": covariate,
                                "Class": class_type,
                                "Heritability": heritability,
                                "Std. error": std_error,
                                "P-value": p_value
                            })

                        except ValueError as ve:
                            print(f"ValueError: {ve} for {ROOTDIR} ")

# Convert the results list to a DataFrame
results_df = pd.DataFrame(results_list)

results_df["Heritability"] = results_df["Heritability"].round(2)
results_df["Std. error"] = results_df["Std. error"].round(2)
results_df["P-value"] = results_df["P-value"].apply(lambda x: f"{x:.1e}" if x is not None else None)

# Save the DataFrame to a CSV file
results_df.to_csv('heritability_results_summary.csv', index=False)


In [6]:
results_df["P-value"] = results_df["P-value"].astype(float)

results_df[results_df["P-value"].astype(float)< 0.001].sort_values("P-value", ascending=True)#.to_csv("heritability_bst_results.csv", index=False)

Unnamed: 0,Label,Database,Model,Output dim,Region,Side,Covariates,Class,Heritability,Std. error,P-value
4,neofac_E,HCP,Barlo Twin,256,,,Age*SEX p = 0.0512588,prob,0.46,0.06,2.8e-13
3,neofac_C,HCP,Barlo Twin,256,,,SEX p = 0.0033783,prob,0.46,0.06,5.5e-13
1,neofac_N,HCP,Barlo Twin,256,,,SEX p = 0.0468656,prob,0.44,0.07,1.2e-09
6,Flanker_Unadj,HCP,Barlo Twin,256,,,SEX p = 0.0075925,prob,0.34,0.06,2.6e-08
5,Flanker_AgeAdj,HCP,Barlo Twin,256,,,SEX p = 0.0275798,prob,0.32,0.06,1e-07
34,Zero_PCS,HCP,Barlo Twin,256,PCS,Asymmetry,SEX p = 0.0151077,PC,0.28,0.07,2.3e-05
68,prob_pred_scaled_on_ACCP,HCP,Barlo Twin,256,PCS,Left,,prob,0.26,0.07,4.2e-05
0,neofac_A,HCP,Barlo Twin,256,,,SEX p = 1.1351969e-08 Age*SEX p = 0.0567923,prob,0.28,0.07,4.7e-05
53,prob_pred_scaled_on_UKB_brain_vol_sum,HCP,Barlo Twin,256,PCS,Left,,prob,0.25,0.07,5.7e-05
51,prob_pred_scaled_on_UKB_brain_vol,HCP,Barlo Twin,256,PCS,Left,,prob,0.25,0.07,5.7e-05
