# Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gzip
import os
import glob

### heritability 

In [161]:
glob.glob('../annots/*')

['../annots/tasha_annots_to_merge.csv',
 '../annots/oliver_conserved_annotations_processed',
 '../annots/tashas_conserved_annotations_processed',
 '../annots/tashas_annotations_processed',
 '../annots/baseline_tss_annotations_processed',
 '../annots/tashas_baseline_not_conserved_annotations_processed',
 '../annots/olivers_annotations_processed',
 '../annots/oliver_annots_to_merge.csv',
 '../annots/tasha_tss_annotations_processed',
 '../annots/oliver_tss_annotations_processed',
 '../annots/tasha_tss_annotations_processed-20241211T170437Z-001.zip',
 '../annots/oliver_baseline_not_conserved_annotations_processed']

In [162]:
annot_files = glob.glob('../annots/tasha_tss_annotations_processed/*')

In [164]:
# Initialize an empty list to store DataFrames
data_frames = []

# Read each file and append the DataFrame to the list
for file in annot_files:
    df = pd.read_csv(file, sep='\t', header=0)
    data_frames.append(df)

# Concatenate all DataFrames into one
tss_annots = pd.concat(data_frames, ignore_index=True)

tss_annots


Unnamed: 0,CHR,BP,SNP,CM,gene1_dist1,gene1_dist2,gene1_dist3,gene2_dist1,gene2_dist2,gene2_dist3,gene3_dist1,gene3_dist2,gene3_dist3,density
0,1,850218.0,1:850218:T:A,0.0,0,0,0,0,0,0,0,0,0,29
1,1,766007.0,1:766007:A:C,0.0,0,0,0,0,0,0,0,0,0,23
2,1,781367.0,1:781367:A:C,0.0,0,0,0,0,0,0,0,0,0,24
3,1,766105.0,1:766105:T:A,0.0,0,0,0,0,0,0,0,0,0,23
4,1,762601.0,1:762601:T:C,0.0,0,0,0,0,0,0,0,0,0,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13328980,4,190873160.0,4:190873160:T:G,0.0,0,0,0,0,0,0,0,0,0,2
13328981,4,190873162.0,4:190873162:A:C,0.0,0,0,0,0,0,0,0,0,0,2
13328982,4,190880291.0,4:190880291:A:G,0.0,0,0,0,0,0,0,0,0,0,2
13328983,4,190867330.0,4:190867330:C:T,0.0,0,0,0,0,0,0,0,0,0,2


In [None]:
annot_df = pd.read_csv("../tasha_annots_to_merge.csv")

In [86]:
tasha_tss_annots = pd.merge(annot_df, tss_annots, on=['CHR', 'BP', 'SNP'], how='inner')

In [88]:
tasha_tss_annots

Unnamed: 0,CHR,BP,SNP,CM_x,gene1_dist1,gene1_dist2,gene1_dist3,gene2_dist1,gene2_dist2,gene2_dist3,...,Transcribed_Hoffman.extend.500.bed,TSS_Hoffman.bed,TSS_Hoffman.extend.500.bed,UTR_3_UCSC.bed,UTR_3_UCSC.extend.500.bed,UTR_5_UCSC.bed,UTR_5_UCSC.extend.500.bed,WeakEnhancer_Hoffman.bed,WeakEnhancer_Hoffman.extend.500.bed,density
0,10,363275.0,10:363275:C:G,0.0,0,1,0,0,0,1,...,1,0,0,0,0,0,0,0,1,3
1,10,684171.0,10:684171:G:A,0.0,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,6
2,10,363361.0,10:363361:G:A,0.0,0,1,0,0,0,1,...,1,0,0,0,0,0,0,0,1,3
3,10,346817.0,10:346817:T:C,0.0,0,1,0,0,0,1,...,1,0,0,0,0,0,0,0,0,3
4,10,667259.0,10:667259:A:C,0.0,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9413624,1,249205823.0,1:249205823:G:A,0.0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,10
9413625,1,249205806.0,1:249205806:T:C,0.0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,10
9413626,1,249205774.0,1:249205774:T:C,0.0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,10
9413627,1,249206890.0,1:249206890:T:A,0.0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,10


## Heritability

In [43]:
import glob

In [54]:
heritability_files = glob.glob('../heritability_results/h*')
heritability_files

['../heritability_results/heritability_4.0_o.results',
 '../heritability_results/heritability_c3.0.results',
 '../heritability_results/heritability_5.0_t.results',
 '../heritability_results/heritability_c3.1.results',
 '../heritability_results/heritability_c2.0.results',
 '../heritability_results/heritability_6.0_t.results',
 '../heritability_results/heritability_c2.1.results',
 '../heritability_results/heritability_c4.0.results',
 '../heritability_results/heritability_4.0_t.results',
 '../heritability_results/heritability_c1.0.results',
 '../heritability_results/heritability_5.0_o.results',
 '../heritability_results/heritability_6.0_o.results']

In [None]:
for file in heritability_files:
    
    variance_df = get_variance_df(heritability, annot_df)

In [115]:
# Load the GTF file into a pandas DataFrame and use the first row as headers
pd.read_csv('../heritability_results/heritability_6.0_o.results', sep='\t', header=0)['Category'].unique()

array(['s_het_post_1L2_0', 's_het_post_2L2_0', 's_het_post_3L2_0',
       'CM_baselineL2_0', 'base_baselineL2_0',
       'Coding_UCSC.bed_baselineL2_0',
       'Coding_UCSC.extend.500.bed_baselineL2_0',
       'CTCF_Hoffman.bed_baselineL2_0',
       'CTCF_Hoffman.extend.500.bed_baselineL2_0',
       'DGF_ENCODE.bed_baselineL2_0',
       'DGF_ENCODE.extend.500.bed_baselineL2_0',
       'DHS_peaks_Trynka.bed_baselineL2_0', 'DHS_Trynka.bed_baselineL2_0',
       'DHS_Trynka.extend.500.bed_baselineL2_0',
       'Enhancer_Andersson.bed_baselineL2_0',
       'Enhancer_Andersson.extend.500.bed_baselineL2_0',
       'Enhancer_Hoffman.bed_baselineL2_0',
       'Enhancer_Hoffman.extend.500.bed_baselineL2_0',
       'FetalDHS_Trynka.bed_baselineL2_0',
       'FetalDHS_Trynka.extend.500.bed_baselineL2_0',
       'H3K27ac_Hnisz.bed_baselineL2_0',
       'H3K27ac_Hnisz.extend.500.bed_baselineL2_0',
       'H3K27ac_PGC2.bed_baselineL2_0',
       'H3K27ac_PGC2.extend.500.bed_baselineL2_0',
       'H3K4

In [116]:
# Load the GTF file into a pandas DataFrame and use the first row as headers
heritability = pd.read_csv('../heritability_results/heritability_6.0_o.results', sep='\t', header=0)

In [124]:
heritability

Unnamed: 0,Category,Prop._SNPs,Prop._h2,Prop._h2_std_error,Enrichment,Enrichment_std_error,Enrichment_p,Coefficient,Coefficient_std_error,Coefficient_z-score
0,s_het_post_1L2_0,0.059852,0.071427,0.003285,1.193401,0.054891,0.0005541563,6.433868e-07,1.930821e-07,3.332193
1,s_het_post_2L2_0,0.048238,0.047677,0.002784,0.988373,0.057719,0.8401003,-3.339009e-08,1.785418e-07,-0.187016
2,s_het_post_3L2_0,0.046931,0.04808,0.002673,1.02448,0.056951,0.667244,4.656737e-08,1.748223e-07,0.26637
3,CM_baselineL2_0,96.18739,98.354003,2.303978,1.022525,0.023953,0.3483077,5.081071e-10,2.931434e-10,1.733306
4,base_baselineL2_0,1.004723,0.996075,0.000249,0.991393,0.000248,6.476398e-43,2.439328e-07,1.651348e-07,1.477173
5,Coding_UCSC.bed_baselineL2_0,0.032967,0.105084,0.020527,3.187512,0.622653,0.0006074708,-4.233955e-07,5.038133e-07,-0.840382
6,Coding_UCSC.extend.500.bed_baselineL2_0,0.104819,0.215415,0.024301,2.055104,0.231837,6.421192e-06,2.147366e-07,2.342712e-07,0.916615
7,CTCF_Hoffman.bed_baselineL2_0,0.027498,0.05326,0.014658,1.936874,0.533046,0.08052416,1.305538e-07,3.800785e-07,0.343492
8,CTCF_Hoffman.extend.500.bed_baselineL2_0,0.077203,0.111978,0.018824,1.450428,0.243826,0.06545816,-1.300055e-07,1.912808e-07,-0.679658
9,DGF_ENCODE.bed_baselineL2_0,0.186834,0.375972,0.037057,2.012333,0.198341,3.950596e-07,1.661245e-07,1.320662e-07,1.257888


In [117]:
heritability.sort_values('Coefficient', ascending = False)['Category'].unique()

array(['UTR_3_UCSC.bed_baselineL2_0',
       'TSS_Hoffman.extend.500.bed_baselineL2_0',
       'H3K4me3_Trynka.bed_baselineL2_0',
       'H3K9ac_peaks_Trynka.bed_baselineL2_0', 's_het_post_1L2_0',
       'Intron_UCSC.extend.500.bed_baselineL2_0',
       'H3K9ac_Trynka.bed_baselineL2_0',
       'SuperEnhancer_Hnisz.extend.500.bed_baselineL2_0',
       'UTR_5_UCSC.bed_baselineL2_0',
       'PromoterFlanking_Hoffman.extend.500.bed_baselineL2_0',
       'H3K27ac_Hnisz.extend.500.bed_baselineL2_0',
       'DHS_Trynka.bed_baselineL2_0',
       'H3K27ac_PGC2.extend.500.bed_baselineL2_0', 'base_baselineL2_0',
       'Coding_UCSC.extend.500.bed_baselineL2_0',
       'H3K4me1_peaks_Trynka.bed_baselineL2_0',
       'DGF_ENCODE.bed_baselineL2_0',
       'H3K4me1_Trynka.extend.500.bed_baselineL2_0',
       'CTCF_Hoffman.bed_baselineL2_0',
       'TFBS_ENCODE.extend.500.bed_baselineL2_0',
       'Promoter_UCSC.extend.500.bed_baselineL2_0',
       'Enhancer_Hoffman.extend.500.bed_baselineL2_0', 's_he

In [102]:
heritability_filename = '../heritability_results/heritability_c2.0.results'

In [145]:
def get_variance_df(heritability_filename, annot_df):
    extra_columns = []
    annot_columns = []
    heritability = pd.read_csv(heritability_filename, sep='\t', header=0)
    # Create a dictionary to map Category to Coefficient
    # Create a dictionary to map Category to Coefficient
    coefficients = dict(zip(heritability['Category'], heritability['Coefficient']))

    # Define the columns to be multiplied
    if heritability_filename[-13] =='4':
        extra_columns = ['density']
    elif heritability_filename[-13] =='5':
        extra_columns = ['Conserved_LindbladToh.extend.500.bed_baseline', 'Conserved_LindbladToh.bed_baseline']
    elif heritability_filename[-13] == '6':
        pre_extra_columns = pd.read_csv(heritability_filename, sep='\t', header=0)['Category'].unique().tolist()
        extra_columns = []
        for extra_col in pre_extra_columns:
            extra_columns.append(extra_col[:-4])
    if heritability_filename[-9] == 'o':
        print('o')
        annot_columns = [f's_het_post_{x}' for x in range(1, 4)]
    #elif heritability_filename[-11] == '3':
    elif heritability_filename[-9] == 't':
        print('t')
        annot_columns = [f'gene{x}_dist{y}' for x in range(1, 4) for y in range(1, 4)]
    #     elif heritability_filename[-10:-8] == '.1' or heritability_filename[-11] == '4':
    #         columns = list(annot_df.columns)[4:]
    else:
        print (str(heritability_filename[-9]))
        print('check heritability filetype')
        return
    if heritability_filename[-13] != '6':
        columns = extra_columns + annot_columns
    else:
        columns = extra_columns
    print(columns)
    # Multiply the specified columns by the corresponding coefficients
    for col in columns:
        heritability_col = f'{col}L2_0'
        if heritability_col in coefficients:
            annot_df[col] *= coefficients[heritability_col]
    # Columns to sum
    cols_to_sum = columns
    # Sum across the specified columns for each row
    annot_df['Variance Explained'] = annot_df[cols_to_sum].sum(axis=1)

    variance_df = annot_df[['CHR', 'BP', 'SNP','Variance Explained']]

    variance_df = variance_df.sort_values('Variance Explained', ascending=False)
    return(variance_df)
    

In [165]:
variance_df = get_variance_df('../heritability_results/heritability_4.0_t.results', tss_annots)

t
['density', 'gene1_dist1', 'gene1_dist2', 'gene1_dist3', 'gene2_dist1', 'gene2_dist2', 'gene2_dist3', 'gene3_dist1', 'gene3_dist2', 'gene3_dist3']


In [166]:
variance_df.to_csv('../heritability_results/variance_estimate_4.0_t.txt', sep = '\t')

In [146]:
variance_df = get_variance_df('../heritability_results/heritability_c4.0.results', tss_annots)

In [147]:
variance_df.to_csv('../heritability_results/variance_estimate_c4.0.txt', sep = '\t')

In [56]:
annot_df_copy

Unnamed: 0,CHR,BP,SNP,CM,gene1_dist1,gene1_dist2,gene1_dist3,gene2_dist1,gene2_dist2,gene2_dist3,gene3_dist1,gene3_dist2,gene3_dist3
0,17,302.0,17:302:T:TA,0.0,0,0,0,0,0,0,0,0,0
1,10,363275.0,10:363275:C:G,0.0,0,1,0,0,0,1,0,0,1
2,10,684171.0,10:684171:G:A,0.0,0,0,1,0,0,1,0,0,1
3,10,363361.0,10:363361:G:A,0.0,0,1,0,0,0,1,0,0,1
4,10,600507.0,10:600507:T:C,0.0,0,0,1,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13364298,1,249205823.0,1:249205823:G:A,0.0,0,0,0,0,0,0,0,0,0
13364299,1,249205806.0,1:249205806:T:C,0.0,0,0,0,0,0,0,0,0,0
13364300,1,249205774.0,1:249205774:T:C,0.0,0,0,0,0,0,0,0,0,0
13364301,1,249206890.0,1:249206890:T:A,0.0,0,0,0,0,0,0,0,0,0


In [68]:
# Create a dictionary to map Category to Coefficient
coefficients = dict(zip(heritability['Category'], heritability['Coefficient']))

# Define the columns to be multiplied
columns = [f'gene{x}_dist{y}' for x in range(1, 4) for y in range(1, 4)]

# Multiply the specified columns by the corresponding coefficients
for col in columns:
    heritability_col = f'{col}L2_0'
    if heritability_col in coefficients:
        annot_df_copy[col] *= coefficients[heritability_col]

annot_df_copy


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annot_df_copy[col] *= coefficients[heritability_col]


Unnamed: 0,CHR,BP,SNP,CM,gene1_dist1,gene1_dist2,gene1_dist3,gene2_dist1,gene2_dist2,gene2_dist3,gene3_dist1,gene3_dist2,gene3_dist3
0,17,302.0,17:302:T:TA,0.0,0.0,0.000000e+00,-0.000000e+00,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00
1,10,363275.0,10:363275:C:G,0.0,0.0,4.407541e-07,-0.000000e+00,0.0,0.0,1.650999e-08,0.0,0.0,6.719109e-08
2,10,684171.0,10:684171:G:A,0.0,0.0,0.000000e+00,-3.721883e-09,0.0,0.0,1.650999e-08,0.0,0.0,6.719109e-08
3,10,363361.0,10:363361:G:A,0.0,0.0,4.407541e-07,-0.000000e+00,0.0,0.0,1.650999e-08,0.0,0.0,6.719109e-08
4,10,600507.0,10:600507:T:C,0.0,0.0,0.000000e+00,-3.721883e-09,0.0,0.0,1.650999e-08,0.0,0.0,6.719109e-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13364298,1,249205823.0,1:249205823:G:A,0.0,0.0,0.000000e+00,-0.000000e+00,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00
13364299,1,249205806.0,1:249205806:T:C,0.0,0.0,0.000000e+00,-0.000000e+00,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00
13364300,1,249205774.0,1:249205774:T:C,0.0,0.0,0.000000e+00,-0.000000e+00,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00
13364301,1,249206890.0,1:249206890:T:A,0.0,0.0,0.000000e+00,-0.000000e+00,0.0,0.0,0.000000e+00,0.0,0.0,0.000000e+00


In [69]:
# Columns to sum
cols_to_sum = ['gene1_dist1', 'gene1_dist2','gene1_dist3','gene2_dist1', 'gene2_dist2','gene2_dist3','gene3_dist1', 'gene3_dist2','gene3_dist3']

# Sum across the specified columns for each row
annot_df_copy['Variance Explained'] = annot_df_copy[cols_to_sum].sum(axis=1)

variance_df = annot_df_copy[['CHR', 'BP', 'SNP','Variance Explained']]

variance_df = variance_df.sort_values('Variance Explained', ascending=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  annot_df_copy['Variance Explained'] = annot_df_copy[cols_to_sum].sum(axis=1)


In [70]:
variance_df

Unnamed: 0,CHR,BP,SNP,Variance Explained
4649032,19,45587005.0,19:45587005:C:T,8.659450e-06
4649005,19,45584692.0,19:45584692:C:T,8.659450e-06
4649010,19,45587405.0,19:45587405:G:A,8.659450e-06
4649009,19,45588166.0,19:45588166:A:C,8.659450e-06
4649008,19,45587342.0,19:45587342:A:G,8.659450e-06
...,...,...,...,...
12296948,6,169732352.0,6:169732352:C:T,-3.721883e-09
12296946,6,169730781.0,6:169730781:G:A,-3.721883e-09
11402621,4,140778884.0,4:140778884:A:G,-3.721883e-09
12296940,6,169728356.0,6:169728356:G:C,-3.721883e-09


In [None]:
# # Load SNP to rsid mapping
# mapping_file = maf01file
# mapping_df = pd.read_csv(mapping_file, delim_whitespace=True)  # Adjust the delimiter if needed
# snp_to_rsid = dict(zip(mapping_df['SNP'], mapping_df['rsid']))

# # Path to the folder containing annot2 files
# annot2_folder = 'annot2'

# # Iterate through each file in the annot2 folder
# for file_name in os.listdir(annot2_folder):
#     if file_name.endswith('.annot'):  # Assuming files are text files
#         file_path = os.path.join(annot2_folder, file_name)
        
#         # Load the annot2 file
#         annot_df = pd.read_csv(file_path, delim_whitespace=True)  # Adjust the delimiter if needed
        
#         # Replace SNP with rsid
#         annot_df['SNP'] = annot_df['SNP'].map(snp_to_rsid)
        
#         # Save the updated file back to the folder
#         annot_df.to_csv(file_path, index=False, sep='\t')  # Adjust the delimiter if needed

# print("SNP columns updated in all files.")


In [40]:
# Load the GTF file into a pandas DataFrame and use the first row as headers
heritability = pd.read_csv('../heritability_results/heritability_6.0_o.results', sep='\t', header=0)

In [60]:
baseline = pd.read_csv('../heritability_results/heritability_c2.1.results', sep='\t', header=0)

In [69]:
baseline['Category'].tolist()

['s_het_post_1L2_0',
 's_het_post_2L2_0',
 's_het_post_3L2_0',
 'baseL2_0',
 'Coding_UCSC.bedL2_0',
 'Coding_UCSC.extend.500.bedL2_0',
 'Conserved_LindbladToh.bedL2_0',
 'Conserved_LindbladToh.extend.500.bedL2_0',
 'CTCF_Hoffman.bedL2_0',
 'CTCF_Hoffman.extend.500.bedL2_0',
 'DGF_ENCODE.bedL2_0',
 'DGF_ENCODE.extend.500.bedL2_0',
 'DHS_peaks_Trynka.bedL2_0',
 'DHS_Trynka.bedL2_0',
 'DHS_Trynka.extend.500.bedL2_0',
 'Enhancer_Andersson.bedL2_0',
 'Enhancer_Andersson.extend.500.bedL2_0',
 'Enhancer_Hoffman.bedL2_0',
 'Enhancer_Hoffman.extend.500.bedL2_0',
 'FetalDHS_Trynka.bedL2_0',
 'FetalDHS_Trynka.extend.500.bedL2_0',
 'H3K27ac_Hnisz.bedL2_0',
 'H3K27ac_Hnisz.extend.500.bedL2_0',
 'H3K27ac_PGC2.bedL2_0',
 'H3K27ac_PGC2.extend.500.bedL2_0',
 'H3K4me1_peaks_Trynka.bedL2_0',
 'H3K4me1_Trynka.bedL2_0',
 'H3K4me1_Trynka.extend.500.bedL2_0',
 'H3K4me3_peaks_Trynka.bedL2_0',
 'H3K4me3_Trynka.bedL2_0',
 'H3K4me3_Trynka.extend.500.bedL2_0',
 'H3K9ac_peaks_Trynka.bedL2_0',
 'H3K9ac_Trynka.bedL2

In [63]:
#baseline & density
baseline.iloc[[0,1,2,6,7]][['Category', 'Coefficient','Coefficient_std_error']]*10**6

Unnamed: 0,Category,Coefficient,Coefficient_std_error
0,s_het_post_1L2_0s_het_post_1L2_0s_het_post_1L2...,0.33793706,0.14132323
1,s_het_post_2L2_0s_het_post_2L2_0s_het_post_2L2...,0.10360321,0.13494184
2,s_het_post_3L2_0s_het_post_3L2_0s_het_post_3L2...,-0.02773133,0.10582349
6,Conserved_LindbladToh.bedL2_0Conserved_Lindbla...,1.01904449,0.19776306
7,Conserved_LindbladToh.extend.500.bedL2_0Conser...,0.08604895,0.05620351


In [59]:
#with conserved only
heritability[['Category', 'Coefficient','Coefficient_std_error']]*10**6

Unnamed: 0,Category,Coefficient,Coefficient_std_error
0,s_het_post_1L2_0s_het_post_1L2_0s_het_post_1L2...,0.80837334,0.20610612
1,s_het_post_2L2_0s_het_post_2L2_0s_het_post_2L2...,0.43287033,0.18844016
2,s_het_post_3L2_0s_het_post_3L2_0s_het_post_3L2...,0.39855011,0.15743239
3,Conserved_LindbladToh.bed_baselineL2_0Conserve...,0.94621207,0.25044343
4,Conserved_LindbladToh.extend.500.bed_baselineL...,0.53582423,0.06688004


In [58]:
without_conserved[['Category','Coefficient','Coefficient_std_error']]*10**6

Unnamed: 0,Category,Coefficient,Coefficient_std_error
0,s_het_post_1L2_0s_het_post_1L2_0s_het_post_1L2...,2.06479733,0.22912619
1,s_het_post_2L2_0s_het_post_2L2_0s_het_post_2L2...,1.58084332,0.24532432
2,s_het_post_3L2_0s_het_post_3L2_0s_het_post_3L2...,1.60958909,0.23555328


In [47]:
(3*10**9)*0.05

150000000.0

In [49]:
(3*10**9)*0.45/1000

1350000.0

In [None]:
1000*150000000

In [42]:
with_conserved = heritability

In [None]:
without_conserved

In [51]:
without_conserved = pd.read_csv('../heritability_results/heritability_c2.0.results', sep='\t', header=0)

In [12]:
with_density['Coefficient'][0:3]

0    9.030827e-07
1    2.748546e-07
2    1.811297e-07
Name: Coefficient, dtype: float64

In [21]:
no_density['Coefficient']

0   0.00000206
1   0.00000158
2   0.00000161
Name: Coefficient, dtype: float64

In [20]:
pd.set_option('display.float_format', '{:.8f}'.format)

In [22]:
with_density['Coefficient'][0:3] / no_density['Coefficient']

0   0.43737111
1   0.17386582
2   0.11253167
Name: Coefficient, dtype: float64

In [24]:
with_density = pd.read_csv('../heritability_results/heritability_4.0_t.results', sep='\t', header=0)

In [34]:
with_density

Unnamed: 0,Category,Prop._SNPs,Prop._h2,Prop._h2_std_error,Enrichment,Enrichment_std_error,Enrichment_p,Coefficient,Coefficient_std_error,Coefficient_z-score
0,gene1_dist1L2_0,0.03391431,0.0918533,0.0109555,2.70839397,0.32303475,3.2e-07,5.3e-07,1.4e-07,3.85861208
1,gene1_dist2L2_0,0.15657018,0.25801307,0.01668816,1.64790681,0.10658583,1e-08,3.1e-07,5e-08,5.75397112
2,gene1_dist3L2_0,0.23976937,0.12879264,0.0106241,0.53715217,0.04430967,0.0,1e-07,2e-08,4.30230092
3,gene2_dist1L2_0,0.0023607,0.00860497,0.00519374,3.64509563,2.20008614,0.23222492,2.8e-07,9.2e-07,0.29937628
4,gene2_dist2L2_0,0.06740534,0.15982544,0.0181578,2.37110957,0.26938218,9.7e-07,3.5e-07,1.4e-07,2.60823024
5,gene2_dist3L2_0,0.26129498,0.18290124,0.01153906,0.69997993,0.04416104,0.0,6e-08,2e-08,2.68719518
6,gene3_dist1L2_0,0.00035605,0.00240855,0.00232319,6.76471354,6.52496685,0.37925603,1.18e-06,2.69e-06,0.43716564
7,gene3_dist2L2_0,0.03671533,0.07998837,0.01603852,2.17860973,0.43683458,0.00737018,-1e-08,2.1e-07,-0.05198222
8,gene3_dist3L2_0,0.28122408,0.21577146,0.01204181,0.76725813,0.04281928,3.5e-07,4e-08,2e-08,1.64972468
9,densityL2_0,6.4846995,12.6240433,0.28497596,1.94674299,0.0439459,0.0,4e-08,0.0,13.22147989


In [38]:
with_density[['Coefficient','Coefficient_std_error']]*10**6

Unnamed: 0,Coefficient,Coefficient_std_error
0,0.52556495,0.1362057
1,0.31093267,0.05403793
2,0.09544755,0.02218523
3,0.27548142,0.92018452
4,0.35270927,0.13522934
5,0.06309617,0.02348031
6,1.17658097,2.69138479
7,-0.01070893,0.20601139
8,0.03508929,0.02126978
9,0.04301108,0.00325312


In [44]:
no_density = pd.read_csv('../heritability_results/heritability_c2.0.results', sep='\t', header=0)

In [45]:
no_density

Unnamed: 0,Category,Prop._SNPs,Prop._h2,Prop._h2_std_error,Enrichment,Enrichment_std_error,Enrichment_p,Coefficient,Coefficient_std_error,Coefficient_z-score
0,s_het_post_1L2_0,0.05985168,0.11924364,0.00491643,1.99231884,0.0821435,0.0,2.06e-06,2.3e-07,9.01161636
1,s_het_post_2L2_0,0.04823788,0.09103284,0.00499704,1.88716518,0.10359153,0.0,1.58e-06,2.5e-07,6.44389169
2,s_het_post_3L2_0,0.04693084,0.09016448,0.00521358,1.92122036,0.11109066,0.0,1.61e-06,2.4e-07,6.83322712


In [39]:
no_density[['Coefficient','Coefficient_std_error']]*10**6

Unnamed: 0,Coefficient,Coefficient_std_error
0,1.27537299,0.19079562
1,0.44075413,0.06124168
2,-0.00372188,0.03283316
3,1.22983361,0.75852346
4,0.42776651,0.10572554
5,0.01650999,0.02942806
6,6.15424317,3.29552139
7,0.97806195,0.19920835
8,0.06719109,0.03484077


In [33]:
with_density['Coefficient'][0:9] / no_density['Coefficient']

0     0.41208725
1     0.70545604
2   -25.64496233
3     0.22399894
4     0.82453688
5     3.82169665
6     0.19118207
7    -0.01094913
8     0.52223127
Name: Coefficient, dtype: float64