## Windows with z-scores for SNV density, and information about the enhancers that overlap those windows 

I used the following script to add enhancer information (target gene, enhancer-gene association, etc.):
```
experiments/germline-model/chen-et-al-2022/add-enhancer-info.sh
```

writing the data to

```
${CONSTRAINT_TOOLS_DATA}/benchmark-genome-wide-predictions/chen-et-al-2022/mchale.kmerSizes.trainSets.noisy.overlapAmounts.cpg-islands.enhancer-info.bed
```

GeneHancer paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5467550/

In [1]:
import os

# Root of the shared constraint-tools data directory.
# An environment variable named CONSTRAINT_TOOLS_DATA (matching the `${CONSTRAINT_TOOLS_DATA}`
# placeholder used in this notebook's prose) takes precedence, so the notebook can run
# on other machines; when it is unset or empty, fall back to the original cluster path.
CONSTRAINT_TOOLS_DATA = (
    os.environ.get('CONSTRAINT_TOOLS_DATA')
    or '/scratch/ucgd/lustre-work/quinlan/data-shared/constraint-tools'
)

In [2]:
import pandas as pd 

# Show up to 50 columns when a DataFrame is displayed.
pd.set_option('display.max_columns', 50)

# Label embedded in the model-derived column names used throughout this notebook
# (e.g. f'N_bar_3_{TRAIN_SET_LABEL}').
TRAIN_SET_LABEL = 'noncoding'

def get_chen_mchale_features():
    """Load the window table with enhancer annotations and prune unused model columns.

    Reads the tab-separated `...enhancer-info.bed` file under CONSTRAINT_TOOLS_DATA, then:

    - converts `elite_enhancer` to numeric (values that cannot be parsed become NaN)
      and renames it to `eliteness_of_enhancer`;
    - drops the `N_mean_null_*`, `N_variance_null_*` and `N_bar_*` columns for
      k-mer size 7, for the noisy k-mer size 3 model, and for k-mer sizes 1 and 5
      (both noisy and not), all for the training set named by TRAIN_SET_LABEL.

    Returns:
        pandas.DataFrame: the pruned table.

    Raises:
        KeyError: if any of the columns to drop is missing from the file.
    """
    filename = f'{CONSTRAINT_TOOLS_DATA}/benchmark-genome-wide-predictions/chen-et-al-2022/mchale.kmerSizes.trainSets.noisy.overlapAmounts.cpg-islands.enhancer-info.bed'

    # Suffixes identifying the models whose summary statistics are discarded.
    discarded_models = [f'7_{TRAIN_SET_LABEL}', f'3_{TRAIN_SET_LABEL}_noisy']
    discarded_models += [
        f'{kmer_size}_{TRAIN_SET_LABEL}{noisy}'
        for kmer_size in (1, 5)
        for noisy in ('', '_noisy')
    ]
    discarded_columns = [
        f'{statistic}_{model}'
        for model in discarded_models
        for statistic in ('N_mean_null', 'N_variance_null', 'N_bar')
    ]

    table = pd.read_csv(filename, sep='\t')
    table['elite_enhancer'] = pd.to_numeric(table['elite_enhancer'], errors='coerce')
    return (
        table
        .rename(columns={'elite_enhancer': 'eliteness_of_enhancer'})
        .drop(columns=discarded_columns)
    )

# Load the annotated window table; the bare name on the last line displays it.
chen_mchale_zscores_features = get_chen_mchale_features()
chen_mchale_zscores_features

Unnamed: 0,chromosome,start,end,N_observed,N_bar_3_noncoding,N_mean_null_3_noncoding,N_variance_null_3_noncoding,cpg_island overlap,enhancer overlap,merged_exon overlap,window overlaps enhancer,window overlaps merged_exon,window overlaps cpg_island,new chen zscore,negative new chen zscore,enhancer_chromosome,enhancer_start,enhancer_end,GHid,enhancer_type,eliteness_of_enhancer,gene_targeted_by_enhancer|enhancer_gene_association_score|elite_enhancer_gene_association,window_enhancer_overlap_bps
0,chr1,1432000,1433000,260,-0.247615,263.528599,203.072610,,1000.0,,True,False,False,4.299894,-4.299894,chr1,1428218,1438733,GH01J001428,Promoter/Enhancer,1.0,CDK11B|9.84|0;ATAD3B|9.83|0;FAAP20|9.83|0;VWA1...,1000
1,chr1,1435000,1436000,275,-7.547130,387.939429,223.937877,440.0,1000.0,444.0,True,True,True,3.331645,-3.331645,chr1,1428218,1438733,GH01J001428,Promoter/Enhancer,1.0,CDK11B|9.84|0;ATAD3B|9.83|0;FAAP20|9.83|0;VWA1...,1000
2,chr1,1449000,1450000,225,0.258359,221.559138,177.372370,,,311.0,False,True,False,1.817267,-1.817267,.,-1,-1,.,.,,.,0
3,chr1,1450000,1451000,313,2.197534,281.968822,199.400099,,,758.0,False,True,False,1.365863,-1.365863,.,-1,-1,.,.,,.,0
4,chr1,1451000,1452000,293,2.367303,260.131776,192.772432,,,,False,False,False,0.666316,-0.666316,.,-1,-1,.,.,,.,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1826472,chr9,137275000,137276000,296,1.921453,268.932391,198.445131,,712.0,,True,False,False,2.687348,-2.687348,chr9,137273849,137275002,GH09J137273,Enhancer,1.0,NELFB|23.78|0;HSALNG0075385|0.64|0;TOR4A|0.64|0,2
1826473,chr9,137275000,137276000,296,1.921453,268.932391,198.445131,,712.0,,True,False,False,2.687348,-2.687348,chr9,137275290,137279215,GH09J137275,Promoter/Enhancer,1.0,EHMT1|762.48|1;TOR4A|750.64|1;SEC16A|11.98|0;S...,710
1826474,chr9,137282000,137283000,211,-1.919711,237.194349,186.184553,,572.0,641.0,True,True,False,-0.090791,0.090791,chr9,137282428,137283516,GH09J137282,Enhancer,0.0,TOR4A|4.86|1;EXD3|11.79|0;NOXA1|11.79|0;lnc-TO...,572
1826475,chr9,137290000,137291000,224,-0.682673,233.338243,187.113535,,,,False,False,False,2.162811,-2.162811,.,-1,-1,.,.,,.,0


## Create single record for each window

The code in this section duplicates the corresponding code in `Fig_2a.ipynb`; any change made here should be mirrored there (and vice versa).

In [3]:
# Discard the enhancer coordinate columns.
chen_mchale_zscores_features = chen_mchale_zscores_features.drop(
    columns=['enhancer_chromosome', 'enhancer_start', 'enhancer_end']
)
chen_mchale_zscores_features

Unnamed: 0,chromosome,start,end,N_observed,N_bar_3_noncoding,N_mean_null_3_noncoding,N_variance_null_3_noncoding,cpg_island overlap,enhancer overlap,merged_exon overlap,window overlaps enhancer,window overlaps merged_exon,window overlaps cpg_island,new chen zscore,negative new chen zscore,GHid,enhancer_type,eliteness_of_enhancer,gene_targeted_by_enhancer|enhancer_gene_association_score|elite_enhancer_gene_association,window_enhancer_overlap_bps
0,chr1,1432000,1433000,260,-0.247615,263.528599,203.072610,,1000.0,,True,False,False,4.299894,-4.299894,GH01J001428,Promoter/Enhancer,1.0,CDK11B|9.84|0;ATAD3B|9.83|0;FAAP20|9.83|0;VWA1...,1000
1,chr1,1435000,1436000,275,-7.547130,387.939429,223.937877,440.0,1000.0,444.0,True,True,True,3.331645,-3.331645,GH01J001428,Promoter/Enhancer,1.0,CDK11B|9.84|0;ATAD3B|9.83|0;FAAP20|9.83|0;VWA1...,1000
2,chr1,1449000,1450000,225,0.258359,221.559138,177.372370,,,311.0,False,True,False,1.817267,-1.817267,.,.,,.,0
3,chr1,1450000,1451000,313,2.197534,281.968822,199.400099,,,758.0,False,True,False,1.365863,-1.365863,.,.,,.,0
4,chr1,1451000,1452000,293,2.367303,260.131776,192.772432,,,,False,False,False,0.666316,-0.666316,.,.,,.,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1826472,chr9,137275000,137276000,296,1.921453,268.932391,198.445131,,712.0,,True,False,False,2.687348,-2.687348,GH09J137273,Enhancer,1.0,NELFB|23.78|0;HSALNG0075385|0.64|0;TOR4A|0.64|0,2
1826473,chr9,137275000,137276000,296,1.921453,268.932391,198.445131,,712.0,,True,False,False,2.687348,-2.687348,GH09J137275,Promoter/Enhancer,1.0,EHMT1|762.48|1;TOR4A|750.64|1;SEC16A|11.98|0;S...,710
1826474,chr9,137282000,137283000,211,-1.919711,237.194349,186.184553,,572.0,641.0,True,True,False,-0.090791,0.090791,GH09J137282,Enhancer,0.0,TOR4A|4.86|1;EXD3|11.79|0;NOXA1|11.79|0;lnc-TO...,572
1826475,chr9,137290000,137291000,224,-0.682673,233.338243,187.113535,,,,False,False,False,2.162811,-2.162811,.,.,,.,0


In [4]:
from tqdm import tqdm
import numpy as np 

tqdm.pandas()

def add_enhancer_characteristic_fields(row):
  """Annotate one record with its strongest enhancer-gene association.

  The packed column
  'gene_targeted_by_enhancer|enhancer_gene_association_score|elite_enhancer_gene_association'
  is either '.' or a ';'-separated list of 'gene|score|eliteness' entries.
  Three fields are written onto `row`:

  - 'optimal_gene_targeted_by_enhancer': gene of the highest-scoring entry ('.' if none)
  - 'optimal_enhancer_gene_association_score': that entry's score as a float (NaN if none)
  - 'eliteness_of_optimal_enhancer_gene_association': that entry's eliteness as an int
    (NaN if none)

  When several entries share the highest score, the one listed first wins.

  Args:
    row: mapping-like record (e.g. the row handed over by `DataFrame.apply(..., axis=1)`);
      it is modified in place.

  Returns:
    The same `row` object with the three fields set.

  Raises:
    ValueError, IndexError: if an entry is malformed (wrong number of '|'-separated
      parts, or a non-numeric score/eliteness).
  """
  # NOTE: the notebook prose says this section duplicates code in `Fig_2a.ipynb`;
  # the fixes below (np.nan, all-non-positive scores) should be ported there too.
  enhancer_characteristics_heading = 'gene_targeted_by_enhancer|enhancer_gene_association_score|elite_enhancer_gene_association'
  packed_characteristics = row[enhancer_characteristics_heading]

  if packed_characteristics == '.':
    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
    row['optimal_gene_targeted_by_enhancer'] = '.'
    row['optimal_enhancer_gene_association_score'] = np.nan
    row['eliteness_of_optimal_enhancer_gene_association'] = np.nan
    return row

  entries = [entry.split('|') for entry in packed_characteristics.split(';')]
  # max() returns the first maximal element, so ties resolve to the earliest entry.
  # Unlike a running maximum seeded with 0, this also selects an entry when every
  # score is zero or negative (previously that case crashed on unpacking).
  gene, score, eliteness = max(entries, key=lambda entry: float(entry[1]))

  row['optimal_gene_targeted_by_enhancer'] = gene
  row['optimal_enhancer_gene_association_score'] = float(score)
  row['eliteness_of_optimal_enhancer_gene_association'] = int(eliteness)
  return row

def add_enhancer_characteristic_columns(df):
  """Run `add_enhancer_characteristic_fields` over every row of `df` with a progress bar.

  Uses `progress_apply`, which is only available after `tqdm.pandas()` has been called.

  Returns:
    The DataFrame produced by the row-wise apply.
  """
  return df.progress_apply(add_enhancer_characteristic_fields, axis=1)

# Row-wise apply over every record; the recorded run below took about 34 minutes
# for ~1.8 million rows (see the progress-bar output).
chen_mchale_zscores_features = add_enhancer_characteristic_columns(chen_mchale_zscores_features)
chen_mchale_zscores_features

100%|███████████████████████████████| 1826477/1826477 [33:42<00:00, 903.28it/s]


Unnamed: 0,chromosome,start,end,N_observed,N_bar_3_noncoding,N_mean_null_3_noncoding,N_variance_null_3_noncoding,cpg_island overlap,enhancer overlap,merged_exon overlap,window overlaps enhancer,window overlaps merged_exon,window overlaps cpg_island,new chen zscore,negative new chen zscore,GHid,enhancer_type,eliteness_of_enhancer,gene_targeted_by_enhancer|enhancer_gene_association_score|elite_enhancer_gene_association,window_enhancer_overlap_bps,optimal_gene_targeted_by_enhancer,optimal_enhancer_gene_association_score,eliteness_of_optimal_enhancer_gene_association
0,chr1,1432000,1433000,260,-0.247615,263.528599,203.072610,,1000.0,,True,False,False,4.299894,-4.299894,GH01J001428,Promoter/Enhancer,1.0,CDK11B|9.84|0;ATAD3B|9.83|0;FAAP20|9.83|0;VWA1...,1000,VWA1,758.12,1.0
1,chr1,1435000,1436000,275,-7.547130,387.939429,223.937877,440.0,1000.0,444.0,True,True,True,3.331645,-3.331645,GH01J001428,Promoter/Enhancer,1.0,CDK11B|9.84|0;ATAD3B|9.83|0;FAAP20|9.83|0;VWA1...,1000,VWA1,758.12,1.0
2,chr1,1449000,1450000,225,0.258359,221.559138,177.372370,,,311.0,False,True,False,1.817267,-1.817267,.,.,,.,0,.,,
3,chr1,1450000,1451000,313,2.197534,281.968822,199.400099,,,758.0,False,True,False,1.365863,-1.365863,.,.,,.,0,.,,
4,chr1,1451000,1452000,293,2.367303,260.131776,192.772432,,,,False,False,False,0.666316,-0.666316,.,.,,.,0,.,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1826472,chr9,137275000,137276000,296,1.921453,268.932391,198.445131,,712.0,,True,False,False,2.687348,-2.687348,GH09J137273,Enhancer,1.0,NELFB|23.78|0;HSALNG0075385|0.64|0;TOR4A|0.64|0,2,NELFB,23.78,0.0
1826473,chr9,137275000,137276000,296,1.921453,268.932391,198.445131,,712.0,,True,False,False,2.687348,-2.687348,GH09J137275,Promoter/Enhancer,1.0,EHMT1|762.48|1;TOR4A|750.64|1;SEC16A|11.98|0;S...,710,EHMT1,762.48,1.0
1826474,chr9,137282000,137283000,211,-1.919711,237.194349,186.184553,,572.0,641.0,True,True,False,-0.090791,0.090791,GH09J137282,Enhancer,0.0,TOR4A|4.86|1;EXD3|11.79|0;NOXA1|11.79|0;lnc-TO...,572,EXD3,11.79,0.0
1826475,chr9,137290000,137291000,224,-0.682673,233.338243,187.113535,,,,False,False,False,2.162811,-2.162811,.,.,,.,0,.,,


In [5]:
# Window-level columns used as grouping keys when collapsing the table to one record
# per window. They are named explicitly (rather than taken as the first 15 columns by
# position) so that a change in column order cannot silently change the grouping.
columns_to_group_by = [
    'chromosome',
    'start',
    'end',
    'N_observed',
    f'N_bar_3_{TRAIN_SET_LABEL}',
    f'N_mean_null_3_{TRAIN_SET_LABEL}',
    f'N_variance_null_3_{TRAIN_SET_LABEL}',
    'cpg_island overlap',
    'enhancer overlap',
    'merged_exon overlap',
    'window overlaps enhancer',
    'window overlaps merged_exon',
    'window overlaps cpg_island',
    'new chen zscore',
    'negative new chen zscore',
]
columns_to_group_by

['chromosome',
 'start',
 'end',
 'N_observed',
 'N_bar_3_noncoding',
 'N_mean_null_3_noncoding',
 'N_variance_null_3_noncoding',
 'cpg_island overlap',
 'enhancer overlap',
 'merged_exon overlap',
 'window overlaps enhancer',
 'window overlaps merged_exon',
 'window overlaps cpg_island',
 'new chen zscore',
 'negative new chen zscore']

In [6]:
# Collapse the table to one record per window (one group per distinct combination of
# the window-level columns in `columns_to_group_by`).
# NOTE(review): a window appears to have one input record per overlapping enhancer
# (see the duplicated chr9:137275000-137276000 rows above) — confirm against the
# script that produced the input file.
chen_mchale_zscores_features = ( 
    chen_mchale_zscores_features
    # dropna=False keeps groups whose key columns contain NaN (e.g. the overlap columns).
    .groupby(columns_to_group_by, dropna=False)
    .agg({
      # 'count' tallies non-null eliteness values; it is renamed to 'enhancer count' below.
      'eliteness_of_enhancer': ['max', 'count'],
      'window_enhancer_overlap_bps': ['sum'],
      # The lambda must stay anonymous: its name ('<lambda>') becomes part of the
      # flattened column name 'optimal_gene_targeted_by_enhancer <lambda>'.
      'optimal_gene_targeted_by_enhancer': [lambda x: ','.join(list(x))],
      'optimal_enhancer_gene_association_score': ['max'],
      'eliteness_of_optimal_enhancer_gene_association': ['max']
    })
)
chen_mchale_zscores_features

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,eliteness_of_enhancer,eliteness_of_enhancer,window_enhancer_overlap_bps,optimal_gene_targeted_by_enhancer,optimal_enhancer_gene_association_score,eliteness_of_optimal_enhancer_gene_association
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,max,count,sum,<lambda>,max,max
chromosome,start,end,N_observed,N_bar_3_noncoding,N_mean_null_3_noncoding,N_variance_null_3_noncoding,cpg_island overlap,enhancer overlap,merged_exon overlap,window overlaps enhancer,window overlaps merged_exon,window overlaps cpg_island,new chen zscore,negative new chen zscore,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
chr1,1432000,1433000,260,-0.247615,263.528599,203.072610,,1000.0,,True,False,False,4.299894,-4.299894,1.0,1,1000,VWA1,758.12,1.0
chr1,1435000,1436000,275,-7.547130,387.939429,223.937877,440.0,1000.0,444.0,True,True,True,3.331645,-3.331645,1.0,1,1000,VWA1,758.12,1.0
chr1,1449000,1450000,225,0.258359,221.559138,177.372370,,,311.0,False,True,False,1.817267,-1.817267,,0,0,.,,
chr1,1450000,1451000,313,2.197534,281.968822,199.400099,,,758.0,False,True,False,1.365863,-1.365863,,0,0,.,,
chr1,1451000,1452000,293,2.367303,260.131776,192.772432,,,,False,False,False,0.666316,-0.666316,,0,0,.,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
chr9,137269000,137270000,210,-3.445370,257.657992,191.337298,,1000.0,,True,False,False,5.276351,-5.276351,1.0,1,1000,HSALNG0075385,750.64,1.0
chr9,137275000,137276000,296,1.921453,268.932391,198.445131,,712.0,,True,False,False,2.687348,-2.687348,1.0,2,712,"NELFB,EHMT1",762.48,1.0
chr9,137282000,137283000,211,-1.919711,237.194349,186.184553,,572.0,641.0,True,True,False,-0.090791,0.090791,0.0,1,572,EXD3,11.79,0.0
chr9,137290000,137291000,224,-0.682673,233.338243,187.113535,,,,False,False,False,2.162811,-2.162811,,0,0,.,,


In [7]:
# Turn the group keys back into ordinary columns.
chen_mchale_zscores_features = chen_mchale_zscores_features.reset_index()

# Collapse each two-level (column, aggregation) header into one string such as
# 'eliteness_of_enhancer max'; former group keys have an empty second level,
# so the trailing space left by the join is stripped.
chen_mchale_zscores_features.columns = [
  ' '.join(header_levels).rstrip()
  for header_levels in chen_mchale_zscores_features.columns
]

chen_mchale_zscores_features = chen_mchale_zscores_features.rename(
  columns={'eliteness_of_enhancer count': 'enhancer count'}
)
chen_mchale_zscores_features

Unnamed: 0,chromosome,start,end,N_observed,N_bar_3_noncoding,N_mean_null_3_noncoding,N_variance_null_3_noncoding,cpg_island overlap,enhancer overlap,merged_exon overlap,window overlaps enhancer,window overlaps merged_exon,window overlaps cpg_island,new chen zscore,negative new chen zscore,eliteness_of_enhancer max,enhancer count,window_enhancer_overlap_bps sum,optimal_gene_targeted_by_enhancer <lambda>,optimal_enhancer_gene_association_score max,eliteness_of_optimal_enhancer_gene_association max
0,chr1,1432000,1433000,260,-0.247615,263.528599,203.072610,,1000.0,,True,False,False,4.299894,-4.299894,1.0,1,1000,VWA1,758.12,1.0
1,chr1,1435000,1436000,275,-7.547130,387.939429,223.937877,440.0,1000.0,444.0,True,True,True,3.331645,-3.331645,1.0,1,1000,VWA1,758.12,1.0
2,chr1,1449000,1450000,225,0.258359,221.559138,177.372370,,,311.0,False,True,False,1.817267,-1.817267,,0,0,.,,
3,chr1,1450000,1451000,313,2.197534,281.968822,199.400099,,,758.0,False,True,False,1.365863,-1.365863,,0,0,.,,
4,chr1,1451000,1452000,293,2.367303,260.131776,192.772432,,,,False,False,False,0.666316,-0.666316,,0,0,.,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1785815,chr9,137269000,137270000,210,-3.445370,257.657992,191.337298,,1000.0,,True,False,False,5.276351,-5.276351,1.0,1,1000,HSALNG0075385,750.64,1.0
1785816,chr9,137275000,137276000,296,1.921453,268.932391,198.445131,,712.0,,True,False,False,2.687348,-2.687348,1.0,2,712,"NELFB,EHMT1",762.48,1.0
1785817,chr9,137282000,137283000,211,-1.919711,237.194349,186.184553,,572.0,641.0,True,True,False,-0.090791,0.090791,0.0,1,572,EXD3,11.79,0.0
1785818,chr9,137290000,137291000,224,-0.682673,233.338243,187.113535,,,,False,False,False,2.162811,-2.162811,,0,0,.,,


## Stratify windows by whether they overlap exons or not

In [8]:
def filter_df(
    feature_to_stratify_by,
    window_overlaps_stratify_feature,
    df=None
):
    """Select the rows whose `feature_to_stratify_by` column equals a given value.

    Args:
        feature_to_stratify_by: name of the column to compare
            (e.g. 'window overlaps merged_exon').
        window_overlaps_stratify_feature: value the column must equal for a row to be kept.
        df: frame to filter. Defaults to the notebook-level `chen_mchale_zscores_features`,
            which is what this function always used before the parameter existed.

    Returns:
        pandas.DataFrame: an independent copy of the matching rows, so that columns can be
        added to the result later without touching (or warning about) the source frame.
    """
    if df is None:
        df = chen_mchale_zscores_features
    keep_row = df[feature_to_stratify_by] == window_overlaps_stratify_feature
    return df[keep_row].copy()


In [9]:
# Windows that do not overlap any merged exon.
chen_mchale_zscores_features_noncoding = filter_df(
    feature_to_stratify_by = 'window overlaps merged_exon',
    window_overlaps_stratify_feature = False,
)
# Windows that overlap a merged exon.
chen_mchale_zscores_features_coding = filter_df(
    feature_to_stratify_by = 'window overlaps merged_exon',
    window_overlaps_stratify_feature = True,
)

## McHale’s model enriches for stronger enhancer-gene associations

In [15]:
import warnings

def compute_array_of_quantiles():
    """Return the quantile edges used to bin window scores, and one label per bin.

    The edges run from 0 to 1 and are finer in both tails. Each label has the form
    '<lower edge> - <upper edge>', so there is one label fewer than there are edges.

    Returns:
        tuple: (list of quantile edges, list of bin labels).
    """
    array_of_quantiles = [
        0.00, 0.005, 0.01, 0.02, 0.05, 0.10, 0.25,
        0.5,
        0.75, 0.90, 0.95, 0.98, 0.99, 0.995, 1.00,
    ]
    # Pair each edge with its successor to describe the bin between them.
    quantile_labels = [
        f'{lower} - {upper}'
        for lower, upper in zip(array_of_quantiles, array_of_quantiles[1:])
    ]
    return array_of_quantiles, quantile_labels

def label_windows_with_score_quantiles_core(df, score):
    """Add a '<score> quantile' column that bins every row by the quantile of `score`.

    Bin edges and labels come from `compute_array_of_quantiles`. The column is written
    onto `df` itself, so the caller's frame is modified; all warnings raised while doing
    so are suppressed.

    Args:
        df: frame containing a `score` column.
        score: name of the column to bin.

    Returns:
        The same `df`, now carrying the f'{score} quantile' column.
    """
    array_of_quantiles, quantile_labels = compute_array_of_quantiles()
    quantile_column = f'{score} quantile'

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        df[quantile_column] = pd.qcut(
            df[score],
            q=array_of_quantiles,
            labels=quantile_labels,
        )

    return df
    
import seaborn as sns
sns.set(font_scale = 1.5)

import plotly.graph_objects as go

def compute_y_vs_quantile(df, score, y):
    """Summarise `y` and `score` within each quantile bin of `score`.

    The rows are first labelled by `label_windows_with_score_quantiles_core` (which adds
    the f'{score} quantile' column to `df`), then grouped by that label; mean, std and
    count are computed for both `y` and `score`.

    Args:
        df: frame containing the `score` and `y` columns.
        score: name of the column whose quantiles define the bins.
        y: name of the column to average per bin.

    Returns:
        tuple: (array of bin labels, array of the mean of `y` per bin, list with one dict
        per bin keyed by (column, statistic) tuples).
    """
    labelled = label_windows_with_score_quantiles_core(df, score)

    per_quantile = (
        labelled
        .groupby([f'{score} quantile'])
        .agg({
            y: ['mean', 'std', 'count'],
            score: ['mean', 'std', 'count'],
        })
    )

    return (
        np.array(per_quantile.index),
        np.array(per_quantile[(y, 'mean')]),
        per_quantile.to_dict(orient='records'),
    )

def create_text(hover_datum, score):
    """Build the hover text for one quantile bin.

    Args:
        hover_datum: dict holding the (score, 'mean'), (score, 'std') and (score, 'count')
            entries of one bin.
        score: name of the score the bin statistics refer to.

    Returns:
        str: three '<br>'-terminated lines giving the mean and std of the score and the
        number of windows in the bin.
    """
    mean_score, std_score, number_windows = (
        hover_datum[(score, statistic)] for statistic in ('mean', 'std', 'count')
    )
    lines = [
        f'{score} mean: {mean_score}',
        f'{score} std: {std_score}',
        f'number of windows: {number_windows}',
    ]
    return ''.join(f'{line}<br>' for line in lines)

def plot_enrichment(df, y, ylabel):
    """Plot the mean of `y` per score-quantile bin, with one trace per window score.

    Two scores are compared: 'negative new chen zscore' (square markers) and
    f'N_bar_3_{TRAIN_SET_LABEL}' (circle markers). For each score the rows are binned by
    score quantile via `compute_y_vs_quantile`, and the per-bin mean of `y` is drawn as a
    line with markers; hovering shows the text built by `create_text`. All warnings raised
    while plotting are suppressed, and the figure is shown rather than returned.

    Args:
        df: frame containing both score columns and the `y` column.
        y: name of the column to average per bin.
        ylabel: title of the y axis.
    """
    scores_markers = [
        ('negative new chen zscore', 'square'),
        (f'N_bar_3_{TRAIN_SET_LABEL}', 'circle'),
    ]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        sns.set(font_scale=1.6)
        fig = go.Figure() # type: ignore

        for score, marker in tqdm(scores_markers, desc='plot_enrichment'):
            quantiles, mean_y, hover_data = compute_y_vs_quantile(df, score, y)
            hover_texts = [create_text(hover_datum, score) for hover_datum in hover_data]
            trace = go.Scatter(
                x=quantiles,
                y=mean_y,
                mode='lines+markers',
                marker=dict(symbol=marker),
                name=score,
                hovertemplate='%{text}',
                text=hover_texts,
            ) # type: ignore
            fig.add_trace(trace)

        fig.update_xaxes(title='quantile of window score') # type: ignore
        fig.update_yaxes(title=ylabel, range=None) # type: ignore
        fig.update_layout(
            height=800,
            width=1600,
            font=dict(size=20),
        )
        fig.update_annotations(font_size=24) # type: ignore
        fig.show()

# Windows that do not overlap merged exons: mean of 'eliteness_of_enhancer max'
# in each score-quantile bin.
plot_enrichment(
    chen_mchale_zscores_features_noncoding, 
    y='eliteness_of_enhancer max', 
    ylabel='fraction of enhancers that are elite'
)

plot_enrichment: 100%|███████████████████████████| 2/2 [00:00<00:00,  2.67it/s]


In [16]:
# Windows that do not overlap merged exons: mean of
# 'optimal_enhancer_gene_association_score max' in each score-quantile bin.
plot_enrichment(
    chen_mchale_zscores_features_noncoding, 
    y='optimal_enhancer_gene_association_score max', 
    ylabel='enhancer-gene association'
)

plot_enrichment: 100%|███████████████████████████| 2/2 [00:00<00:00,  2.57it/s]


In [17]:
# Windows that do not overlap merged exons: mean of
# 'eliteness_of_optimal_enhancer_gene_association max' in each score-quantile bin.
plot_enrichment(
    chen_mchale_zscores_features_noncoding, 
    y='eliteness_of_optimal_enhancer_gene_association max', 
    ylabel='fraction of enhancer-gene associations that are elite'
)

plot_enrichment: 100%|███████████████████████████| 2/2 [00:00<00:00,  2.58it/s]


## Save data to disk

In [13]:
# Persist the one-record-per-window table (coding and noncoding windows alike)
# as a tab-separated file without the index column.
out_filename = f'{CONSTRAINT_TOOLS_DATA}/benchmark-genome-wide-predictions/chen-et-al-2022/enhancer-characteristics-enrichment.bed'
chen_mchale_zscores_features.to_csv(out_filename, sep='\t', index=False)
print(f'Wrote data to {out_filename}')

Wrote data to /scratch/ucgd/lustre-work/quinlan/data-shared/constraint-tools/benchmark-genome-wide-predictions/chen-et-al-2022/enhancer-characteristics-enrichment.bed
