In [1]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import os
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from tqdm import tqdm
# Global plotting defaults: colour cycle, axes style, then the 'paper' context.
sns.set_palette('Dark2')
sns.set_style({
    'axes.axisbelow': True,
    'axes.edgecolor': '.15',
    'axes.facecolor': 'white',
    'axes.grid': True,
    'axes.labelcolor': '.15',
    'axes.linewidth': 1.25,
    'figure.facecolor': 'white',
    'font.family': ['sans-serif'],
    'grid.color': '.15',
    'grid.linestyle': ':',
    'grid.alpha': .5,
    'image.cmap': 'Greys',
    'legend.frameon': False,
    'legend.numpoints': 1,
    'legend.scatterpoints': 1,
    'lines.solid_capstyle': 'round',
    'axes.spines.right': False,
    'axes.spines.top': False,
    'text.color': '.15',
    'xtick.top': False,
    'ytick.right': False,
    'xtick.color': '.15',
    'xtick.direction': 'out',
    'xtick.major.size': 6,
    'xtick.minor.size': 3,
    'ytick.color': '.15',
    'ytick.direction': 'out',
    'ytick.major.size': 6,
    'ytick.minor.size': 3,
})
sns.set_context('paper')

# Use font type 42 for PDF and PostScript output so text in saved figures
# stays editable; background: http://phyletica.org/matplotlib-fonts/
import matplotlib
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

In [2]:
import matplotlib.gridspec as gridspec

In [3]:
from snapanalysis.config import OUTPUT_DIRECTORY as MAIN_OUTPUT_DIRECTORY

# Everything this notebook writes goes under <main output>/ptm-response;
# the per-complex bar plots go into its 'barplots' subdirectory.
OUTPUT_DIRECTORY = os.path.join(MAIN_OUTPUT_DIRECTORY, 'ptm-response')
OUTPUT_DIRECTORY_BARPLOTS = os.path.join(OUTPUT_DIRECTORY, 'barplots')

# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)
os.makedirs(OUTPUT_DIRECTORY_BARPLOTS, exist_ok=True)

In [4]:
from snapanalysis.models.ptm_response.main import OUTPUT_FILE as PTM_RESPONSE_FILE

# Open the PTM-response results read-only and pull out the per-complex
# enrichment table; the store is closed when the block exits.
with pd.HDFStore(PTM_RESPONSE_FILE, mode='r') as store:
    enrichment_complexes = store.get('ptm_stats/joint_camera_complexes')

In [5]:
# Import the local `helpers` module and reload it, so that edits made to
# helpers.py after the kernel started are picked up without a restart.
import importlib
import helpers
importlib.reload(helpers)
# NOTE(review): the star import hides where names come from. Names used later
# in this notebook but not defined in it (PREDICTOR_ORDER, predictor_sorted_uri,
# load_limma_data, members_of) presumably come from `helpers` — confirm, and
# consider importing them explicitly.
from helpers import *

In [6]:
import urllib

# Print one right-aligned line per predictor with the URI that
# `predictor_sorted_uri` returns for it.
print('Informative URIs for predictors:')
print()

for predictor in PREDICTOR_ORDER:
    uri = predictor_sorted_uri(predictor)
    print(f'{predictor:>20}: {uri}')

Informative URIs for predictors:

               H2A.Z: http://ife-snap-data/proteins?pdorder=H36,H26,H37,H43,H45&showsimilar=false&noclusterproteins=true
                H3ac: http://ife-snap-data/proteins?pdorder=H05,H10,H02,H12,H16,H20,H21,H25,H22,H26,H41,H43&showsimilar=false&noclusterproteins=true
             H3K4me1: http://ife-snap-data/proteins?pdorder=H05,H03,H06,H13,H16,H30,H31,H15,H09&showsimilar=false&noclusterproteins=true
             H3K4me3: http://ife-snap-data/proteins?pdorder=H02,H13,H21,H15,H11,H14,H22,H40,H41&showsimilar=false&noclusterproteins=true
         H3K9acK14ac: http://ife-snap-data/proteins?pdorder=H15,H05,H09,H02,H11,H16,H18,H06,H19,H21,H23,H22,H24,H41,H42&showsimilar=false&noclusterproteins=true
             H3K9me2: http://ife-snap-data/proteins?pdorder=H47,H27M,H47M&showsimilar=false&noclusterproteins=true
             H3K9me3: http://ife-snap-data/proteins?pdorder=H01,H27M,H01M,H03,H04,H03M,H04M&showsimilar=false&noclusterproteins=true
             

In [7]:
# Shared colours: one per methylation degree (me1/me2/me3) and one for all
# acetylation marks, so related predictors look alike across figures.
color_me1 = '#BBE1BB'
color_me2 = '#9FBA7D'
color_me3 = '#6F9979'
color_ac = '#6484A2'

# Predictor name -> bar / point colour.
palette = dict([
    ('H2A.Z', '#786D9B'),
    ('H3K4me1', color_me1),
    ('H3K4me3', color_me3),
    ('H3K9acK14ac', color_ac),
    ('H3K9me2', color_me2),
    ('H3K9me3', color_me3),
    ('H3K27ac', color_ac),
    ('H3K27me2', color_me2),
    ('H3K27me3', color_me3),
    ('H3ac', color_ac),
    ('H4K16ac', color_ac),
    ('H4K20me2', color_me2),
    ('H4K20me3', color_me3),
    ('H4ac', color_ac),
    ('DNA Methylation', '#85B3B2'),
])




In [8]:
# Predictors grouped into the panels of each figure, in display order.
# Each inner list becomes one subplot; its length sets the panel's relative size.
# The groups must stay lists (not tuples) because they are used as `.loc` keys.
GROUPED_ORDER = [
    ['H2A.Z', 'DNA Methylation'],
    ['H3K4me1', 'H3K4me3'],
    ['H3ac', 'H3K9acK14ac', 'H3K27ac'],
    ['H3K9me2', 'H3K9me3'],
    ['H3K27me2', 'H3K27me3'],
    ['H4ac', 'H4K16ac'],
    ['H4K20me2', 'H4K20me3'],
]

In [9]:
# Load the limma results used for the individual points in the plots below.
# The plotting cells select rows via a `predictor` index level plus a second
# level keyed by the members of a complex, and read the `logFC` and
# `significant` columns.
# NOTE(review): schema inferred from that usage; `load_limma_data` is presumably
# provided by `helpers` — confirm there.
limma_results = load_limma_data()

In [10]:
# Names of all complexes flagged `significant` in at least one row of the
# enrichment table, de-duplicated and sorted.
complexes_to_plot = sorted(
    enrichment_complexes
    .reset_index()
    .query('significant')['Complex']
    .unique()
)

In [11]:
# Display the enrichment table (indexed by predictor and Complex) for inspection.
enrichment_complexes

Unnamed: 0_level_0,Unnamed: 1_level_0,NGenes,Direction,PValue,FDR,mean_logFC,mean_CI.L,mean_CI.R,mean_AveExpr,mean_t,mean_P.Value,...,mean_confint_half_width,mean_neg_log10_p,mean_neg_log10_p_adjust,mean_significant,mean_significant_and_large_fc,mean_proteins,empirical_median,empirical_median_ci_left,empirical_median_ci_right,significant
predictor,Complex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
H3K9me3,ncPRC1.6 (exclusive subunits),6.0,Up,3.348327e-09,4.386309e-07,0.849107,0.634297,1.06392,0.335809,11.2397,0.0316113,...,0.21481,5.90205,4.16808,0.833333,0.5,E2F6/L3MBTL2/MAX (2)/MGA (1)/MGA (2)/PCGF6,0.957673,0.852877,1.080185,True
H3K9me3,ncPRC1.6,15.0,Up,4.996852e-08,3.272938e-06,0.435135,0.238182,0.632088,0.0493912,6.22172,0.114882,...,0.196953,3.58803,2.40837,0.466667,0.2,E2F6/HDAC1/HDAC2/L3MBTL2/MAX (1)/MAX (2)/MGA (...,0.236092,0.183013,0.300212,True
H3K9me3,SIN3A/B (exclusive subunits),9.0,Up,1.235410e-06,5.394624e-05,0.229611,0.108907,0.350316,0.199731,4.52591,0.011284,...,0.120704,3.10023,1.83574,0.444444,0,ARID4A/ARID4B/BRMS1/BRMS1L/ING1/ING2/SAP30L/SI...,0.227357,0.178071,0.279933,True
H3K9me3,PRC1,9.0,Up,5.527297e-06,1.780781e-04,0.251455,0.0870596,0.41585,0.952773,4.84648,0.108419,...,0.164395,3.21306,2.02485,0.555556,0,CBX2/CBX4/CBX8/COMMD3-BMI1/BMI1/PHC2/PHC3/RING...,0.256866,0.203880,0.319279,True
H3K9me3,CAF-1,3.0,Up,7.327983e-06,1.780781e-04,1.86441,1.51666,2.21216,1.17732,15.0551,0.00286562,...,0.347753,6.57454,4.69765,0.666667,0.666667,CHAF1A/CHAF1B/RBBP4,2.300868,1.630583,2.946132,True
H3K9me3,BHC,8.0,Up,8.156250e-06,1.780781e-04,0.412906,0.25004,0.575772,0.221205,8.01459,0.171096,...,0.162866,3.98686,2.82245,0.375,0.125,GTF2I/HDAC1/HDAC2/HMG20B/KDM1A/PHF21A/ZMYM2/ZMYM3,0.225189,0.133913,0.335547,True
H3K9me3,HUSH,4.0,Up,1.332954e-05,2.494529e-04,0.878475,0.615936,1.14101,0.609998,7.32464,0.000987726,...,0.262538,4.67905,3.10424,0.75,0.25,MPHOSPH8/PPHLN1 (1)/PPHLN1 (3)/TASOR,0.697427,0.534392,0.870787,True
H3K9me3,SIN3A/B,18.0,Up,4.177514e-05,6.840679e-04,0.176159,0.0378029,0.314515,0.260388,3.04936,0.074532,...,0.138356,2.41184,1.38515,0.222222,0,ARID4A/ARID4B/BRMS1/BRMS1L/HDAC1/HDAC2/ING1/IN...,0.203865,0.162103,0.244236,True
H3K9me3,KAP1-SETDB1-DNMT1-ZNF304,3.0,Up,1.007542e-04,1.466533e-03,1.23289,0.92361,1.54216,0.48333,9.84516,0.00345532,...,0.309276,5.23366,3.58788,0.666667,0.666667,DNMT1 (1)/DNMT1 (2)/TRIM28,1.042827,0.600757,1.486559,True
H3K9me3,SF3B,7.0,Down,1.128506e-04,1.478343e-03,-0.105004,-0.245177,0.0351686,0.033141,-2.02126,0.116892,...,0.140173,1.20209,0.611724,0,0,PHF5A/SF3B1/SF3B2/SF3B3/SF3B4/SF3B5/SF3B6,-0.078086,-0.117538,-0.041204,True


In [12]:
import re

# Write one PDF per significant complex: for every predictor a horizontal bar
# with the complex-level empirical median (and its confidence interval), with
# the per-protein logFC values from limma drawn on top as points.

# Complexes flagged significant for at least one predictor, sorted by name.
complexes_to_plot = sorted(enrichment_complexes.reset_index().query('significant')['Complex'].unique())

# Complexes with more members than this are skipped.
max_proteins = 25
# A complex is plotted only if at least this many predictors have more than
# two limma rows for its members.
min_predictors_with_data = 10

# The layout is identical for every figure: one row of axes per predictor
# group, with the row height proportional to the number of predictors in it.
n_groups = len(GROUPED_ORDER)
group_sizes = [len(group) for group in GROUPED_ORDER]

for complex_ in tqdm(complexes_to_plot):

    # File name: complex name with every run of non-alphanumerics replaced by '_'
    complex_filename = re.sub('[^a-zA-Z0-9]+', '_', complex_).lower()
    filename = f'barplot-{complex_filename}.pdf'

    print(complex_)

    proteins = members_of(complex_)

    if len(proteins) > max_proteins:
        print(f'Skipping {complex_} because it has more than {max_proteins} proteins')
        continue

    # All limma rows (any predictor) for the members of this complex
    df = limma_results.loc(axis=0)[:, proteins]

    if (df.groupby(level='predictor').size() > 2).sum() < min_predictors_with_data:
        print(f'Skipping {complex_} because it does not match quality criteria')
        continue

    # PLOT
    fig = plt.figure(figsize=(5, 6.45),
                     constrained_layout=True)

    spec = gridspec.GridSpec(ncols=1,
                             nrows=n_groups,
                             figure=fig,
                             height_ratios=group_sizes)

    ax = None
    for i, predictor_subgroup in enumerate(GROUPED_ORDER):
        # Every row shares the x axis of the row above it
        ax = fig.add_subplot(spec[i, :], sharex=ax)

        # Select the rows of this predictor group with a boolean mask, so that a
        # predictor without any rows is simply absent; `.loc` with a list key
        # may raise KeyError for missing labels on newer pandas.
        in_subgroup = df.index.get_level_values('predictor').isin(predictor_subgroup)
        subgroup_df = df.loc[in_subgroup]

        medians = []
        errors = []
        significant_predictors = set()

        for predictor in predictor_subgroup:

            try:
                # Both lookups raise KeyError when the predictor has no data for
                # this complex; the first result is only needed as that check.
                subgroup_df.loc[predictor]
                subdf_enrichment = enrichment_complexes.loc[predictor, complex_]
            except KeyError:
                # Placeholder: zero-length bar without error bars
                medians.append(0)
                errors.append([0, 0])
                continue

            mu = subdf_enrichment['empirical_median']
            ci_low = subdf_enrichment['empirical_median_ci_left']
            ci_high = subdf_enrichment['empirical_median_ci_right']

            if subdf_enrichment['significant']:
                significant_predictors.add(predictor)

            # matplotlib expects error bars as distances from the bar end
            medians.append(mu)
            errors.append([mu - ci_low, ci_high - mu])

        for y, (predictor, mu, err) in enumerate(zip(predictor_subgroup, medians, errors)):
            ax.barh(y, mu,
                    xerr=np.atleast_2d(err).T,  # shape (2, 1): [[lower], [upper]]
                    color=palette[predictor],
                    error_kw=dict(elinewidth=3, capsize=5, capthick=2),
                    edgecolor='black', linewidth=1.0)

        # Individual proteins: 'o' where the limma row is significant, 'X' otherwise
        for significant, subsubgroup_df in subgroup_df.groupby('significant'):

            sns.stripplot(y='predictor', x='logFC',
                          hue='predictor',
                          hue_order=predictor_subgroup,
                          dodge=False,
                          palette=palette,
                          order=predictor_subgroup,
                          size=5,
                          alpha=.8,
                          edgecolor='black',
                          linewidth=1.0,
                          jitter=0.25,
                          marker='o' if significant else 'X',
                          ax=ax, data=subsubgroup_df.reset_index())

        # There is no legend if nothing was plotted or seaborn did not draw one
        legend = ax.get_legend()
        if legend is not None:
            legend.set_visible(False)

        ax.set_ylabel('')

        ax.yaxis.set_tick_params(length=0, labelsize=10)
        ax.xaxis.set_tick_params(length=0)

        # Bold labels mark predictors whose complex-level enrichment is significant
        for predictor, tick in zip(predictor_subgroup, ax.get_yticklabels()):
            if predictor in significant_predictors:
                tick.set_fontweight('bold')

        if i != n_groups-1:
            sns.despine(left=True, bottom=True, offset=5, ax=ax)
            # Only the bottom row shows x ticks and labels. tick_params replaces
            # the Tick.tick1On/label1On attributes, which newer Matplotlib no
            # longer honours, and it also applies to ticks created later.
            ax.tick_params(axis='x', which='major',
                           bottom=False, top=False,
                           labelbottom=False, labeltop=False)
            ax.set_xlabel('')
        else:
            sns.despine(left=True, bottom=False, offset=5, ax=ax)
            ax.set_xlabel('Change to H/L ratio attributed to modification',
                          fontsize=10)

        if i == 0:
            ax.set_title(complex_, fontsize=10)

    fig.savefig(os.path.join(OUTPUT_DIRECTORY_BARPLOTS, filename),
                bbox_inches='tight')
    # Close this figure explicitly so figures do not pile up in memory
    plt.close(fig)


  0%|          | 0/96 [00:00<?, ?it/s]

40S Ribosomal subunit
Skipping 40S Ribosomal subunit because it has more than 25 proteins
AP-2 dimers


  2%|▏         | 2/96 [00:02<02:16,  1.45s/it]

APC/C


  3%|▎         | 3/96 [00:05<02:45,  1.78s/it]

ASAP


  4%|▍         | 4/96 [00:07<02:44,  1.79s/it]

ATAC


  5%|▌         | 5/96 [00:08<02:36,  1.71s/it]

ATAC (exclusive subunits)


  6%|▋         | 6/96 [00:10<02:31,  1.68s/it]

B-MYB-MuvB


  7%|▋         | 7/96 [00:11<02:26,  1.64s/it]

B-MYB-MuvB (exclusive subunits)


  8%|▊         | 8/96 [00:13<02:25,  1.66s/it]

BAF


  9%|▉         | 9/96 [00:15<02:24,  1.66s/it]

BHC


 10%|█         | 10/96 [00:16<02:23,  1.66s/it]

BRCA1-A


 11%|█▏        | 11/96 [00:18<02:22,  1.68s/it]

BRCC


 12%|█▎        | 12/96 [00:20<02:14,  1.61s/it]

BRISC


 14%|█▎        | 13/96 [00:21<02:08,  1.54s/it]

Base Excision Repair Complex (exclusive subunits)


 15%|█▍        | 14/96 [00:23<02:06,  1.54s/it]

CAF-1


 16%|█▌        | 15/96 [00:24<02:03,  1.52s/it]

CHRAC


 17%|█▋        | 16/96 [00:26<02:04,  1.56s/it]

CtBP


 18%|█▊        | 17/96 [00:27<02:02,  1.56s/it]

EBAFA/B


 19%|█▉        | 18/96 [00:29<02:02,  1.57s/it]

EMSY


 20%|█▉        | 19/96 [00:31<02:10,  1.69s/it]

EMSY (exclusive subunits)


 21%|██        | 20/96 [00:34<02:43,  2.15s/it]

Fanconi Anemia


 22%|██▏       | 21/96 [00:36<02:40,  2.13s/it]

GBAF


 23%|██▎       | 22/96 [00:38<02:31,  2.05s/it]

GBAF (exclusive subunits)
Skipping GBAF (exclusive subunits) because it does not match quality criteria
HBO1


 25%|██▌       | 24/96 [00:40<02:00,  1.68s/it]

HBO1 (exclusive subunits)


 26%|██▌       | 25/96 [00:41<01:55,  1.63s/it]

HDAC-ELMSAN1-DNTTIP1


 27%|██▋       | 26/96 [00:43<01:53,  1.62s/it]

HUSH


 28%|██▊       | 27/96 [00:44<01:53,  1.64s/it]

INO80


 29%|██▉       | 28/96 [00:46<01:50,  1.62s/it]

INO80 (exclusive subunits)


 30%|███       | 29/96 [00:48<01:47,  1.61s/it]

Integrator


 31%|███▏      | 30/96 [00:49<01:49,  1.66s/it]

Integrator (exclusive subunits)


 32%|███▏      | 31/96 [00:51<01:44,  1.61s/it]

KAP1-HP1


 33%|███▎      | 32/96 [00:52<01:42,  1.61s/it]

KAP1-SETDB1-DNMT1-ZNF304
Skipping KAP1-SETDB1-DNMT1-ZNF304 because it does not match quality criteria
LSD-CoREST


 35%|███▌      | 34/96 [00:54<01:25,  1.38s/it]

MLL1/2


 36%|███▋      | 35/96 [00:56<01:29,  1.47s/it]

MLL1/2 (exclusive subunits)


 38%|███▊      | 36/96 [00:57<01:28,  1.48s/it]

MLL3/4


 39%|███▊      | 37/96 [00:59<01:30,  1.54s/it]

MLL3/4 (exclusive subunits)


 40%|███▉      | 38/96 [01:01<01:31,  1.57s/it]

MOZ/MORF


 41%|████      | 39/96 [01:03<01:38,  1.72s/it]

MRN


 42%|████▏     | 40/96 [01:04<01:33,  1.66s/it]

MSL


 43%|████▎     | 41/96 [01:06<01:28,  1.61s/it]

Mediator


 44%|████▍     | 42/96 [01:07<01:25,  1.59s/it]

N-CoR1


 45%|████▍     | 43/96 [01:09<01:24,  1.59s/it]

N-CoR2


 46%|████▌     | 44/96 [01:10<01:22,  1.59s/it]

NSL


 47%|████▋     | 45/96 [01:12<01:23,  1.63s/it]

NSL (exclusive subunits)


 48%|████▊     | 46/96 [01:14<01:21,  1.63s/it]

NuA4


 49%|████▉     | 47/96 [01:15<01:20,  1.64s/it]

NuA4 (exclusive subunits)


 50%|█████     | 48/96 [01:17<01:23,  1.74s/it]

NuRD


 51%|█████     | 49/96 [01:19<01:21,  1.73s/it]

NuRD (exclusive subunits)


 52%|█████▏    | 50/96 [01:21<01:24,  1.84s/it]

NuRF


 53%|█████▎    | 51/96 [01:23<01:25,  1.90s/it]

Nuclear cap-binding complex


 54%|█████▍    | 52/96 [01:28<01:58,  2.69s/it]

ORC


 55%|█████▌    | 53/96 [01:30<01:45,  2.46s/it]

PBAF


 56%|█████▋    | 54/96 [01:32<01:37,  2.33s/it]

PBAF (exclusive subunits)


 57%|█████▋    | 55/96 [01:34<01:33,  2.27s/it]

PCAF


 58%|█████▊    | 56/96 [01:36<01:25,  2.13s/it]

PR-DUB


 59%|█████▉    | 57/96 [01:37<01:19,  2.04s/it]

PRC1


 60%|██████    | 58/96 [01:40<01:17,  2.03s/it]

PRC1 (exclusive subunits)


 61%|██████▏   | 59/96 [01:41<01:11,  1.93s/it]

PRC2.1


 62%|██████▎   | 60/96 [01:43<01:06,  1.84s/it]

PRC2.1 (exclusive subunits)


 64%|██████▎   | 61/96 [01:45<01:04,  1.85s/it]

PRC2.2


 65%|██████▍   | 62/96 [01:46<01:01,  1.82s/it]

PSAP


 66%|██████▌   | 63/96 [01:49<01:03,  1.92s/it]

Paf1C


 67%|██████▋   | 64/96 [01:50<00:56,  1.76s/it]

Piccolo NuA4


 68%|██████▊   | 65/96 [01:52<00:52,  1.69s/it]

RFC


 69%|██████▉   | 66/96 [01:53<00:49,  1.66s/it]

RMI/BLM


 70%|██████▉   | 67/96 [01:55<00:48,  1.66s/it]

RNA Pol II


 71%|███████   | 68/96 [01:57<00:47,  1.69s/it]

RNA Pol II (exclusive subunits)


 72%|███████▏  | 69/96 [01:58<00:45,  1.69s/it]

RNA Pol III


 73%|███████▎  | 70/96 [02:00<00:43,  1.67s/it]

RPA


 74%|███████▍  | 71/96 [02:02<00:45,  1.84s/it]

SAGA


 75%|███████▌  | 72/96 [02:04<00:43,  1.81s/it]

SCF


 76%|███████▌  | 73/96 [02:06<00:40,  1.78s/it]

SET1A/B


 77%|███████▋  | 74/96 [02:07<00:38,  1.77s/it]

SF3B


 78%|███████▊  | 75/96 [02:09<00:35,  1.71s/it]

SIN3A/B


 79%|███████▉  | 76/96 [02:11<00:34,  1.72s/it]

SIN3A/B (exclusive subunits)


 80%|████████  | 77/96 [02:12<00:32,  1.69s/it]

SLF1/2-RAD18


 81%|████████▏ | 78/96 [02:14<00:29,  1.64s/it]

SNARP


 82%|████████▏ | 79/96 [02:16<00:31,  1.85s/it]

SRCAP


 83%|████████▎ | 80/96 [02:18<00:28,  1.76s/it]

Shelterin


 84%|████████▍ | 81/96 [02:19<00:25,  1.68s/it]

Shelterin (exclusive subunits)


 85%|████████▌ | 82/96 [02:21<00:23,  1.66s/it]

TFIID


 86%|████████▋ | 83/96 [02:23<00:22,  1.71s/it]

TFTC


 88%|████████▊ | 84/96 [02:24<00:20,  1.71s/it]

THO


 89%|████████▊ | 85/96 [02:26<00:17,  1.64s/it]

TREX


 90%|████████▉ | 86/96 [02:27<00:15,  1.58s/it]

XPC


 91%|█████████ | 87/96 [02:29<00:13,  1.54s/it]

XPC (exclusive subunits)


 92%|█████████▏| 88/96 [02:30<00:12,  1.50s/it]

ZAP3


 93%|█████████▎| 89/96 [02:32<00:10,  1.51s/it]

ZAP3 (exclusive subunits)


 94%|█████████▍| 90/96 [02:33<00:09,  1.57s/it]

ncPRC1


 95%|█████████▍| 91/96 [02:35<00:08,  1.61s/it]

ncPRC1.1


 96%|█████████▌| 92/96 [02:37<00:06,  1.71s/it]

ncPRC1.1 (exclusive subunits)


 97%|█████████▋| 93/96 [02:39<00:05,  1.72s/it]

ncPRC1.3/5


 98%|█████████▊| 94/96 [02:41<00:03,  1.79s/it]

ncPRC1.6


 99%|█████████▉| 95/96 [02:43<00:01,  1.86s/it]

ncPRC1.6 (exclusive subunits)


100%|██████████| 96/96 [02:44<00:00,  1.80s/it]


In [13]:
# Quick check: which of the plotted complexes mention INO80 (case-insensitive)?
[complex_name for complex_name in complexes_to_plot if 'ino80' in complex_name.lower()]

['INO80', 'INO80 (exclusive subunits)']

In [14]:
import re

# Same figures as the 'barplot-*.pdf' files, but laid out side by side: one
# column of axes per predictor group with vertical bars, saved as
# 'barplot-horizontal-*.pdf'.

# Complexes flagged significant for at least one predictor, sorted by name.
complexes_to_plot = sorted(enrichment_complexes.reset_index().query('significant')['Complex'].unique())

# Complexes with more members than this are skipped.
max_proteins = 25
# A complex is plotted only if at least this many predictors have more than
# two limma rows for its members.
min_predictors_with_data = 10

# The layout is identical for every figure: one column of axes per predictor
# group, with the column width proportional to the number of predictors in it.
n_groups = len(GROUPED_ORDER)
group_sizes = [len(group) for group in GROUPED_ORDER]

for complex_ in tqdm(complexes_to_plot):

    # File name: complex name with every run of non-alphanumerics replaced by '_'
    complex_filename = re.sub('[^a-zA-Z0-9]+', '_', complex_).lower()
    filename = f'barplot-horizontal-{complex_filename}.pdf'

    print(complex_)

    proteins = members_of(complex_)

    if len(proteins) > max_proteins:
        print(f'Skipping {complex_} because it has more than {max_proteins} proteins')
        continue

    # All limma rows (any predictor) for the members of this complex
    df = limma_results.loc(axis=0)[:, proteins]

    if (df.groupby(level='predictor').size() > 2).sum() < min_predictors_with_data:
        print(f'Skipping {complex_} because it does not match quality criteria')
        continue

    # PLOT
    fig = plt.figure(figsize=(6.25, 4.3),
                     constrained_layout=True)

    spec = gridspec.GridSpec(ncols=n_groups,
                             nrows=1,
                             figure=fig,
                             width_ratios=group_sizes)

    ax = None
    for i, predictor_subgroup in enumerate(GROUPED_ORDER):
        # Every column shares the y axis of the column to its left
        ax = fig.add_subplot(spec[:, i], sharey=ax)

        # Select the rows of this predictor group with a boolean mask, so that a
        # predictor without any rows is simply absent; `.loc` with a list key
        # may raise KeyError for missing labels on newer pandas.
        in_subgroup = df.index.get_level_values('predictor').isin(predictor_subgroup)
        subgroup_df = df.loc[in_subgroup]

        medians = []
        errors = []
        significant_predictors = set()

        for predictor in predictor_subgroup:

            try:
                # Both lookups raise KeyError when the predictor has no data for
                # this complex; the first result is only needed as that check.
                subgroup_df.loc[predictor]
                subdf_enrichment = enrichment_complexes.loc[predictor, complex_]
            except KeyError:
                # Placeholder: zero-height bar without error bars
                medians.append(0)
                errors.append([0, 0])
                continue

            mu = subdf_enrichment['empirical_median']
            ci_low = subdf_enrichment['empirical_median_ci_left']
            ci_high = subdf_enrichment['empirical_median_ci_right']

            if subdf_enrichment['significant']:
                significant_predictors.add(predictor)

            # matplotlib expects error bars as distances from the bar end
            medians.append(mu)
            errors.append([mu - ci_low, ci_high - mu])

        for x, (predictor, mu, err) in enumerate(zip(predictor_subgroup, medians, errors)):
            ax.bar(x, mu,
                   yerr=np.atleast_2d(err).T,  # shape (2, 1): [[lower], [upper]]
                   color=palette[predictor],
                   error_kw=dict(elinewidth=3, capsize=5, capthick=2),
                   edgecolor='black', linewidth=1.0)

        # Individual proteins: 'o' where the limma row is significant, 'X' otherwise
        for significant, subsubgroup_df in subgroup_df.groupby('significant'):

            sns.stripplot(x='predictor', y='logFC',
                          hue='predictor',
                          hue_order=predictor_subgroup,
                          dodge=False,
                          palette=palette,
                          order=predictor_subgroup,
                          size=5,
                          alpha=.8,
                          edgecolor='black',
                          linewidth=1.0,
                          jitter=0.25,
                          marker='o' if significant else 'X',
                          ax=ax, data=subsubgroup_df.reset_index())

        # There is no legend if nothing was plotted or seaborn did not draw one
        legend = ax.get_legend()
        if legend is not None:
            legend.set_visible(False)

        ax.set_xlabel('')

        ax.xaxis.set_tick_params(length=0, labelsize=10, rotation=90)
        ax.yaxis.set_tick_params(length=0)

        # Bold labels mark predictors whose complex-level enrichment is significant
        for predictor, tick in zip(predictor_subgroup, ax.get_xticklabels()):
            if predictor in significant_predictors:
                tick.set_fontweight('bold')

        if i != 0:
            sns.despine(left=True, bottom=True, offset=5, ax=ax)
            # Only the first column shows y ticks and labels. tick_params
            # replaces the Tick.tick1On/label1On attributes, which newer
            # Matplotlib no longer honours, and it also applies to ticks
            # created later.
            ax.tick_params(axis='y', which='major',
                           left=False, right=False,
                           labelleft=False, labelright=False)
            ax.set_ylabel('')
        else:
            sns.despine(left=False, bottom=True, offset=5, ax=ax)
            ax.set_ylabel('Change to H/L ratio attributed to modification',
                          fontsize=10)

    fig.suptitle(complex_, fontsize=10)

    fig.savefig(os.path.join(OUTPUT_DIRECTORY_BARPLOTS, filename),
                bbox_inches='tight')
    # Close this figure explicitly so figures do not pile up in memory
    plt.close(fig)


  0%|          | 0/96 [00:00<?, ?it/s]

40S Ribosomal subunit
Skipping 40S Ribosomal subunit because it has more than 25 proteins
AP-2 dimers


  2%|▏         | 2/96 [00:02<01:40,  1.07s/it]

APC/C


  3%|▎         | 3/96 [00:06<03:15,  2.10s/it]

ASAP


  4%|▍         | 4/96 [00:09<03:23,  2.21s/it]

ATAC


  5%|▌         | 5/96 [00:11<03:29,  2.30s/it]

ATAC (exclusive subunits)


  6%|▋         | 6/96 [00:13<03:24,  2.28s/it]

B-MYB-MuvB


  7%|▋         | 7/96 [00:16<03:24,  2.30s/it]

B-MYB-MuvB (exclusive subunits)


  8%|▊         | 8/96 [00:18<03:16,  2.23s/it]

BAF


  9%|▉         | 9/96 [00:20<03:04,  2.12s/it]

BHC


 10%|█         | 10/96 [00:21<02:49,  1.97s/it]

BRCA1-A


 11%|█▏        | 11/96 [00:23<02:32,  1.80s/it]

BRCC


 12%|█▎        | 12/96 [00:24<02:21,  1.69s/it]

BRISC


 14%|█▎        | 13/96 [00:25<02:12,  1.60s/it]

Base Excision Repair Complex (exclusive subunits)


 15%|█▍        | 14/96 [00:27<02:14,  1.65s/it]

CAF-1


 16%|█▌        | 15/96 [00:29<02:08,  1.59s/it]

CHRAC


 17%|█▋        | 16/96 [00:30<02:05,  1.57s/it]

CtBP


 18%|█▊        | 17/96 [00:32<02:10,  1.65s/it]

EBAFA/B


 19%|█▉        | 18/96 [00:35<02:30,  1.93s/it]

EMSY


 20%|█▉        | 19/96 [00:37<02:36,  2.04s/it]

EMSY (exclusive subunits)


 21%|██        | 20/96 [00:39<02:39,  2.10s/it]

Fanconi Anemia


 22%|██▏       | 21/96 [00:41<02:33,  2.04s/it]

GBAF


 23%|██▎       | 22/96 [00:43<02:34,  2.09s/it]

GBAF (exclusive subunits)
Skipping GBAF (exclusive subunits) because it does not match quality criteria
HBO1


 25%|██▌       | 24/96 [00:45<02:06,  1.76s/it]

HBO1 (exclusive subunits)


 26%|██▌       | 25/96 [00:47<02:07,  1.80s/it]

HDAC-ELMSAN1-DNTTIP1


 27%|██▋       | 26/96 [00:49<02:09,  1.85s/it]

HUSH


 28%|██▊       | 27/96 [00:51<02:01,  1.76s/it]

INO80


 29%|██▉       | 28/96 [00:53<02:04,  1.84s/it]

INO80 (exclusive subunits)


 30%|███       | 29/96 [00:55<02:04,  1.86s/it]

Integrator


 31%|███▏      | 30/96 [00:57<02:07,  1.94s/it]

Integrator (exclusive subunits)


 32%|███▏      | 31/96 [00:59<02:07,  1.96s/it]

KAP1-HP1


 33%|███▎      | 32/96 [01:01<02:08,  2.00s/it]

KAP1-SETDB1-DNMT1-ZNF304
Skipping KAP1-SETDB1-DNMT1-ZNF304 because it does not match quality criteria
LSD-CoREST


 35%|███▌      | 34/96 [01:03<01:45,  1.70s/it]

MLL1/2


 36%|███▋      | 35/96 [01:06<02:05,  2.05s/it]

MLL1/2 (exclusive subunits)


 38%|███▊      | 36/96 [01:08<02:01,  2.02s/it]

MLL3/4


 39%|███▊      | 37/96 [01:09<01:53,  1.93s/it]

MLL3/4 (exclusive subunits)


 40%|███▉      | 38/96 [01:11<01:49,  1.89s/it]

MOZ/MORF


 41%|████      | 39/96 [01:13<01:51,  1.96s/it]

MRN


 42%|████▏     | 40/96 [01:15<01:46,  1.91s/it]

MSL


 43%|████▎     | 41/96 [01:17<01:47,  1.95s/it]

Mediator


 44%|████▍     | 42/96 [01:20<01:54,  2.12s/it]

N-CoR1


 45%|████▍     | 43/96 [01:22<01:52,  2.13s/it]

N-CoR2


 46%|████▌     | 44/96 [01:24<01:45,  2.03s/it]

NSL


 47%|████▋     | 45/96 [01:26<01:56,  2.29s/it]

NSL (exclusive subunits)


 48%|████▊     | 46/96 [01:28<01:48,  2.18s/it]

NuA4


 49%|████▉     | 47/96 [01:31<01:55,  2.36s/it]

NuA4 (exclusive subunits)


 50%|█████     | 48/96 [01:33<01:46,  2.22s/it]

NuRD


 51%|█████     | 49/96 [01:35<01:40,  2.13s/it]

NuRD (exclusive subunits)


 52%|█████▏    | 50/96 [01:37<01:31,  1.99s/it]

NuRF


 53%|█████▎    | 51/96 [01:39<01:41,  2.25s/it]

Nuclear cap-binding complex


 54%|█████▍    | 52/96 [01:41<01:35,  2.17s/it]

ORC


 55%|█████▌    | 53/96 [01:44<01:37,  2.26s/it]

PBAF


 56%|█████▋    | 54/96 [01:46<01:29,  2.14s/it]

PBAF (exclusive subunits)


 57%|█████▋    | 55/96 [01:48<01:25,  2.09s/it]

PCAF


 58%|█████▊    | 56/96 [01:50<01:29,  2.23s/it]

PR-DUB


 59%|█████▉    | 57/96 [01:52<01:20,  2.08s/it]

PRC1


 60%|██████    | 58/96 [01:54<01:21,  2.15s/it]

PRC1 (exclusive subunits)


 61%|██████▏   | 59/96 [01:57<01:23,  2.26s/it]

PRC2.1


 62%|██████▎   | 60/96 [01:59<01:17,  2.15s/it]

PRC2.1 (exclusive subunits)


 64%|██████▎   | 61/96 [02:00<01:09,  2.00s/it]

PRC2.2


 65%|██████▍   | 62/96 [02:03<01:09,  2.04s/it]

PSAP


 66%|██████▌   | 63/96 [02:05<01:11,  2.16s/it]

Paf1C


 67%|██████▋   | 64/96 [02:07<01:07,  2.12s/it]

Piccolo NuA4


 68%|██████▊   | 65/96 [02:09<01:00,  1.94s/it]

RFC


 69%|██████▉   | 66/96 [02:11<00:59,  1.99s/it]

RMI/BLM


 70%|██████▉   | 67/96 [02:12<00:56,  1.94s/it]

RNA Pol II


 71%|███████   | 68/96 [02:17<01:14,  2.67s/it]

RNA Pol II (exclusive subunits)


 72%|███████▏  | 69/96 [02:20<01:12,  2.70s/it]

RNA Pol III


 73%|███████▎  | 70/96 [02:22<01:04,  2.46s/it]

RPA


 74%|███████▍  | 71/96 [02:24<01:02,  2.48s/it]

SAGA


 75%|███████▌  | 72/96 [02:27<00:59,  2.48s/it]

SCF


 76%|███████▌  | 73/96 [02:30<01:04,  2.81s/it]

SET1A/B


 77%|███████▋  | 74/96 [02:33<00:59,  2.70s/it]

SF3B


 78%|███████▊  | 75/96 [02:34<00:51,  2.45s/it]

SIN3A/B


 79%|███████▉  | 76/96 [02:36<00:44,  2.24s/it]

SIN3A/B (exclusive subunits)


 80%|████████  | 77/96 [02:38<00:39,  2.08s/it]

SLF1/2-RAD18


 81%|████████▏ | 78/96 [02:39<00:34,  1.91s/it]

SNARP


 82%|████████▏ | 79/96 [02:41<00:30,  1.81s/it]

SRCAP


 83%|████████▎ | 80/96 [02:43<00:27,  1.72s/it]

Shelterin


 84%|████████▍ | 81/96 [02:44<00:25,  1.68s/it]

Shelterin (exclusive subunits)


 85%|████████▌ | 82/96 [02:46<00:22,  1.63s/it]

TFIID


 86%|████████▋ | 83/96 [02:47<00:21,  1.65s/it]

TFTC


 88%|████████▊ | 84/96 [02:49<00:20,  1.67s/it]

THO


 89%|████████▊ | 85/96 [02:51<00:20,  1.83s/it]

TREX


 90%|████████▉ | 86/96 [02:53<00:17,  1.79s/it]

XPC


 91%|█████████ | 87/96 [02:55<00:15,  1.74s/it]

XPC (exclusive subunits)


 92%|█████████▏| 88/96 [02:57<00:15,  1.97s/it]

ZAP3


 93%|█████████▎| 89/96 [02:59<00:13,  1.95s/it]

ZAP3 (exclusive subunits)


 94%|█████████▍| 90/96 [03:01<00:11,  1.84s/it]

ncPRC1


 95%|█████████▍| 91/96 [03:02<00:09,  1.84s/it]

ncPRC1.1


 96%|█████████▌| 92/96 [03:04<00:07,  1.80s/it]

ncPRC1.1 (exclusive subunits)


 97%|█████████▋| 93/96 [03:06<00:05,  1.78s/it]

ncPRC1.3/5


 98%|█████████▊| 94/96 [03:08<00:03,  1.77s/it]

ncPRC1.6


 99%|█████████▉| 95/96 [03:09<00:01,  1.79s/it]

ncPRC1.6 (exclusive subunits)


100%|██████████| 96/96 [03:11<00:00,  1.75s/it]
