This notebook generates the Excel file containing the inferred network,
which is used as one of the supplementary figures and is also offered as a download online.

In [1]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import os
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from tqdm import tqdm
# Default colour cycle for every plot drawn in this notebook.
sns.set_palette('Dark2')
# House style: white figure/axes background, dotted half-transparent grid drawn
# below the data, no top/right spines, and outward-pointing ticks.
sns.set_style({'axes.axisbelow': True, 'axes.edgecolor': '.15', 'axes.facecolor': 'white',
               'axes.grid': True, 'axes.labelcolor': '.15', 'axes.linewidth': 1.25, 
               'figure.facecolor': 'white', 'font.family': ['sans-serif'], 'grid.color': '.15',
               'grid.linestyle': ':', 'grid.alpha': .5, 'image.cmap': 'Greys', 
               'legend.frameon': False, 'legend.numpoints': 1, 'legend.scatterpoints': 1,
               'lines.solid_capstyle': 'round', 'axes.spines.right': False, 'axes.spines.top': False,  
               'text.color': '.15',  'xtick.top': False, 'ytick.right': False, 'xtick.color': '.15',
               'xtick.direction': 'out', 'xtick.major.size': 6, 'xtick.minor.size': 3,
               'ytick.color': '.15', 'ytick.direction': 'out', 'ytick.major.size': 6,'ytick.minor.size': 3})
# Use seaborn's 'talk' plotting context for element sizes.
sns.set_context('talk')

# Embed fonts as Type 42 (TrueType) in PDF and PostScript output.
# Background: http://phyletica.org/matplotlib-fonts/
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

In [2]:
from snapanalysis.config import OUTPUT_DIRECTORY as MAIN_OUTPUT_DIRECTORY
# All files produced by this notebook go into the 'networks' sub-directory of
# the project-wide output directory.
OUTPUT_DIRECTORY = os.path.join(MAIN_OUTPUT_DIRECTORY, 'networks')

# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)

In [3]:
# Destination of the formatted Excel workbook.
OUTPUT_FILE_XLSX = os.path.join(OUTPUT_DIRECTORY, 'table-networks.xlsx')
# Templates for the per-sheet TSV exports. They are completed with
# str.format(sheet_no=..., sheet_name=...); `sheet_no` is zero-padded to two digits.
# "filtered" holds only the rows that also go into the workbook, "full" holds every row.
OUTPUT_FILE_TSV_FILTERED = os.path.join(OUTPUT_DIRECTORY, 'table-networks.sheet.{sheet_no:02}.{sheet_name}.filtered.tsv.gz')
OUTPUT_FILE_TSV_UNFILTERED = os.path.join(OUTPUT_DIRECTORY, 'table-networks.sheet.{sheet_no:02}.{sheet_name}.full.tsv.gz')

In [4]:
from snapanalysis.models.network.training import OUTPUT_FILE as NETWORK_TRAINING_OUTPUT
from snapanalysis.models.network.drawall import OUTPUT_HDF_FILE as NETWORK_OUTPUT_FILE

# Per-node metadata, the colour assigned to each community (community
# detection run stored under the key '0.001') and the per-edge statistics,
# all read from the network drawing output.
node_meta = pd.read_hdf(NETWORK_OUTPUT_FILE, '/output/node_meta')
community_colors = pd.read_hdf(NETWORK_OUTPUT_FILE, '/output/communities/0.001/colors')
edge_statistics = pd.read_hdf(NETWORK_OUTPUT_FILE, '/output/edge_statistics')

# Named q-value cut-offs produced by the network training step.
score_thresholds = pd.read_hdf(NETWORK_TRAINING_OUTPUT, '/output/score_thresholds')

# The community colours are deliberately NOT joined onto `node_meta`: they are
# applied to the 'Community' column of the Excel sheet via conditional formatting.

In [5]:
# Display the community -> hex colour mapping loaded above.
# NOTE(review): the rendered output lists both 'satellites' and 'sattelites'
# with different colours; presumably one spelling is a leftover from an
# upstream step -- verify which one the 'Community' column actually uses.
community_colors

Community
ATF1            #4369A7
NSD2            #E9B83F
ACTB            #4BAC7C
CHD6            #D54E74
TOR1AIP1 (1)    #786D9B
ATF3            #F0C77F
EPC2            #786D9B
UHRF1           #A2B9EE
ACTR5           #F0C77F
SLC25A5         #9EDEA8
MED6            #4BAC7C
RFC1            #EF8FB1
RPS4X           #E9B83F
ZFAT            #F0C77F
PCGF2           #F0C77F
TOP2A           #A2B9EE
SNW1            #4369A7
PAXIP1          #4369A7
NOP2 (1)        #4BAC7C
CDC73           #D54E74
MATR3           #786D9B
ANAPC15         #E9B83F
DNTTIP1         #4BAC7C
LIN37           #4369A7
BRCA1           #4369A7
GPS2            #4369A7
JUP             #4369A7
RFXAP           #4369A7
VKORC1          #4369A7
POLR2E          #4369A7
THOC1           #4369A7
satellites      #4369A7
sattelites      #969696
Name: Color, dtype: object

In [6]:
# Order the cut-offs ascending by q-value threshold, i.e. most stringent first,
# then display the table.
score_thresholds = score_thresholds.sort_values(by='threshold')
score_thresholds

Unnamed: 0_level_0,threshold,neg_log10_threshold,score,precision,recall,n_edges
threshold_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
high-confidence,5.41173e-07,6.266664,6.450395,0.700212,0.01678,970
0.0001,0.0001,4.0,5.434784,0.548125,0.030794,2274
0.001,0.001,3.0,4.882833,0.424974,0.042065,4005
0.01,0.01,2.0,4.216911,0.277725,0.059277,8579
0.05,0.05,1.30103,3.642714,0.190545,0.080831,16989


Categorise each edge by the most stringent confidence level at which it is accepted

In [7]:
# Every edge starts out as 'Other'. The thresholds are then applied from the
# loosest to the strictest, each pass overwriting the previous label, so an
# edge ends up with the label of the strictest threshold it satisfies.
edge_categories = pd.Series('Other', index=edge_statistics.index, name='Classification')

thresholds_loosest_first = score_thresholds.sort_values(by='threshold', ascending=False)['threshold']
for threshold_name, q_cutoff in thresholds_loosest_first.items():
    if threshold_name == 'high-confidence':
        label = threshold_name
    else:
        label = f'q ≤ {threshold_name}'

    passes_cutoff = edge_statistics['q_value'] <= q_cutoff
    edge_categories.loc[passes_cutoff] = label


In [8]:
# Number of edges that received each classification label.
edge_categories.value_counts()

Other              1815666
q ≤ 0.05              8410
q ≤ 0.01              4574
q ≤ 0.001             1731
q ≤ 0.0001            1304
high-confidence        970
Name: Classification, dtype: int64

Get the projection of nodes while we're at it

In [9]:
from snapanalysis.models.ptm_response.main import OUTPUT_FILE as PTM_RESPONSE_FILE

from snapanalysis.models.ptm_response.main import PREDICTOR_ORDER
predictors = PREDICTOR_ORDER

# Joint limma statistics of the PTM response model.
limma_results = pd.read_hdf(PTM_RESPONSE_FILE, '/ptm_stats/joint_limma_stats')

# The predictors present in the limma table must be exactly the ones listed in
# PREDICTOR_ORDER: nothing missing from the table, nothing extra in it.
predictors_from_limma = limma_results.reset_index()['predictor'].unique()
assert not any(p not in predictors_from_limma for p in predictors)
assert not any(p not in predictors for p in predictors_from_limma)

# Distance from the estimate to the lower confidence bound ('CI.L'); this is
# the value reported in the '±95%' columns of the node table.
limma_results = limma_results.assign(
    plusminus=limma_results['logFC'] - limma_results['CI.L']
)

In [10]:
# Inspect the per-node metadata table (indexed by 'Gene label').
node_meta

Unnamed: 0_level_0,Majority protein IDs,Gene names,Protein names,n_significant,n_nonzero,Community,network_pos_x,network_pos_y
Gene label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A0A087X222,A0A087X222,,,0,44,,,
A0A0C4DFX4,A0A0C4DFX4,,,0,26,,,
A0A0C4DGP2,A0A0C4DGP2,,,9,110,ATF3,574.075260,-11.161373
A0A0C4DGP5,A0A0C4DGP5,,,7,102,,,
AAAS,F8VZ44;Q9NRG9;H3BU82,AAAS,Aladin,3,60,satellites,585.458070,435.279500
AATF,Q9NY61,AATF,Protein AATF,1,110,,,
ABCF2,Q9UG63;C9JHK9;C9JZV3,ABCF2,ATP-binding cassette sub-family F member 2,0,62,,,
ABHD12,Q8N2K0,ABHD12,Monoacylglycerol lipase ABHD12,1,78,,,
ABRAXAS1,D6REL5;Q6UWZ7,ABRAXAS1,BRCA1-A complex subunit Abraxas,10,108,BRCA1,251.053960,632.421100
ACD,A0A0C4DGT6;Q96AP0;R4GNJ5,ACD,Adrenocortical dysplasia protein homolog,0,68,BRCA1,167.514340,651.253600


In [11]:
# Attach the classification to every edge, order the edges by q-value and turn
# the gene-label index levels into ordinary columns.
df_excel_edges = (
    edge_statistics
    .join(edge_categories)
    .copy()
    .sort_values(by='q_value')
    .reset_index()
)

# Three-way BioGRID status: edges whose `interaction_exists` flag is neither
# True nor False keep the default label 'Excluded'.
df_excel_edges['interaction_type'] = 'Excluded'
for flag, label in [(True, 'In BioGRID'), (False, 'Not in BioGRID')]:
    df_excel_edges.loc[df_excel_edges['interaction_exists'] == flag, 'interaction_type'] = label

# Exported columns, in display order. The BioGRID fields
# 'experimental_system_type' and 'throughput' are intentionally left out.
edge_export_columns = [
    'Gene label (row)', 'Gene label (col)',
    'Classification', 'score', 'p_value', 'q_value', 'neg_log10_q',
    'interaction_type', 'publication_count', 'experimental_system',
]
df_excel_edges = df_excel_edges[edge_export_columns]

# Full table: every edge, including the unclassified ones.
edges_full_path = OUTPUT_FILE_TSV_UNFILTERED.format(sheet_no=1, sheet_name='edges')
df_excel_edges.to_csv(edges_full_path, sep='\t')

# Excel chokes on the full table, so from here on keep only the edges that
# passed at least one threshold.
is_classified = df_excel_edges['Classification'] != 'Other'
df_excel_edges = df_excel_edges[is_classified]

edges_filtered_path = OUTPUT_FILE_TSV_FILTERED.format(sheet_no=1, sheet_name='edges')
df_excel_edges.to_csv(edges_filtered_path, sep='\t')


In [12]:
# Pivot the limma table so that each predictor contributes a
# '<predictor>-logFC' / '<predictor>-plusminus' pair of columns.
df_excel_limma = (
    limma_results
    .unstack('predictor')[['logFC', 'plusminus']]
    .swaplevel(axis=1)   # (statistic, predictor) -> (predictor, statistic)
    .sort_index(axis=1)  # keep the two statistics of a predictor adjacent
    .copy()
)
# Flatten the two column levels into single '<predictor>-<statistic>' names.
df_excel_limma.columns = list(map('-'.join, df_excel_limma.columns))
df_excel_limma.head()

Unnamed: 0_level_0,DNA Methylation-logFC,DNA Methylation-plusminus,H2A.Z-logFC,H2A.Z-plusminus,H3K27ac-logFC,H3K27ac-plusminus,H3K27me2-logFC,H3K27me2-plusminus,H3K27me3-logFC,H3K27me3-plusminus,...,H3ac-logFC,H3ac-plusminus,H4K16ac-logFC,H4K16ac-plusminus,H4K20me2-logFC,H4K20me2-plusminus,H4K20me3-logFC,H4K20me3-plusminus,H4ac-logFC,H4ac-plusminus
Gene label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A0A087X222,,,-0.120715,0.288662,,,,,0.130415,0.146096,...,,,0.015763,0.290089,0.043674,0.15655,-0.042913,0.182932,,
A0A0C4DFX4,,,,,,,,,,,...,,,0.258091,0.245891,,,,,0.053751,0.202778
A0A0C4DGP2,0.079881,0.079816,0.162207,0.220995,0.08892,0.444048,-0.056496,0.181011,-0.119317,0.112901,...,0.444815,0.126343,0.188181,0.166889,-0.182306,0.119657,-0.085021,0.115366,0.783898,0.154518
A0A0C4DGP5,0.143709,0.20937,-0.162679,1.034761,-0.13385,0.438555,0.014355,0.452128,-0.084018,0.264663,...,0.544675,0.490613,-0.061106,0.411125,-0.159186,0.438184,-0.20641,0.347552,0.977556,0.354015
AAAS,,,0.402661,0.239486,,,,,-0.078579,0.182164,...,0.18062,0.187812,-0.101349,0.270066,0.070355,0.137335,-0.070098,0.193831,0.182654,0.291146


In [13]:
# Column layout of the node table: metadata first, then one
# (logFC, plusminus) pair per chromatin feature in presentation order.
node_export_columns = [
    'Gene label', 'Majority protein IDs', 'Gene names', 'Protein names',
    'n_significant', 'n_nonzero',
    'Community', 'network_pos_x', 'network_pos_y',
    'H2A.Z-logFC', 'H2A.Z-plusminus',
    'H3ac-logFC', 'H3ac-plusminus',
    'H3K4me1-logFC', 'H3K4me1-plusminus',
    'H3K4me3-logFC', 'H3K4me3-plusminus',
    'H3K9acK14ac-logFC', 'H3K9acK14ac-plusminus',
    'H3K9me2-logFC', 'H3K9me2-plusminus',
    'H3K9me3-logFC', 'H3K9me3-plusminus',
    'H3K27ac-logFC', 'H3K27ac-plusminus',
    'H3K27me2-logFC', 'H3K27me2-plusminus',
    'H3K27me3-logFC', 'H3K27me3-plusminus',
    'H4ac-logFC', 'H4ac-plusminus',
    'H4K16ac-logFC', 'H4K16ac-plusminus',
    'H4K20me2-logFC', 'H4K20me2-plusminus',
    'H4K20me3-logFC', 'H4K20me3-plusminus',
    'DNA Methylation-logFC', 'DNA Methylation-plusminus',
]

# Node metadata joined with the wide effect estimates, ordered by community
# and then by gene label.
df_excel_nodes = (
    node_meta
    .join(df_excel_limma)
    .reset_index()
    .sort_values(by=['Community', 'Gene label'])
)
df_excel_nodes = df_excel_nodes[node_export_columns]

In [14]:
# Write the complete node table as tab-separated text (sheet 2, 'nodes').
# index=False is not passed, so the frame's integer index is written as the
# first, unnamed column.
df_excel_nodes.to_csv(OUTPUT_FILE_TSV_UNFILTERED.format(sheet_no=2, sheet_name='nodes'), sep='\t')

In [15]:
# Print the node column names as quoted strings, one per line and separated by
# commas -- presumably so they can be pasted into the column lists and rename
# dictionaries of this notebook.
print(',\n'.join(map(repr, df_excel_nodes.columns)))

'Gene label',
'Majority protein IDs',
'Gene names',
'Protein names',
'n_significant',
'n_nonzero',
'Community',
'network_pos_x',
'network_pos_y',
'H2A.Z-logFC',
'H2A.Z-plusminus',
'H3ac-logFC',
'H3ac-plusminus',
'H3K4me1-logFC',
'H3K4me1-plusminus',
'H3K4me3-logFC',
'H3K4me3-plusminus',
'H3K9acK14ac-logFC',
'H3K9acK14ac-plusminus',
'H3K9me2-logFC',
'H3K9me2-plusminus',
'H3K9me3-logFC',
'H3K9me3-plusminus',
'H3K27ac-logFC',
'H3K27ac-plusminus',
'H3K27me2-logFC',
'H3K27me2-plusminus',
'H3K27me3-logFC',
'H3K27me3-plusminus',
'H4ac-logFC',
'H4ac-plusminus',
'H4K16ac-logFC',
'H4K16ac-plusminus',
'H4K20me2-logFC',
'H4K20me2-plusminus',
'H4K20me3-logFC',
'H4K20me3-plusminus',
'DNA Methylation-logFC',
'DNA Methylation-plusminus'


In [16]:
# Header labels shown in the 'Interactions' sheet, keyed by the DataFrame
# column name. Columns missing from the mapping keep their original name,
# because the lookup is done with dict.get(col_name, col_name).
edge_renames = {
    'Gene label (row)': 'A',
    'Gene label (col)': 'B',
    'Classification': 'Classification',
    'score': 'CLR score',
    'p_value': 'P-value',
    'q_value': 'Q-value',
    'neg_log10_q': '-log10(q)',
    'interaction_type': 'Interaction',
    'publication_count': 'Publication count',
    'experimental_system': 'Identified by'
}

# Header labels shown in the 'Nodes' sheet. Every per-predictor column pair is
# labelled just 'logFC' / '±95%': the predictor name itself is written into a
# merged header cell one row above the labels.
node_renames = {
    'Gene label': 'Gene label',
    'Majority protein IDs': 'Majority protein IDs',
    'Gene names': 'Gene names',
    'Protein names': 'Protein names',
    'n_significant': '# enriched',
    'n_nonzero': '# nonzero',
    'Community': 'Community',
    'network_pos_x': 'X coord.',
    'network_pos_y': 'Y coord.',
    'H2A.Z-logFC': 'logFC',
    'H2A.Z-plusminus': '±95%',
    'H3ac-logFC': 'logFC',
    'H3ac-plusminus': '±95%',
    'H3K4me1-logFC': 'logFC',
    'H3K4me1-plusminus': '±95%',
    'H3K4me3-logFC': 'logFC',
    'H3K4me3-plusminus': '±95%',
    'H3K9acK14ac-logFC': 'logFC',
    'H3K9acK14ac-plusminus': '±95%',
    'H3K9me2-logFC': 'logFC',
    'H3K9me2-plusminus': '±95%',
    'H3K9me3-logFC': 'logFC',
    'H3K9me3-plusminus': '±95%',
    'H3K27ac-logFC': 'logFC',
    'H3K27ac-plusminus': '±95%',
    'H3K27me2-logFC': 'logFC',
    'H3K27me2-plusminus': '±95%',
    'H3K27me3-logFC': 'logFC',
    'H3K27me3-plusminus': '±95%',
    'H4ac-logFC': 'logFC',
    'H4ac-plusminus': '±95%',
    'H4K16ac-logFC': 'logFC',
    'H4K16ac-plusminus': '±95%',
    'H4K20me2-logFC': 'logFC',
    'H4K20me2-plusminus': '±95%',
    'H4K20me3-logFC': 'logFC',
    'H4K20me3-plusminus': '±95%',
    'DNA Methylation-logFC': 'logFC',
    'DNA Methylation-plusminus': '±95%',
}

In [17]:
# Font colour used for each value of the 'Classification' column in the
# 'Interactions' sheet; the keys must match the labels in `edge_categories`.
classification_colors = {
    'Other': '#7fcdbb',
    'q ≤ 0.05': '#41b6c4',
    'q ≤ 0.01': '#1d91c0',            
    'q ≤ 0.001': '#225ea8',    
    'q ≤ 0.0001': '#253494',     
    'high-confidence': '#081d58',
}

# Base colour for each value of the 'interaction_type' column. The Excel cell
# text uses a desaturated version of these colours; the 'In BioGRID' entry is
# also the bar colour of the publication-count data bars.
interaction_colors = {
    'In BioGRID': '#2166ac',
    'Not in BioGRID': '#b2182b',
    'Excluded': '#999999',
}

In [18]:
import xlsxwriter
from xlsxwriter.utility import xl_rowcol_to_cell


def _build_formats(workbook, format_dicts):
    """Register the base formats and their bordered variants with ``workbook``.

    For every ``name -> properties`` entry three additional formats are
    created, ``{name}-bottom``, ``{name}-right`` and ``{name}-bottom-right``,
    which add border style 1 on the named side(s). Entries with empty
    properties are only registered through their bordered variants
    (for example ``'-right'``).

    Returns a dict mapping format name to the xlsxwriter format object.
    """
    formats = {}
    for format_name, properties in format_dicts.items():
        if len(properties) > 0:
            formats[format_name] = workbook.add_format(properties)

        for border in ['bottom', 'right', 'bottom-right']:
            bordered = properties.copy()
            if border in ['bottom', 'bottom-right']:
                bordered['bottom'] = 1
            if border in ['right', 'bottom-right']:
                bordered['right'] = 1
            formats[f'{format_name}-{border}'] = workbook.add_format(bordered)
    return formats


def _write_column_labels(worksheet, columns, label_row, name_rewrites, separator_columns, formats):
    """Write the header label of every column into ``label_row``.

    Labels come from ``name_rewrites`` (falling back to the column name).
    Columns listed in ``separator_columns`` additionally get a right border.

    Returns a dict mapping column name to its 0-based column index.
    """
    col_positions = {}
    for col, col_name in enumerate(columns):
        col_positions[col_name] = col
        format_name = 'header-bottom-right' if col_name in separator_columns else 'header-bottom'
        worksheet.write(label_row, col, name_rewrites.get(col_name, col_name), formats[format_name])
    return col_positions


def _merge_headers(worksheet, col_positions, first_row, last_row, groups, cell_format):
    """Write one merged header cell per ``(from_col, to_col, label)`` group."""
    for from_col, to_col, label in groups:
        worksheet.merge_range(first_row, col_positions[from_col],
                              last_row, col_positions[to_col],
                              label, cell_format)


def _color_by_value(workbook, worksheet, col, first_row, last_row, value_colors, fill=False):
    """Conditionally colour the cells of one column according to their text.

    ``value_colors`` maps a cell value to a hex colour (anything with an
    ``.items()`` method works, e.g. a dict or a pandas Series). With
    ``fill=False`` the colour is used as font colour; with ``fill=True`` it
    becomes the background and the font is black or white, whichever
    contrasts better with it.
    """
    anchor_cell = xl_rowcol_to_cell(first_row, col)
    for value, color in value_colors.items():
        if fill:
            lum = sns.utils.relative_luminance(color)
            text_color = "#000000" if lum > .408 else "#FFFFFF"
            cell_format = workbook.add_format({'bg_color': color, 'font_color': text_color})
        else:
            cell_format = workbook.add_format({'font_color': color})

        # Double quotes inside an Excel string literal are escaped by doubling.
        excel_literal = str(value).replace('"', '""')
        worksheet.conditional_format(first_row, col, last_row, col,
                                     {'type':     'formula',
                                      # '$' pins the column; the row stays relative.
                                      'criteria': '=${}="{}"'.format(anchor_cell, excel_literal),
                                      'format':   cell_format,
                                      })


def _set_column_layout(worksheet, col_positions, col_widths, separator_columns, separator_format):
    """Set width and separator border of each column in ONE ``set_column`` call.

    A later ``set_column`` call on the same column replaces the earlier one,
    so setting the border format first and the width afterwards would drop
    the border again. Columns that are separators but have no explicit width
    still receive the border format.
    """
    widths = dict(col_widths)
    width_less_separators = [name for name in separator_columns if name not in widths]
    for col_name in list(widths) + width_less_separators:
        col = col_positions[col_name]
        cell_format = separator_format if col_name in separator_columns else None
        worksheet.set_column(col, col, widths.get(col_name), cell_format)


format_dicts = {
    'header': {'bold': True},
    'header-merged': {'bold': True,
                      'align': 'center',
                      'valign': 'vcenter'},
    'red': {
        'font_color': '#b2182b',
    },
    '': {},
}

# Sheet layout (0-based rows): rows 0-1 hold the merged group headers,
# row 2 the column labels, and the data starts on row 3.
first_data_row = 3

# The context manager saves and closes the workbook on exit
# (``ExcelWriter.save()`` no longer exists in pandas >= 2.0).
with pd.ExcelWriter(OUTPUT_FILE_XLSX, engine='xlsxwriter') as writer:

    # --- Edges ------------------------
    df = df_excel_edges
    name_rewrites = edge_renames
    sheet_name = 'Interactions'

    data_length = len(df)
    # max() keeps the formatted ranges valid for an empty table.
    last_data_row = max(first_data_row + data_length - 1, first_data_row)

    # Data only: the headers are written by hand below.
    df.to_excel(writer,
                sheet_name=sheet_name,
                startrow=first_data_row,
                index=False,
                header=False)

    workbook = writer.book
    worksheet = writer.sheets[sheet_name]
    formats = _build_formats(workbook, format_dicts)

    # Last column of each header group; gets a right border.
    separator_columns = {
        'Gene label (col)',
        'neg_log10_q',
        'experimental_system'
    }

    col_positions = _write_column_labels(worksheet, df.columns, first_data_row - 1,
                                         name_rewrites, separator_columns, formats)

    _merge_headers(worksheet, col_positions, first_data_row - 3, first_data_row - 2,
                   [
                       ('Gene label (row)', 'Gene label (col)', 'Gene label'),
                       ('Classification', 'neg_log10_q', 'Prediction'),
                       ('interaction_type', 'experimental_system', 'BioGRID information'),
                   ],
                   formats['header-merged-right'])

    _color_by_value(workbook, worksheet, col_positions['Classification'],
                    first_data_row, last_data_row, classification_colors)

    # The interaction labels use a slightly desaturated version of their colour.
    _color_by_value(workbook, worksheet, col_positions['interaction_type'],
                    first_data_row, last_data_row,
                    {value: matplotlib.colors.rgb2hex(sns.desaturate(color, 0.7))
                     for value, color in interaction_colors.items()})

    for col in ['publication_count']:
        worksheet.conditional_format(first_data_row,
                                     col_positions[col],
                                     last_data_row,
                                     col_positions[col],
                                     {'type':     'data_bar',
                                      'bar_solid': True,
                                      'min_type': 'percentile',
                                      'max_type': 'percentile',
                                      'min_value': 0,
                                      'max_value': 100,
                                      'bar_color': interaction_colors['In BioGRID'],
                                      })

    _set_column_layout(worksheet, col_positions,
                       [('Gene label (row)', 20),
                        ('Gene label (col)', 20),
                        ('Classification', 15),
                        ('score', 10),
                        ('p_value', 10),
                        ('q_value', 10),
                        ('neg_log10_q', 10),
                        ('interaction_type', 15),
                        ('publication_count', 15),
                        ('experimental_system', 150)],
                       separator_columns, formats['-right'])

    worksheet.autofilter(first_data_row - 1, 0, last_data_row, len(df.columns) - 1)
    # Keep the headers and the columns left of 'score' visible while scrolling.
    worksheet.freeze_panes(first_data_row, col_positions['score'])

    # -------- Nodes ----------------
    df = df_excel_nodes
    name_rewrites = node_renames
    sheet_name = 'Nodes'

    data_length = len(df)
    last_data_row = max(first_data_row + data_length - 1, first_data_row)

    df.to_excel(writer,
                sheet_name=sheet_name,
                startrow=first_data_row,
                index=False,
                header=False)

    worksheet = writer.sheets[sheet_name]

    separator_columns = {
        'Protein names',
        'n_nonzero',
        'network_pos_y',
        'DNA Methylation-plusminus'
    }

    col_positions = _write_column_labels(worksheet, df.columns, first_data_row - 1,
                                         name_rewrites, separator_columns, formats)

    _merge_headers(worksheet, col_positions, first_data_row - 3, first_data_row - 2,
                   [
                       ('Gene label', 'Protein names', 'Metadata'),
                       ('n_significant', 'n_nonzero', 'Enriched Pull-Downs'),
                       ('Community', 'network_pos_y', 'Position in network'),
                   ],
                   formats['header-merged-right'])

    # Two-level header for the effect estimates: one title spanning all
    # predictor columns on the top row ...
    _merge_headers(worksheet, col_positions, first_data_row - 3, first_data_row - 3,
                   [('H2A.Z-logFC', 'DNA Methylation-plusminus',
                     'Chromatin modification effect estimates')],
                   formats['header-merged-right'])

    # ... and the predictor name above each (logFC, ±95%) pair on the second
    # row. Only the right-most pair carries the group's right border.
    for predictor in PREDICTOR_ORDER:
        if predictor == 'DNA Methylation':
            format_ = formats['header-merged-right']
        else:
            format_ = formats['header-merged']
        _merge_headers(worksheet, col_positions, first_data_row - 2, first_data_row - 2,
                       [(f'{predictor}-logFC', f'{predictor}-plusminus', predictor)],
                       format_)

    # Communities are shown with their network colour as cell background.
    _color_by_value(workbook, worksheet, col_positions['Community'],
                    first_data_row, last_data_row, community_colors, fill=True)

    fmt_red = workbook.add_format({'font_color': '#fc4e2a'})

    for predictor in PREDICTOR_ORDER:
        logfc_col = col_positions[f'{predictor}-logFC']
        plusminus_col = col_positions[f'{predictor}-plusminus']

        # Diverging blue-white-red scale for the effect size, fixed to [-2, 2].
        worksheet.conditional_format(first_data_row, logfc_col,
                                     last_data_row, logfc_col,
                                     {
                                         'type': '3_color_scale',
                                         'min_type': 'num',
                                         'mid_type': 'num',
                                         'max_type': 'num',
                                         'min_value': -2,
                                         'mid_value': 0,
                                         'max_value': 2,
                                         'min_color': '#2166ac',
                                         'mid_color': '#f7f7f7',
                                         'max_color': '#b2182b',
                                     })

        # Print the interval half-width in red when it is at least as large
        # as |logFC|, i.e. when the interval reaches zero.
        worksheet.conditional_format(first_data_row, plusminus_col,
                                     last_data_row, plusminus_col,
                                     {'type': 'formula',
                                      'criteria': '=ABS(${}) - ${} <= 0'.format(
                                          xl_rowcol_to_cell(first_data_row, logfc_col),
                                          xl_rowcol_to_cell(first_data_row, plusminus_col)
                                      ),
                                      'format': fmt_red,
                                      })

    node_col_widths = [('Gene label', 20),
                       ('Majority protein IDs', 20),
                       ('Gene names', 20),
                       ('Protein names', 30),
                       ('n_significant', 10),
                       ('n_nonzero', 10),
                       ('Community', 10),
                       ('network_pos_x', 10),
                       ('network_pos_y', 10)]
    for predictor in PREDICTOR_ORDER:
        node_col_widths.append((f'{predictor}-logFC', 5))
        node_col_widths.append((f'{predictor}-plusminus', 5))

    _set_column_layout(worksheet, col_positions, node_col_widths,
                       separator_columns, formats['-right'])

    worksheet.autofilter(first_data_row - 1, 0, last_data_row, len(df.columns) - 1)
    worksheet.freeze_panes(first_data_row, col_positions['n_significant'])