In [1]:
import argparse
import os

import pandas as pd
import pickle
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
from warnings import warn
import datetime
import time
import json

from tqdm.notebook import tqdm

from slugify import slugify

from scipy.stats import gaussian_kde
from scipy.stats import norm

import statsmodels.api as smapi
import statsmodels.formula.api as sm
from statsmodels.stats.multitest import multipletests as mt

from matplotlib.gridspec import GridSpec
import matplotlib.patches as mpatches

from meirlop.motif_enrichment import dict_to_df
from meirlop.motif_enrichment import preprocess_lr_df

In [2]:
output_dir = 'meirlop_output_directory'

motif_id_slugname_df = None
n_top = 10
plot_all = False
double_negative = True
norm_scale = 1.0
plot_dpi = 300
figsize = (10, 10)
plot_formats = ['.svg', '.png']
progress_wrapper = tqdm
motif_count_smooth_window = 3
num_kde_points = 1000
depp = True

In [3]:
def load_meirlop_output_dfs(output_dir):
    outpath_lr_results = os.path.normpath(output_dir + '/lr_results.tsv')
    outpath_lr_input = os.path.normpath(output_dir + '/lr_input.tsv')
    outpath_peak_length = os.path.normpath(output_dir + '/peak_lengths.tsv')
    outpath_motif_length = os.path.normpath(output_dir + '/motif_lengths.tsv')
    outpath_scan_results = os.path.normpath(output_dir + '/scan_results.tsv')
    outpath_html_logos_json = os.path.normpath(output_dir + '/html_logos.json')
    
    lr_results_df = pd.read_csv(outpath_lr_results, sep = '\t')
    lr_input_df = pd.read_csv(outpath_lr_input, sep = '\t')
    peak_length_df = pd.read_csv(outpath_peak_length, sep = '\t')
    motif_length_df = pd.read_csv(outpath_motif_length, sep = '\t')
    
    try:
        scan_results_df = pd.read_csv(outpath_scan_results, sep = '\t')
    except:
        warn(f'The file {outpath_scan_results} could not be loaded. Try rerunning meirlop with the --scan flag enabled')
        results = (lr_results_df, lr_input_df, peak_length_df, motif_length_df)
        return results
    
    try:
        with open(outpath_html_logos_json) as outpath_html_logos_json_file:
            html_logos = json.loads(outpath_html_logos_json_file.read())
    except:
        warn(f'The file {outpath_html_logos_json} could not be loaded. Try rerunning meirlop with the --html flag enabled')
        results = (lr_results_df, lr_input_df, peak_length_df, motif_length_df, scan_results_df)
        return results
    
    results = (lr_results_df, lr_input_df, peak_length_df, motif_length_df, scan_results_df, html_logos)
    return results


In [4]:
def get_motif_id_slugname_df(lr_results_df):
    motif_ids_by_rank = list(lr_results_df['motif_id'])
    motif_id_to_slugname = {motif_id: slugify(f'rank {rank} {motif_id}', separator = '_') for rank, motif_id in enumerate(motif_ids_by_rank, 1)}
    motif_id_slugname_df = dict_to_df(motif_id_to_slugname, 'motif_id', 'slugname')
    return motif_id_slugname_df


In [5]:
def precompute_motif_dfs(
    lr_input_df, 
    scan_results_df, 
    motif_length_df, 
    peak_length_df, 
    progress_wrapper = tqdm
):
    max_peak_length = peak_length_df['peak_length'].max()
    motif_length_dict = motif_length_df.set_index('motif_id')['motif_length'].to_dict()
    peak_length_dict = peak_length_df.set_index('peak_id')['peak_length'].to_dict()
    scan_results_df['instance_position_center'] = (
        scan_results_df['instance_position'] - 
        (scan_results_df['peak_id'].map(peak_length_dict) / 2.0) + 
        (scan_results_df['motif_id'].map(motif_length_dict) / 2.0)
    )
    scan_results_df['instance_position_center_int'] = scan_results_df['instance_position_center'].astype(int)
    lr_input_df['peak_score_rank'] = lr_input_df['peak_score'].rank(method = 'first')
    lr_input_df['peak_score_rank_int'] = lr_input_df['peak_score_rank'].astype(int) - 1
    max_peak_score_rank = lr_input_df['peak_score_rank_int'].max()
    
    scan_results_df_gb_motif_id = scan_results_df.groupby('motif_id')
    peak_cols = ['peak_id', 'peak_score', 'peak_score_rank', 'peak_score_rank_int']
    peak_and_motif_df_by_motif_id = {
        motif_id: 
        (
            lr_input_df[peak_cols]
            .merge(
                scan_results_df_gb_motif_id
                .get_group(motif_id)
                .copy()
            )
        ) 
        for motif_id 
        in progress_wrapper(
            list(
                set(
                    scan_results_df['motif_id']
                )
            )
        )
    }
    sorted_peak_score_rank_df = lr_input_df[['peak_score', 'peak_score_rank_int']].sort_values(by = 'peak_score_rank_int')
    return peak_and_motif_df_by_motif_id, sorted_peak_score_rank_df, max_peak_length, max_peak_score_rank


In [6]:
def compute_motif_distributions(
    peak_and_motif_df, 
    max_peak_length, 
    max_peak_score_rank, 
    window = 3, 
    num_kde_points = 1000, 
    progress_wrapper = tqdm
): 
    peak_and_motif_df_by_orientation = {
        orientation: 
        peak_and_motif_df[
            peak_and_motif_df['motif_orientation'] == orientation
        ] 
        for orientation 
        in set(list(peak_and_motif_df['motif_orientation']))
    }
    
    rolling_mean_on_motif_position_by_orientation = {
        orientation: 
        two_column_df_to_rolling_mean(
            (df[['instance_position_center_int', 'motif_id']]
             .rename(columns = {'motif_id': 'motif_count'})), 
            min_index = -int(np.floor(max_peak_length / 2.0)), 
            max_index = int(np.ceil(max_peak_length / 2.0)), 
            window = window, 
            center = True
        ) 
        for orientation, df 
        in peak_and_motif_df_by_orientation.items()
    }
    
    yvals_by_orientation = {}
    xvals_by_orientation = {}
    for orientation, df in peak_and_motif_df_by_orientation.items():
        yvals_by_orientation[orientation] = np.linspace(0, max_peak_score_rank+1, num_kde_points)
        if df.shape[0] > 1:
            kernel = gaussian_kde(df['peak_score_rank_int'])
            xvals_by_orientation[orientation] = kernel(yvals_by_orientation[orientation])
        else:
            xvals_by_orientation[orientation] = yvals_by_orientation[orientation] * 0.0
    
    motif_rank_kde_by_orientation = {
        orientation: 
        pd.DataFrame({'peak_score_rank_int': yvals_by_orientation[orientation], 
                      'motif_density': xvals_by_orientation[orientation]
                     }) 
        for orientation 
        in list(peak_and_motif_df_by_orientation.keys())
    }
    
    return peak_and_motif_df_by_orientation, rolling_mean_on_motif_position_by_orientation, motif_rank_kde_by_orientation


In [7]:
def two_column_df_to_rolling_mean(df, min_index = None, max_index = None, window = 3, center = True):
    col1 = df.columns[0]
    col2 = df.columns[1]
    if min_index is None:
        min_index = int(np.floor(df[col1].min()))
    if max_index is None:
        max_index = int(np.ceil(df[col1].max()))
    rolling_mean_df = (
        df[[col1, col2]]
        .groupby(col1)
        .count()
        .reset_index()
        .merge(
        pd.DataFrame(
            {col1 : 
             list(range(min_index, max_index + 1))
            }), 
        how = 'outer')
        .fillna(0.0)
        .sort_values(by = col1)
        .set_index(col1)
        .rolling(window, center = center)
        .mean()
        .dropna()
        .reset_index()
    )
    rolling_mean_df[col1]
    return rolling_mean_df


In [8]:
def compute_delta_enrichment_positional_profile(
    peak_and_motif_df, 
    lr_input_df, 
    max_peak_length, 
    norm_scale = 1.0,
    double_negative = True
):
    peak_and_motif_df_cp = peak_and_motif_df.copy()
    norm_locs = np.arange(-max_peak_length/2.0, max_peak_length/2.0 + 1, 1.0)
    lr_input_df_subset = lr_input_df[
        lr_input_df['peak_id']
        .isin(list(set(peak_and_motif_df['peak_id'])))
    ]
    
    preprocessed_lr_df = (
        preprocess_lr_df(
            lr_input_df_subset[
                lr_input_df_subset.columns[:2]
            ],
            lr_input_df_subset[
                [lr_input_df_subset.columns[0]] + 
                list(lr_input_df_subset.columns[2:])
            ]
        )
    )
    norm_loc_columns = []
    for i, norm_loc in enumerate(norm_locs):
        peak_and_motif_df_cp[('norm_pdf', i)] = norm.pdf(
            peak_and_motif_df_cp[
                'instance_position_center'
            ],
            loc = norm_loc,
            scale = norm_scale
        )
        if double_negative:
            peak_and_motif_df_cp[('norm_pdf', i)] = (
                1.0 - peak_and_motif_df_cp[('norm_pdf', i)]
            )
        norm_loc_columns.append(('norm_pdf', i))

    motif_peak_id_norm_pdf_df = peak_and_motif_df_cp[['peak_id', 'motif_orientation'] + norm_loc_columns].groupby(['peak_id', 'motif_orientation']).max().reset_index()

    lr_df = (
        motif_peak_id_norm_pdf_df
        .merge(preprocessed_lr_df, how = 'left')
        .fillna(0.0)
    )

    lr_df_gb_motif_orientation = lr_df.groupby('motif_orientation')
    motif_orientations = list(lr_df_gb_motif_orientation.groups.keys())
    indep_var_cols = list(preprocessed_lr_df.columns[1:])
    score_colname = indep_var_cols[0]
    lr_results = []

    for motif_orientation in motif_orientations:
        lr_df_subset = lr_df_gb_motif_orientation.get_group(motif_orientation)
        for i, norm_loc in enumerate(norm_locs):
            y = lr_df_subset[('norm_pdf', i)]
            X = lr_df_subset[indep_var_cols]
            model = smapi.OLS(y, X)
            result = model.fit(disp=0)
            coef = result.params[score_colname]
            std_err = result.bse[score_colname]
            pval = result.pvalues[score_colname]
            ci = result.conf_int()
            (
                ci_95_pct_lower,
                ci_95_pct_upper
            ) = (
                ci[0][score_colname],
                ci[1][score_colname]
            )

            # y_score = result.predict(X.values)
            result_tup = (
                motif_orientation,
                norm_loc,
                coef,
                std_err,
                ci_95_pct_lower,
                ci_95_pct_upper,
                pval,
                result
            )
            lr_results.append(result_tup)
    positional_enrichment_results_df = pd.DataFrame(
        [tup[:-1] for tup in lr_results], 
        columns = [
            'Motif Orientation',
            'Motif Position',
            'Positional Enrichment Coefficient',
            'Standard Error',
            '95% CI Upper',
            '95% CI Lower',
            'P-value'
    ])

    if double_negative:
        positional_enrichment_results_df[
            'Positional Enrichment Coefficient'
        ] = (
            0.0 -
            positional_enrichment_results_df[
                'Positional Enrichment Coefficient'
            ]
        )
        positional_enrichment_results_df['95% CI Upper'] = (
            0.0 - positional_enrichment_results_df['95% CI Upper']
        )
        positional_enrichment_results_df['95% CI Lower'] = (
            0.0 - positional_enrichment_results_df['95% CI Lower']
        )
    else:
        original_columns = list(positional_enrichment_results_df.columns)
        positional_enrichment_results_df = (
            positional_enrichment_results_df.rename(columns = {
                '95% CI Upper': '95% CI Lower',
                '95% CI Lower': '95% CI Upper'
            })
        )
        positional_enrichment_results_df = (
            positional_enrichment_results_df[original_columns]
        )

    positional_enrichment_results_df['Adjusted P-value'] = mt(
        positional_enrichment_results_df['P-value'],
        method = 'fdr_bh'
    )[1]
    positional_enrichment_results_df_gb_orientation = positional_enrichment_results_df.groupby('Motif Orientation')
    positional_enrichment_results_df_by_orientation = {
        motif_orientation: positional_enrichment_results_df_gb_orientation.get_group(motif_orientation)
        for motif_orientation
        in motif_orientations
    }
    return positional_enrichment_results_df_by_orientation

In [9]:
def plot_motif_instances_single(
    peak_and_motif_df_by_orientation, 
    rolling_mean_on_motif_position_by_orientation, 
    motif_rank_kde_by_orientation, 
    sorted_peak_score_rank_df, 
    depp_by_orientation = None,
    title = 'Plot of motif locations in ranked peaks', 
    filename = 'motif_instances_plot', 
    orientations_to_plot = ['+', '-'], 
    color_by_orientation = {'+': 'red', '-': 'blue'},
    figsize = (12.5, 10.0), 
    pointsize = 2.0, 
    alpha_factor = 4.0, 
    alpha_override = None, 
    plot_formats = ['svg', 'png'],
    plot_tight = True,
    plot_dpi = 300, 
    close_fig = True
):
    if alpha_override == None:
            alpha = np.min([np.max([alpha_factor/df['motif_count'].max() for df in rolling_mean_on_motif_position_by_orientation.values()]), 1.0])
            alpha = np.max([alpha, 1.0/256.0])
    else:
        alpha = alpha_override

    depp = depp_by_orientation != None

    if plot_tight:
        plt.tight_layout()
    fig = plt.figure(figsize = figsize)
    gs = GridSpec(ncols=5,nrows=5, figure = fig) if depp else GridSpec(ncols=5,nrows=4, figure = fig)
    ax_score_rank = fig.add_subplot(gs[1:4,0])
    ax_rank = fig.add_subplot(gs[1:4,4])
    ax_motifs = fig.add_subplot(gs[1:4,1:4])
    ax_pos = fig.add_subplot(gs[0,1:4])
    ax_motifs.get_shared_x_axes().join(ax_motifs, ax_pos)
    ax_motifs.get_shared_y_axes().join(ax_motifs, ax_rank)
    ax_motifs.get_shared_y_axes().join(ax_motifs, ax_score_rank)
    ax_pos.set_ylabel('Motif Count')
    ax_rank.set_xlabel('Motif Density')
    ax_motifs.set_xlabel('Motif Position')
    ax_score_rank.set_ylabel('Peak Score Rank')
    ax_score_rank.set_xlabel('Peak Score')
    ax_motifs.set_yticklabels([])
    ax_rank.set_yticklabels([])

    if depp:
        ax_depp = fig.add_subplot(gs[4,1:4])
        ax_motifs.get_shared_x_axes().join(ax_motifs, ax_depp)
        ax_depp.set_ylabel('\n'.join('Positional Enrichment Coefficient'.split()))
        ax_depp.set_xlabel('Motif Position')

    fig.suptitle(title)
    ax_score_rank.plot(sorted_peak_score_rank_df['peak_score'], sorted_peak_score_rank_df['peak_score_rank_int'], color = 'black')
    for orientation, df in rolling_mean_on_motif_position_by_orientation.items():
        if orientation in orientations_to_plot:
            ax_pos.plot(df['instance_position_center_int'], df['motif_count'], '-', color = color_by_orientation[orientation])
    for orientation, df in motif_rank_kde_by_orientation.items():
        if orientation in orientations_to_plot:
            ax_rank.plot(df['motif_density'], df['peak_score_rank_int'], '-', color = color_by_orientation[orientation])
    if depp:
        for orientation, df in depp_by_orientation.items():
            ax_depp.plot(
                df['Motif Position'], 
                df['Positional Enrichment Coefficient'], 
                '-', 
                color = color_by_orientation[orientation]
            )
            ax_depp.fill_between(
                df['Motif Position'], 
                df['95% CI Lower'], 
                df['95% CI Upper'], 
                facecolor=color_by_orientation[orientation], 
                alpha=0.5
            )

    for tick in ax_rank.get_xticklabels():
        tick.set_rotation(45)
    handles = []
    for orientation, df in peak_and_motif_df_by_orientation.items():
        if orientation in orientations_to_plot:
            ax_motifs.scatter(
                df['instance_position_center_int'], 
                df['peak_score_rank_int'], 
                color = color_by_orientation[orientation], 
                alpha = alpha, 
                s = pointsize)
            handle = mpatches.Patch(
                color = color_by_orientation[orientation], 
                label = orientation
            )
            handles.append(handle)
    ax_legends = fig.add_subplot(gs[0,4])
    ax_legends.axis('off')
    ax_legends.legend(
        handles = handles, 
        loc = 'lower left', 
        title = 'Orientation'
    )
    
    if plot_tight:
        bbox_inches = 'tight'
    else:
        bbox_inches = None
        
    if (len(plot_formats) > 0) and (filename != None):
        for fmt in plot_formats:
            fig.savefig(os.path.normpath(f'{filename}.{fmt}'), bbox_inches = bbox_inches, dpi = plot_dpi)
    
    if close_fig:
        plt.close(fig)
        fmt = plot_formats[0]
        return os.path.normpath(f'{filename}.{fmt}')
    
    return fig

In [10]:
orientations_to_filename_substr = lambda orientations: {
    ('+','-'): 'both', 
    ('+',): 'fwd', 
    ('-',): 'rev'
}[tuple(sorted(orientations))]

default_filename_func = lambda motif_id_slugname, orientations: f'{motif_id_slugname}_orientation_{orientations_to_filename_substr(orientations)}.modiplot'

default_title_func = lambda motif_id, orientations: f'Plot of motif {motif_id} locations in ranked peaks, \n Orientation: '+'/'.join(orientations)


In [11]:
def plot_motif_instances_multiple(
    motif_distributions_by_motif, 
    sorted_peak_score_rank_df, 
    motif_id_slugname_df, 
    depps_by_motif = None,
    motif_ids = None, 
    figsize = (12.5, 10.0), 
    pointsize = 2.0, 
    alpha_factor = 4.0, 
    alpha_override = None,
    title_func = default_title_func,
    filename_func = default_filename_func, 
    plot_formats = ['svg', 'png'],
    color_by_orientation = {'+': 'red', '-': 'blue'}, 
    progress_wrapper = tqdm, 
    plot_fwd = True, 
    plot_rev = True, 
    plot_separate = True,
    plot_tight = True,
    plot_dpi = 300, 
    n_jobs = 1, 
    close_fig = True
):
    if motif_ids == None:
        motif_ids = motif_id_slugname_df['motif_id']
    
    depp = depps_by_motif != None
    
    orientations_to_plot = []
    if plot_fwd: 
        orientations_to_plot.append('+')
    if plot_rev: 
        orientations_to_plot.append('-')
    
    
    motif_id_to_slugname = motif_id_slugname_df.set_index('motif_id')['slugname'].to_dict()
    
    orientations_to_plot = []
    if plot_fwd: 
        orientations_to_plot.append('+')
    if plot_rev: 
        orientations_to_plot.append('-')

    orientations_to_plot_sep = [orientations_to_plot]
    if plot_separate:
        orientations_to_plot_sep = (
            [orientations_to_plot] + 
            [[orientation] 
             for orientation 
             in orientations_to_plot])
    
    def wrap_plot(
        peak_and_motif_df_by_orientation, 
        rolling_mean_on_motif_position_by_orientation, 
        motif_rank_kde_by_orientation, 
        title, 
        filename, 
        orientations_to_plot, 
        depp_by_orientation
    ):
        fig = plot_motif_instances_single(
            peak_and_motif_df_by_orientation = peak_and_motif_df_by_orientation, 
            rolling_mean_on_motif_position_by_orientation = rolling_mean_on_motif_position_by_orientation, 
            motif_rank_kde_by_orientation = motif_rank_kde_by_orientation, 
            sorted_peak_score_rank_df = sorted_peak_score_rank_df, 
            depp_by_orientation = depp_by_orientation, 
            title = title, 
            filename = filename, 
            orientations_to_plot = orientations_to_plot, 
            color_by_orientation = color_by_orientation, 
            figsize = figsize, 
            pointsize = pointsize, 
            alpha_factor = alpha_factor, 
            alpha_override = alpha_override, 
            plot_formats = plot_formats,
            plot_tight = plot_tight,
            plot_dpi = plot_dpi, 
            close_fig = close_fig
        )
        return fig
    
    def get_wrap_plot_args(motif_id, orientations_to_plot):
        title = title_func(motif_id, orientations_to_plot)
        motif_id_slugname = motif_id_to_slugname[motif_id]
        filename = filename_func(motif_id_slugname, orientations_to_plot)
        (
            peak_and_motif_df_by_orientation, 
            rolling_mean_on_motif_position_by_orientation, 
            motif_rank_kde_by_orientation
        ) = motif_distributions_by_motif[motif_id]
        tup = (
            peak_and_motif_df_by_orientation, 
            rolling_mean_on_motif_position_by_orientation, 
            motif_rank_kde_by_orientation, 
            title, 
            filename, 
            orientations_to_plot
        )
        if depp:
            tup = tup + (depps_by_motif[motif_id],)
        else:
            tup = tup + (None,)
        return tup
    
    wrap_plot_args_dict = {
        (motif_id, tuple(orientations_to_plot)): 
        get_wrap_plot_args(motif_id, orientations_to_plot) 
        for orientations_to_plot in orientations_to_plot_sep 
        for motif_id in motif_ids 
    }
    
    if n_jobs == 1:
        figs_by_motif_id_and_orientations_to_plot = {
            key: wrap_plot(*val) 
            for key, val 
            in progress_wrapper(wrap_plot_args_dict.items())}
    else:
        figs_by_motif_id_and_orientations_to_plot_tups = Parallel(
            n_jobs=n_jobs
        )(
            delayed(
                lambda tup: (tup[0], wrap_plot(*tup[1]))
            )((key, val)) 
            for key, val 
            in progress_wrapper(wrap_plot_args_dict.items())
        )
        figs_by_motif_id_and_orientations_to_plot = {
            tup[0]: tup[1] 
            for tup 
            in figs_by_motif_id_and_orientations_to_plot_tups
        }
        
    
    return figs_by_motif_id_and_orientations_to_plot


In [12]:
def plot_motif_instances_from_output_dir(
    output_dir, 
    motif_id_slugname_df = None, 
    n_top = 10, 
    motif_count_smooth_window = 3, 
    num_kde_points = 1000, 
    depp = True,
    norm_scale = 1.0,
    progress_wrapper = tqdm, 
    **kwargs
):
    print(f'Plotting motifs from {output_dir}')
    (
        lr_results_df, 
        lr_input_df, 
        peak_length_df, 
        motif_length_df, 
        scan_results_df, 
        html_logos
    ) = load_meirlop_output_dfs(output_dir)

    if motif_id_slugname_df is None:
        motif_id_slugname_df = get_motif_id_slugname_df(lr_results_df).head(n_top)
    num_motif_ids = motif_id_slugname_df.shape[0]
    print(f'Plotting motif instances for {num_motif_ids} motifs')
    motif_ids = list(motif_id_slugname_df['motif_id'])
    scan_results_df = scan_results_df[scan_results_df['motif_id'].isin(motif_ids)]
    
    print('Formatting motif scan information')
    (
        peak_and_motif_df_by_motif_id, 
        sorted_peak_score_rank_df, 
        max_peak_length, 
        max_peak_score_rank
    ) = precompute_motif_dfs(
        lr_input_df, 
        scan_results_df, 
        motif_length_df, 
        peak_length_df, 
        progress_wrapper = progress_wrapper
    )
    
    print('Computing distributions of motifs')
    motif_distributions_by_motif = {
        motif_id: compute_motif_distributions(
            peak_and_motif_df, 
            max_peak_length, 
            max_peak_score_rank, 
            window = motif_count_smooth_window, 
            num_kde_points = num_kde_points)
        for motif_id, peak_and_motif_df 
        in progress_wrapper(peak_and_motif_df_by_motif_id.items())
    }
    
    if depp:
        print('Computing delta enrichment positional profiles of motifs')
        depps_by_motif = {
            motif_id: compute_delta_enrichment_positional_profile(
                peak_and_motif_df, 
                lr_input_df,
                max_peak_length, 
                norm_scale,
                )
            for motif_id, peak_and_motif_df 
            in progress_wrapper(peak_and_motif_df_by_motif_id.items())
        }
    else:
        depps_by_motif = None
    
    print('Generating figures')
    motif_id_to_coef = lr_results_df.set_index('motif_id')['coef'].to_dict()
    motif_id_to_pval = lr_results_df.set_index('motif_id')['pval'].to_dict()
    motif_id_to_padj = lr_results_df.set_index('motif_id')['padj'].to_dict()
    motif_id_to_stats_str = lambda motif_id: '\n'.join([
        f'Enrichment coefficient: {motif_id_to_coef[motif_id]:0.4e}',
        f'P-value: {motif_id_to_pval[motif_id]:0.4e}',
        f'Adjusted P-value: {motif_id_to_padj[motif_id]:0.4e}'
    ])
    title_func = lambda motif_id, orientations: default_title_func(motif_id, orientations) + '\n' + motif_id_to_stats_str(motif_id)
    
    figs_by_motif_id_and_orientations_to_plot = plot_motif_instances_multiple(
        motif_distributions_by_motif, 
        sorted_peak_score_rank_df, 
        motif_id_slugname_df, 
        depps_by_motif = depps_by_motif,
        title_func = title_func,
        filename_func = lambda motif_id_slugname, orientations: f'{output_dir}/'+default_filename_func(motif_id_slugname, orientations), 
        progress_wrapper = progress_wrapper, 
        **kwargs
    )
    return figs_by_motif_id_and_orientations_to_plot, motif_distributions_by_motif



In [13]:
plot_motif_instances_from_output_dir(output_dir)

Plotting motifs from meirlop_output_directory
Plotting motif instances for 10 motifs
Formatting motif scan information


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Computing distributions of motifs


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Computing delta enrichment positional profiles of motifs


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

Reduced covariates to 13 principal components
Components were chosen to explain 99.0% of variance in covariates
Reduced covariates to 13 principal components
Components were chosen to explain 99.0% of variance in covariates
Reduced covariates to 13 principal components
Components were chosen to explain 99.0% of variance in covariates
Reduced covariates to 13 principal components
Components were chosen to explain 99.0% of variance in covariates
Reduced covariates to 13 principal components
Components were chosen to explain 99.0% of variance in covariates
Reduced covariates to 13 principal components
Components were chosen to explain 99.0% of variance in covariates
Reduced covariates to 13 principal components
Components were chosen to explain 99.0% of variance in covariates
Reduced covariates to 13 principal components
Components were chosen to explain 99.0% of variance in covariates
Reduced covariates to 13 principal components
Components were chosen to explain 99.0% of variance in cov

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))




({('MA0139.1 CTCF',
   ('+',
    '-')): 'meirlop_output_directory/rank_1_ma0139_1_ctcf_orientation_both.modiplot.svg',
  ('MA0138.2 REST',
   ('+',
    '-')): 'meirlop_output_directory/rank_2_ma0138_2_rest_orientation_both.modiplot.svg',
  ('MA1102.1 CTCFL',
   ('+',
    '-')): 'meirlop_output_directory/rank_3_ma1102_1_ctcfl_orientation_both.modiplot.svg',
  ('MA0119.1 NFIC::TLX1',
   ('+',
    '-')): 'meirlop_output_directory/rank_4_ma0119_1_nfic_tlx1_orientation_both.modiplot.svg',
  ('MA0510.2 RFX5',
   ('+',
    '-')): 'meirlop_output_directory/rank_5_ma0510_2_rfx5_orientation_both.modiplot.svg',
  ('MA0600.2 RFX2',
   ('+',
    '-')): 'meirlop_output_directory/rank_6_ma0600_2_rfx2_orientation_both.modiplot.svg',
  ('MA0657.1 KLF13',
   ('+',
    '-')): 'meirlop_output_directory/rank_7_ma0657_1_klf13_orientation_both.modiplot.svg',
  ('MA0798.1 RFX3',
   ('+',
    '-')): 'meirlop_output_directory/rank_8_ma0798_1_rfx3_orientation_both.modiplot.svg',
  ('MA0502.1 NFYB',
   ('+',
    

<Figure size 432x288 with 0 Axes>

In [14]:
%%file

UsageError: %%file is a cell magic, but the cell body is empty.


In [None]:
(
        lr_results_df, 
        lr_input_df, 
        peak_length_df, 
        motif_length_df, 
        scan_results_df, 
        html_logos
    ) = load_meirlop_output_dfs(output_dir)

In [None]:
if motif_id_slugname_df is None:
    if plot_all:
        motif_id_slugname_df = get_motif_id_slugname_df(lr_results_df)
    else:
        motif_id_slugname_df = get_motif_id_slugname_df(lr_results_df).head(n_top)
num_motif_ids = motif_id_slugname_df.shape[0]

In [None]:
motif_id_to_slugname = motif_id_slugname_df.set_index('motif_id')['slugname'].to_dict()

In [None]:
print(f'Plotting motif instances for {num_motif_ids} motifs')
motif_ids = list(motif_id_slugname_df['motif_id'])
scan_results_df = scan_results_df[scan_results_df['motif_id'].isin(motif_ids)]

print('Formatting motif scan information')
(
    peak_and_motif_df_by_motif_id, 
    sorted_peak_score_rank_df, 
    max_peak_length, 
    max_peak_score_rank
) = precompute_motif_dfs(
    lr_input_df, 
    scan_results_df, 
    motif_length_df, 
    peak_length_df, 
    progress_wrapper = progress_wrapper
)

In [None]:
peak_and_motif_df_by_motif_id[motif_ids[0]]

In [None]:
print('Computing distributions of motifs')
motif_distributions_by_motif = {
    motif_id: compute_motif_distributions(
        peak_and_motif_df, 
        max_peak_length, 
        max_peak_score_rank, 
        window = motif_count_smooth_window, 
        num_kde_points = num_kde_points)
    for motif_id, peak_and_motif_df 
    in progress_wrapper(peak_and_motif_df_by_motif_id.items())
}

In [None]:
if depp:
    print('Computing delta enrichment positional profiles of motifs')
    depps_by_motif = {
        motif_id: compute_delta_enrichment_positional_profile(
            peak_and_motif_df, 
            lr_input_df,
            max_peak_length, 
            norm_scale,
            )
        for motif_id, peak_and_motif_df 
        in progress_wrapper(peak_and_motif_df_by_motif_id.items())
    }