In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns


def preprocess_david(data):
    """Add derived enrichment columns to a DAVID-style annotation table.

    The frame is modified in place and the same object is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Count', 'List Total', 'Pop Hits',
        'Pop Total' and 'PValue'.

    Returns
    -------
    pandas.DataFrame
        ``data`` itself, with three columns added (or overwritten):
        'GeneRatio'       = Count / List Total
        'BgRatio'         = Pop Hits / Pop Total
        '-log10(PValue)'  = -log10(PValue)
    """
    # Ratio of hits to the size of the submitted list.
    data['GeneRatio'] = data['Count'].div(data['List Total'])

    # Ratio of hits to the size of the background population.
    data['BgRatio'] = data['Pop Hits'].div(data['Pop Total'])

    # Zero p-values are not clamped here, so PValue == 0 comes out as inf.
    data['-log10(PValue)'] = -np.log10(data['PValue'])

    return data


def pick_go_term_up(data, type="bp", p="fisher", top=10, fdr_cutoff=0.01):
    """Select the most enriched significant terms of one annotation category.

    Rows of the requested category with ``FDR <= fdr_cutoff`` are kept, their
    'Fold Enrichment' is converted to log2 scale, and the ``top`` rows with the
    largest log2 fold enrichment are returned. ``data`` is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Category', 'Fold Enrichment' and 'FDR'.
    type : str
        Which category to keep: "bp", "cc", "mf" or "kegg". (The name shadows
        the builtin but is kept so keyword callers continue to work.)
    p : str
        Accepted for backward compatibility; not used by this function.
    top : int
        Maximum number of rows to return.
    fdr_cutoff : float
        Upper bound (inclusive) on 'FDR' for a row to be kept.

    Returns
    -------
    pandas.DataFrame
        The selected rows, sorted by descending log2 'Fold Enrichment', with
        the original index labels.

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported keys.
    """
    categories = {
        "bp": "GOTERM_BP_DIRECT",
        "cc": "GOTERM_CC_DIRECT",
        "mf": "GOTERM_MF_DIRECT",
        "kegg": "KEGG_PATHWAY",
    }
    category = categories.get(type) if isinstance(type, str) else None
    if category is None:
        raise ValueError(
            f"Unknown term type {type!r}; expected one of {sorted(categories)}"
        )

    in_category = data[data['Category'] == category]

    # Explicit copy: assigning into a boolean-filtered frame makes pandas emit
    # SettingWithCopyWarning on versions without Copy-on-Write.
    significant = in_category[in_category['FDR'] <= fdr_cutoff].copy()

    # Fold enrichment on a log2 scale.
    significant['Fold Enrichment'] = np.log2(significant['Fold Enrichment'])

    ranked = significant.sort_values(by="Fold Enrichment", ascending=False)
    return ranked.iloc[:top]


def pick_go_term_down(data, type="bp", p="fisher", top=10, fdr_cutoff=0.01):
    """Select the most enriched significant terms and mirror them to negative values.

    Rows of the requested category with ``FDR <= fdr_cutoff`` are kept, their
    'Fold Enrichment' is converted to log2 scale and the rows are ranked by
    descending log2 fold enrichment. The ranked values are then multiplied by
    -1 and every 'Term' gets a leading space before the first ``top`` rows are
    returned. ``data`` is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Category', 'Term', 'Fold Enrichment' and 'FDR'.
    type : str
        Which category to keep: "bp", "cc", "mf" or "kegg". (The name shadows
        the builtin but is kept so keyword callers continue to work.)
    p : str
        Accepted for backward compatibility; not used by this function.
    top : int
        Maximum number of rows to return.
    fdr_cutoff : float
        Upper bound (inclusive) on 'FDR' for a row to be kept.

    Returns
    -------
    pandas.DataFrame
        The selected rows with negated log2 'Fold Enrichment' and
        space-prefixed 'Term', keeping the original index labels.

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported keys.
    """
    categories = {
        "bp": "GOTERM_BP_DIRECT",
        "cc": "GOTERM_CC_DIRECT",
        "mf": "GOTERM_MF_DIRECT",
        "kegg": "KEGG_PATHWAY",
    }
    category = categories.get(type) if isinstance(type, str) else None
    if category is None:
        raise ValueError(
            f"Unknown term type {type!r}; expected one of {sorted(categories)}"
        )

    in_category = data[data['Category'] == category]

    # Explicit copy: assigning into a boolean-filtered frame makes pandas emit
    # SettingWithCopyWarning on versions without Copy-on-Write.
    significant = in_category[in_category['FDR'] <= fdr_cutoff].copy()

    # Fold enrichment on a log2 scale.
    significant['Fold Enrichment'] = np.log2(significant['Fold Enrichment'])

    # Rank on the positive scale first, then flip the sign.
    ranked = significant.sort_values(by="Fold Enrichment", ascending=False)
    ranked['Fold Enrichment'] = -ranked['Fold Enrichment']

    # Leading space keeps these labels distinct from identically named terms
    # that do not carry the prefix.
    ranked['Term'] = ' ' + ranked['Term']

    return ranked.iloc[:top]

In [None]:
import matplotlib.cm as cm
import matplotlib as mpl

# GeneOntology Dot Plot Generator
#
# NOTE: `df_plot` is not created in this cell. It must already exist in the
# kernel and provide the columns "Fold Enrichment", "Term", "Count" and "FDR".
font = {'family':'Arial',
        'weight':'normal',
        'size':6}
font_ticks = {'family':'Arial',
              'weight':'normal',
              'size':6}

# One colormap and one normalisation are shared by the markers and the
# colorbar. Previously the dots used "coolwarm_r" while the colorbar used
# `cm.bwr`, which runs in the opposite direction, so the bar mislabelled the
# marker colours.
spectrum_colors = sns.color_palette("coolwarm_r", as_cmap=True)
cmap = spectrum_colors
vmin = df_plot['FDR'].min()
vmax = df_plot['FDR'].max()
norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)

fig, ax = plt.subplots(figsize=(1, 4), dpi=150, facecolor="none")
scatterplot = sns.scatterplot(
    data=df_plot,
    x="Fold Enrichment",
    y="Term",
    size="Count",
    hue="FDR",
    palette=cmap,
    hue_norm=norm,
    legend="brief",
    ax=ax,
)

# Custom legend: keep only the marker-size entries (FDR is shown by the
# colorbar). With both hue and size mapped, seaborn puts the hue entries
# first, then a "Count" subtitle row, then the size entries; locating that
# subtitle avoids hard-coding how many hue entries were generated.
handles, labels = ax.get_legend_handles_labels()
size_start = labels.index("Count") + 1 if "Count" in labels else 0
ax.legend(handles[size_start:], labels[size_start:],
          bbox_to_anchor=(1.0, 0.5), loc="upper left", title="Counts",
          frameon=False, borderaxespad=0., fontsize=6, title_fontsize=6)

# ColorBar for FDR, built from the same cmap/norm as the markers
sm = cm.ScalarMappable(cmap=cmap, norm=norm)
cbar = fig.colorbar(sm, ax=ax, fraction=0.04, pad=0.1, anchor=(1.0, 0.9), format='%.0e')
cbar.ax.spines['outline'].set_visible(False) # outline remove
cbar.ax.tick_params(size=0, labelsize=6) # remove ticks
cbar.set_label('FDR', rotation=270, size=6)

# Details
ax.set_title('KEGG', font='Arial', size=8)
ax.set_xlabel("log2 Fold Enrichment", fontdict=font)
ax.set_ylabel(None)
for tick_label in [*ax.get_xticklabels(), *ax.get_yticklabels()]:
    tick_label.set_fontproperties(font_ticks)

# Thin gray grid on both axes, drawn underneath the markers. Gridlines are
# rendered as part of the axis, so their stacking is controlled by
# `set_axisbelow`; a `zorder` passed to grid() does not move them below the
# scatter points.
ax.set_axisbelow(True)
ax.grid(True, color='gray', linewidth=0.2)

plt.show()