### Modules

In [None]:
# basic
import os, sys, glob, pybedtools
import numpy as np, pandas as pd
from Bio import SeqIO
import matplotlib.pyplot as plt, seaborn as sns
from matplotlib.cm import ScalarMappable
from matplotlib.ticker import FormatStrFormatter
from scipy.stats import fisher_exact

### Figure 6A

In [None]:
### m6A peaks
# Site-level prediction table; rows whose 'm6a' field contains 'yes' are the called m6A sites.
pred = pd.read_csv('../data/hepg2_m6atm.csv')
is_called_m6a = pred['m6a'].str.contains('yes')
pred_m6a = pred[is_called_m6a]
m6a_sites = pred_m6a.gn_site.tolist()

# Columns pulled for PEG10, and the column order handed to bedtools.
# NOTE(review): gn_pos / gn_pos_1 are used as the BED start / end fields -- presumably
# 0-based start and 1-based end of the site; confirm against the file that produced them.
site_columns = ['chrom', 'gn_site', 'gn_pos', 'gn_pos_1', 'name2', 'probability', 'strand']
bed_columns = ['chrom', 'gn_pos', 'gn_pos_1', 'name2', 'probability', 'strand', 'm6a']

# Keep the PEG10 sites, add a boolean 'm6a' flag (True when the site ID is among the
# called m6A sites), then drop 'gn_site' by selecting the BED column order.
peg10_df = (
    pred.loc[pred['name2'] == 'PEG10', site_columns]
        .assign(m6a = lambda sites: sites.gn_site.isin(m6a_sites))
        .loc[:, bed_columns]
)
peg10_bedtools = pybedtools.BedTool.from_dataframe(df = peg10_df)

In [None]:
### eCLIP data
# Peak table with (at least) the columns chrom, chromStart, chromEnd and name.
# NOTE(review): read with the default comma separator and a header row -- confirm the file layout.
rbp_bed_hepg2_encode = pd.read_csv('../data/rbp_hepg2.bed')
rbp_list = sorted(set(rbp_bed_hepg2_encode.name))

# Per-RBP results; all three lists are index-aligned with rbp_list.
pval_list = []
or_list = []
val_list = []
for rbp_name in rbp_list:

    # eCLIP peaks of the current RBP as a 4-column BedTool
    is_current_rbp = rbp_bed_hepg2_encode['name'].isin([rbp_name])
    eclip_bed = rbp_bed_hepg2_encode.loc[is_current_rbp, ['chrom', 'chromStart', 'chromEnd', 'name']]
    eclip_bedtools = pybedtools.BedTool.from_dataframe(df = eclip_bed)

    # Intersect PEG10 sites with the peaks (wao = True also reports sites without a hit),
    # then keep one row per site: the first record for each (chrom, start, end).
    hits = peg10_bedtools.intersect(eclip_bedtools, wao = True).to_dataframe()
    is_repeat_site = hits.loc[:, ['chrom', 'start', 'end']].duplicated()
    overlapping = hits[~is_repeat_site]

    # to_dataframe() names columns by position, so the 7th column ('thickStart') is the
    # m6a flag of the site and the 11th ('blockSizes') is the name of the overlapping
    # peak; '.' there marks a site with no overlapping peak.
    is_m6a = overlapping['thickStart'] == True
    is_not_m6a = overlapping['thickStart'] == False
    is_bound = overlapping['blockSizes'] == rbp_name
    is_unbound = overlapping['blockSizes'] == '.'

    # 2x2 frequency table: rows = m6A / non-m6A site, columns = overlapped / not overlapped
    val1 = len(overlapping[is_m6a & is_bound])
    val2 = len(overlapping[is_m6a & is_unbound])
    val3 = len(overlapping[is_not_m6a & is_bound])
    val4 = len(overlapping[is_not_m6a & is_unbound])

    # One-sided Fisher's exact test (alternative = 'greater')
    table = np.array([[val1, val2], [val3, val4]])
    odds_ratio, p_value = fisher_exact(table, alternative = 'greater')

    pval_list.append(p_value)
    or_list.append(odds_ratio)
    val_list.append([val1, val2, val3, val4])

In [None]:
# Collect the per-RBP Fisher results into one table: one row per RBP with its odds
# ratio, p-value and the four cells of its 2x2 frequency table (val1..val4, in the
# order they were appended to val_list).
val_list_zip = list(zip(*val_list))  # column-wise view of val_list, kept for reference

# The count columns are built row by row rather than by indexing val_list_zip, so an
# empty rbp_list gives an empty table instead of an IndexError.
result_table = pd.DataFrame({'rbp': rbp_list,
                             'odds': or_list,
                             'p_val': pval_list,
                             'val1': [counts[0] for counts in val_list],
                             'val2': [counts[1] for counts in val_list],
                             'val3': [counts[2] for counts in val_list],
                             'val4': [counts[3] for counts in val_list]})

In [None]:
##### main
# Scatter of odds ratio (x) against the val1 count (y) per RBP, coloured by p-value.
sns.set_theme(style = 'whitegrid') # theme
tab_color = sns.color_palette() # color palette
fig, ax = plt.subplots(figsize = (12, 4)) # figure size

### plot
# Rows with a missing value (e.g. an undefined odds ratio) are left out of the figure.
scatter_table = result_table.dropna()
pval = scatter_table['p_val']

# cmap
# A single colormap and normalisation shared by the points and the colorbar.
# plt.get_cmap is used because plt.cm.get_cmap no longer exists in recent matplotlib.
my_cmap = plt.get_cmap('viridis_r')
norm = plt.Normalize(vmin = pval.min(), vmax = pval.max())

scatter = ax.scatter(scatter_table['odds'], scatter_table['val1'], c = pval, s = 80, cmap = my_cmap, norm = norm,
                     alpha = 0.8, linewidth = 1, edgecolors = 'black')

# The colorbar is drawn from a separate mappable so it is not affected by the point alpha.
sm = ScalarMappable(cmap = my_cmap, norm = norm)
sm.set_array([])

# ax must be given explicitly: the mappable is not attached to any Axes, so matplotlib
# cannot infer which Axes to take the colorbar space from.
cbar = fig.colorbar(sm, ax = ax, pad = 0.01, format = FormatStrFormatter('%g'))
cbar.ax.tick_params(labelsize = 12)
cbar.set_label('$\\mathit{P}$ value', rotation = 270, labelpad = 30, fontsize = 20)
cbar.set_ticks(ticks = [0.05, 0.1, 0.2, 0.5])

# axis
ax.set_xlabel('Odds ratio', fontsize = 16)
ax.set_ylabel('Number of overlapping', fontsize = 16)
ax.tick_params(labelsize = 14)

### Figure S7

In [None]:
### pie chart
# Pie of how many RBPs have a row with a missing value in result_table ("No overlap")
# versus a complete row (">=1 overlap").
sns.set_theme(style = 'white') # theme
tab_color = sns.color_palette('Set2') # color palette
fig, ax = plt.subplots(figsize = (8, 8)) # figure size

# y1: rows of result_table containing any NaN; y2: the remaining rows.
# y2 is derived from result_table itself, so this cell does not rely on the
# scatter_table variable left behind by the plotting cell.
y1 = result_table[result_table.isna().any(axis=1)].shape[0]
y2 = result_table.shape[0] - y1

# '\u2265' is the "greater than or equal to" sign, written as an escape so the label
# cannot be garbled by a wrong file encoding.
labels = ['No overlap', '\u22651 overlap']
sizes = [y1, y2]

# autopct converts each wedge's percentage back into an absolute count.
ax.pie(sizes, labels = labels, explode = [0, 0.05], autopct = lambda x: int(np.round(x/100*sum(sizes), 0)),
       shadow = False, startangle = 80, labeldistance = None, colors = [tab_color[0], tab_color[6]], 
       textprops={'fontsize': 26, 'weight': 'bold'},
       wedgeprops={'linewidth': 0.5})

fig.legend(labels, loc = 'upper right', fontsize = 20)