In [None]:
import numpy
import scipy
import matplotlib
import pandas
import seaborn
import pyBigWig
from tqdm import tqdm
import matplotlib.ticker as ticker
from matplotlib.patches import Rectangle

In [None]:
#python libraries 
from glob import glob

### third-party libraries (install them through conda)
import pylab
import numpy as np
import seaborn as sns
import pandas as pd

##basic functions for analysis 

def plotBdg(ax, chrom, start, end, data, pos="r", neg="grey"):
    """
    Draw bedGraph records of one genomic window as filled bars on an axis.

    @param ax: axis-like object to draw on (plot, fill_between, xaxis and
               set_xlim are used; presumably a matplotlib Axes)
    @param chrom: chromosome name; records with a different first field
                  are ignored
    @param start: left border of the window, also used as lower x limit
    @param end: right border of the window, also used as upper x limit
    @param data: iterable of [chrom, start, end, value] records, e.g. the
                 first return value of readBg
    @param pos: color for records with value > 0
    @param neg: color for records with value <= 0
    @return: the same ax, after drawing
    """
    for rec in data:
        if rec[0] != chrom:
            continue
        s, e, v = rec[1], rec[2], rec[3]
        # only records lying completely inside [start, end] are drawn;
        # records that merely overlap a window border are skipped
        if s < start or e > end:
            continue
        if v > 0:
            ax.plot([s, e], [v, v], color=pos, linewidth=0.1)
            ax.fill_between([s, e], 0, [v, v], color=pos, alpha=1)
        else:
            ax.plot([s, e], [v, v], color=neg, linewidth=0.1)
            ax.fill_between([s, e], [v, v], 0, color=neg, alpha=0.8)
    ax.xaxis.set_major_formatter(ticker.EngFormatter())
    ax.set_xlim([start, end])
    # restrict the spine removal to the axis we were given; without ax=
    # seaborn works on the current pyplot figure, which may be another one
    sns.despine(ax=ax, top=True, right=True, bottom=True)
    return ax

def readBg(f):
    """
    Read a tab-separated bedGraph file.

    Blank lines, lines starting with "#" and "track"/"browser" header lines
    are skipped.

    @param f: path of the bedGraph file; the first four columns of every
              record are read as chrom, start (int), end (int), value (float)
    @return: (data, cs) where data is a list of [chrom, start, end, value]
             in file order and cs maps every chromosome to
             [smallest start, largest end] seen in the file (the covered
             range, not necessarily the real chromosome size)
    """
    data = []  #store the bedGraph data
    cs = {}  #store the covered range of every chromosome
    # the with-block closes the file even if a malformed line raises
    with open(f) as handle:
        for line in handle:
            line = line.rstrip("\n")
            if not line.strip():
                continue
            if line.startswith("#") or line.split(None, 1)[0] in ("track", "browser"):
                continue
            fields = line.split("\t")
            chrom = fields[0]
            start = int(fields[1])
            end = int(fields[2])
            v = float(fields[3])
            data.append([chrom, start, end, v])
            if chrom not in cs:
                cs[chrom] = [start, end]
                continue
            span = cs[chrom]
            if start < span[0]:
                span[0] = start
            if end > span[1]:
                span[1] = end
    return data, cs

def getBins(f):
    """
    Read a tab-separated binned signal file into one pandas Series per chromosome.

    Blank lines, lines starting with "#" and "track"/"browser" header lines
    are skipped.

    @param f: path of the file; the first column is the chromosome and the
              last column is the value (float)
    @return: dict mapping chromosome to a pd.Series whose index is all
             columns except the last joined by "|" (e.g. "chrom|start|end"
             for a 4-column bedGraph) and whose values are the last column;
             when a key occurs twice the later line wins
    """
    ds = {}
    # the with-block closes the file even if a malformed line raises
    with open(f) as handle:
        for line in handle:
            line = line.rstrip("\n")
            if not line.strip():
                continue
            if line.startswith("#") or line.split(None, 1)[0] in ("track", "browser"):
                continue
            fields = line.split("\t")
            k = "|".join(fields[:-1])
            ds.setdefault(fields[0], {})[k] = float(fields[-1])
    return {chrom: pd.Series(vs) for chrom, vs in ds.items()}

In [None]:
# Figure 1B: reproducibility of the two replicates on 100 kb bins.
repeata = getBins("/data/GM12878_supercoiling_data.repeat1.100k.bg")
repeatb = getBins("/data/GM12878_supercoiling_data.repeat2.100k.bg")

#collect the values of all bins present in both repeats, chr1..chr22 only
sas = []
sbs = []
for c in ["chr%s" % n for n in range(1, 23)]:
    sa = repeata[c]
    sb = repeatb[c]
    # keep only the bins found in both repeats so the values stay paired
    ss = sa.index.intersection(sb.index)
    sa = sa[ss]
    sb = sb[ss]
    sas.extend(sa.values)
    sbs.extend(sb.values)
sas, sbs = np.array(sas), np.array(sbs)

#Pearson correlation between the paired bin values
pcc = np.corrcoef(sas, sbs)[0][1]

#plot
fig, ax = pylab.subplots(figsize=(3, 2.8)) #author note: 3.2, 2.2 size are perfect for 1/6 A4 page
# 2D histogram with 0.01-wide bins; bins="auto" and kde=True were dropped
# because binwidth overrides bins and kde only applies to univariate data
sns.histplot(x=sas, y=sbs, cmap="Blues", ax=ax, binwidth=0.01)
ax.set_xlim([-0.6, 0.6])
ax.set_ylim([-0.6, 0.6])
ax.xaxis.set_ticks(np.arange(-0.5, 0.6, 0.5))
ax.yaxis.set_ticks(np.arange(-0.5, 0.6, 0.5))
# thin cross hair through the origin
ax.axvline(x=0, color="gray", linewidth=0.5)
ax.axhline(y=0, color="gray", linewidth=0.5)
ax.set_xlabel("Repeat 1", fontsize= 11)
ax.set_ylabel("Repeat 2", fontsize= 11)
# the position is given in axes fraction, not in data coordinates
ax.text(0.1, 0.85, "r = %.2f" % pcc, transform=ax.transAxes)
sns.despine(top=True, right=True)

pylab.tight_layout()

pylab.savefig("figure1B.pdf")