In [135]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from pathlib import Path

% matplotlib inline

In [136]:
def _read_cell_names(list_path):
    """Return the non-blank, whitespace-stripped lines of ``list_path`` in file order."""
    with open(list_path) as f:
        # Skip blank lines instead of stopping at the first one, so a stray
        # empty line in the middle of the list cannot silently truncate it.
        return [line.strip() for line in f if line.strip()]


def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole`` rounded to one decimal (0.0 if ``whole`` is 0)."""
    if whole == 0:
        # Avoid inf/NaN percentages leaking into the tables and the row sums.
        return 0.0
    return round(part / whole * 100, 1)


def summarize_overlap(cellid, min_overlap_pct=1):
    """Summarize pair-wise overlap statistics for one ``cellid`` and write them to Excel.

    Inputs read from the current working directory:

    * ``<cellid>_list`` -- one name per line (blank lines are ignored).
    * ``overlap_stats/<cellid>/<name1>_<name2>.txt`` -- optional, one per ordered
      pair of names.  Parsed as tab-separated text with the first two lines
      skipped; the first column is the row label and the second column the
      value.  The rows ``Total_length(input1)``, ``Total_length(input2)`` and
      ``Total_length(overlap)`` must be present.

    Output written: ``Summary_<cellid>.xls`` with the sheets ``Sample-wise``,
    ``Pair-wise`` and ``Overlapping_pairs``.

    Parameters
    ----------
    cellid : str
        Identifier used to build the input and output file names.
    min_overlap_pct : float, optional
        A pair is listed in the overlapping-pairs table when the overlap
        exceeds this percentage of either input length.  Defaults to 1.

    Returns
    -------
    list
        ``[row_sums, df, pairs]`` where

        * ``row_sums`` is a one-column DataFrame (``"(%)overlap"``) holding the
          per-name sum of the pair-wise percentages, capped at 100;
        * ``df`` is the names-by-names matrix of overlap percentages
          (``df.loc[a, b]`` is the overlap relative to the length of ``a``);
        * ``pairs`` has one row per listed pair, indexed ``"<name1>_<name2>"``,
          with both input lengths and the overlap length.

    Raises
    ------
    FileNotFoundError
        If ``<cellid>_list`` does not exist.
    KeyError
        If a statistics file lacks one of the required rows.
    """
    names = _read_cell_names(cellid + "_list")
    n_cells = len(names)
    print("Number of cells:\t", n_cells)

    df = pd.DataFrame(np.zeros([n_cells, n_cells]), index=names, columns=names)
    pair_columns = ['Cell_1_length(uM)', 'Cell_2_length(uM)', 'Overlap_length(uM)']
    # Collect rows in plain lists and build the frame once at the end:
    # DataFrame.append no longer exists in pandas 2.x and growing a frame
    # row by row is quadratic.
    pair_labels = []
    pair_rows = []
    for i in names:
        for j in names:
            my_file = Path("overlap_stats/" + cellid + "/" + i + "_" + j + ".txt")
            if not my_file.is_file():
                continue
            tp = pd.read_csv(my_file, header=None, index_col=0, delimiter="\t", skiprows=[0, 1])
            length_1 = tp.loc["Total_length(input1)", 1]
            length_2 = tp.loc["Total_length(input2)", 1]
            overlap = tp.loc["Total_length(overlap)", 1]
            df.loc[i, j] = _percent(overlap, length_1)
            df.loc[j, i] = _percent(overlap, length_2)
            if df.loc[i, j] > min_overlap_pct or df.loc[j, i] > min_overlap_pct:
                pair_labels.append(i + "_" + j)
                pair_rows.append([length_1, length_2, overlap])
    pairs = pd.DataFrame(pair_rows, index=pair_labels, columns=pair_columns)

    # A name can overlap several partners; cap the summed percentage at 100.
    row_sums = df.sum(axis=1).clip(upper=100).to_frame("(%)overlap")

    # NOTE: the xlsxwriter engine produces the .xlsx format even though the
    # file name ends in ".xls"; the name is kept so existing consumers of the
    # summary file still find it.
    output = "Summary_" + cellid + ".xls"
    # The context manager saves and closes the workbook even if a sheet fails
    # to write (ExcelWriter.save() is gone in pandas 2.x).
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        row_sums.to_excel(writer, sheet_name="Sample-wise")
        df.to_excel(writer, sheet_name="Pair-wise")
        pairs.to_excel(writer, sheet_name="Overlapping_pairs")
    return [row_sums, df, pairs]

In [137]:
# Identifier that selects the "<cellid>_list" name file and the
# "overlap_stats/<cellid>/" directory read by summarize_overlap.
cellid = "17302"
# Unpack the three returned tables: capped per-name totals, the pair-wise
# percentage matrix, and the table of overlapping pairs. The call also
# writes "Summary_<cellid>.xls" as a side effect.
row_sums, df, pairs = summarize_overlap(cellid)

Number of cells:	 107


In [138]:
# Display up to 20 of the recorded pairs, ordered by overlap length, largest first.
pairs.sort_values("Overlap_length(uM)", ascending=False).head(20)

Unnamed: 0,Cell_1_length(uM),Cell_2_length(uM),Overlap_length(uM)
028_029,31420.3,33779.3,16253.2
086_093,16056.7,22512.4,9465.14
002_005,15397.8,20851.9,3751.21
003_009,12377.9,15984.5,3641.61
003_014,12377.9,22652.8,3117.3
005_009,20851.9,15984.5,2768.98
018_019,38393.6,40579.8,2688.04
002_009,15397.8,15984.5,2604.9
107_111,24810.3,21758.9,1886.85
088_095,15866.3,12414.6,1880.62
