In [None]:
# Necessary imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
from string import ascii_uppercase
from IPython.display import display

In [None]:
# Load the aberration table from the Excel workbook next to the notebook.
exc = pd.read_excel("Data_figureB.xlsx")

# Give every distinct case name (in sorted order) a single-letter ID: A, B, C, ...
casenames = np.sort(exc["case"].unique())
case_ids = list(ascii_uppercase[:casenames.size])
case_translation = pd.DataFrame({"case": casenames, "ID": case_ids})

# Show the name -> ID mapping so the figure labels can be decoded.
display(case_translation)

In [None]:
# Constants
# Length of each chromosome, one entry per chromosome in plotting order
# (the x-axis labels them 1..22).
# NOTE(review): values look like a GRCh37/hg19 autosome table -- confirm the assembly.
chromosome_lengths = [
    249250621, 243199373, 198022430, 191154276, 180915260, 171115067,
    159138663, 146364022, 141213431, 135534747, 135006516, 133851895,
    115169878, 107349540, 102531392, 90354753, 81195210, 78077248,
    59128983, 63025520, 48129895, 51304566,
]
# Disabled alternative kept for the open question about rounding chromosome
# boundaries up (60000 is presumably the bin size -- confirm before enabling):
#chromosome_lengths = np.ceil(np.array(chromosome_lengths) / 60000) * 60000
total_chromosome_length = sum(chromosome_lengths)

# Genome-wide offset at which each chromosome starts: entry 0 is 0, and the
# final extra entry equals the total length. The dtype is pinned to int64
# because the total (about 2.9e9) does not fit in a 32-bit integer, which is
# NumPy's default integer on some platforms.
chromosome_starts = np.cumsum([0] + chromosome_lengths, dtype=np.int64)

# Fill colour per aberration type; the inner key is whether the effect is
# above the threshold (True -> strong colour, False -> pale colour).
colors = {
    "gain": {True: "Red",
             False: "LightPink"
            },
    "loss": {True: "Blue",
             False: "LightBlue"
            }
}



def render_aberrations(axes, cases, labels, effectlimit=3, boxheight=0.8):
    """Draw one horizontal track of gain/loss boxes per case on a genome-wide x-axis.

    Reads the module-level ``colors``, ``chromosome_starts`` and
    ``total_chromosome_length``.

    Parameters
    ----------
    axes : matplotlib Axes
        The axes to draw on. All drawing goes to this object (not to any
        global ``ax``).
    cases : sequence
        One entry per case; each entry is an iterable of 5-item rows
        ``(chromosome, start, end, gain/loss, effect)``.
    labels : sequence
        One y-tick label per case, in the same order as ``cases``.
    effectlimit : number, default 3
        Rows with ``effect > effectlimit`` get the strong colour, all others
        the pale one.
    boxheight : float, default 0.8
        Height of each box. Every case occupies a track of height 1 and the
        box is centred vertically in it.

    Returns
    -------
    None
    """
    n_cases = len(cases)

    # Generate effect patches
    patches = []
    for i, case in enumerate(cases):
        # Bottom edge of the box, leaving equal margins above and below.
        base = i + (1 - boxheight) / 2

        for chromosome, inc_start, inc_end, gainorloss, effect in case:
            color = colors[gainorloss][bool(effect > effectlimit)]
            # NOTE(review): chromosome_starts[0] is the offset of the first
            # chromosome, so this lookup is only right if the `chromosome`
            # column is 0-based. If the data uses 1..22, every box is shifted
            # one chromosome to the right -- confirm against the data file.
            start = chromosome_starts[chromosome] + inc_start
            end = chromosome_starts[chromosome] + inc_end
            patches.append(Rectangle(xy=(start, base), width=end - start,
                                     height=boxheight, facecolor=color))

    # Render patches and set plot limits
    # match_original=True keeps the per-rectangle face colours.
    axes.add_collection(PatchCollection(patches, match_original=True, antialiased=False))
    axes.set_xlim((0, total_chromosome_length))
    axes.set_ylim((0, n_cases))

    # Exactly one tick per case, in the middle of its track, so the number of
    # tick positions always matches the number of labels.
    axes.set_yticks(np.arange(n_cases) + 0.5)
    axes.set_yticklabels(labels)

    # Add ticks to chromosome boundaries and add label inbetween ticks
    axes.set_xticks(chromosome_starts)
    axes.set_xticklabels([])
    axes.tick_params(axis='x', which='major', length=15)

    # Minor ticks sit at chromosome midpoints and carry the chromosome number;
    # their tick marks are hidden (length 0) so only the label shows.
    axes.set_xticks((chromosome_starts[1:] + chromosome_starts[:-1]) / 2, minor=True)
    axes.set_xticklabels(np.arange(1, len(chromosome_starts)), minor=True)
    axes.tick_params(axis='x', which='minor', length=0)

    axes.grid(axis='x', linestyle=':')

In [None]:
# Split the aberration table into one sub-frame per case.
grouped_data = exc.groupby("case")

# Case name -> letter ID, built once from the translation table.
id_by_case = dict(zip(case_translation["case"], case_translation["ID"]))

# Columns handed to render_aberrations, in the order it unpacks them.
row_fields = ["chromosome", "start", "end", "gain/loss", "effect"]

labels = []
all_data = []

for case, data in grouped_data:
    labels.append(id_by_case[case])
    all_data.append(data[row_fields].to_numpy())

In [None]:
# Figure setup: a single 16x8 inch figure with one set of axes.
fig, ax = plt.subplots(figsize=(16, 8))

# Draw every case as its own track, labelled with its letter ID.
render_aberrations(ax, all_data, labels)

# Open questions:
# - Should we round the chromosome boundaries up to the nearest multiple of 'binsize' to avoid spillover?
# - How do we handle overlapping findings? (e.g. ichor vs WC)