In [None]:
## Imports
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Font configuration for exported figures: fonttype 42 embeds fonts as
# TrueType in PDF/PS output (necessary to save PDFs properly); Arial is used
# as the sans-serif font.
import matplotlib
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.sans-serif': "Arial",
})

In [None]:
## Load results
# Reads every '*__binned.csv' file in `path_results`, tags each table with its
# source file and data-set, and concatenates everything into `df_all`.

# Folder containing the results of the axes enrichment analysis
path_results = Path(r"PASTE-FULL-PATH-TO-ANALYSIS-FOLDERs")

# Specify different data-sets (strings have to be part of the file-name)
data_sets = ('mutanthrde1', 'mutantpiwi', 'WT')

# Pixel-size in um
pixel_size = 0.104  # um per pixel

# Folder where plots should be saved
path_save = Path(r'PASTE-FULL-PATH-TO-FOLDER-TO-SAVE-RESULTS')

# Fail early with a clear message when the results folder is wrong (e.g. the
# placeholder path above was not replaced).
if not path_results.is_dir():
    raise FileNotFoundError(f'Results folder does not exist: {path_results}')

# Loop over all results (files ending with __binned.csv).
# Sorted so that the processing order does not depend on the file system.
df_list = []
for f_results in sorted(path_results.glob('*__binned.csv')):

    print(f'\n\n>>> Processing file\n  {f_results}')

    # Check which data-set file corresponds to
    data_set = [substring for substring in data_sets if substring in f_results.name]

    # Only files matching exactly one data-set are used; files matching none
    # or several are reported and skipped.
    if len(data_set) == 1:
        df_loop = pd.read_csv(f_results)
        # Convert the binned distance from pixels to um
        df_loop['dist_bin_um'] = df_loop['dist_bin'] * pixel_size
        df_loop['file'] = f_results.name
        df_loop['data_set'] = data_set[0]
        df_list.append(df_loop)
    else:
        print(f'   !!! File could not be assigned to a data-set. {data_set}')

# pd.concat raises an uninformative "No objects to concatenate" on an empty
# list; report what was actually searched instead.
if not df_list:
    raise ValueError(
        f"No '*__binned.csv' file in {path_results} could be assigned to "
        f"one of the data-sets {data_sets}."
    )

# ignore_index=True gives the combined table a unique index; otherwise every
# file contributes its own 0..n-1 labels and the index contains duplicates.
df_all = pd.concat(df_list, ignore_index=True)

In [None]:
## PLOT RESULTS

# Make sure the output folder exists, otherwise savefig fails.
path_save.mkdir(parents=True, exist_ok=True)

# >>> Combined line-plot: one line per data-set showing the mean over all rows
#     sharing the same distance bin. The shaded band is seaborn's default
#     error bar for lineplot (a 95% confidence interval), NOT the standard
#     deviation.
fig, ax = plt.subplots(figsize=(5, 4))
sns.lineplot(x="dist_bin_um", y="n_rna_bin", hue='data_set', data=df_all, ax=ax)
ax.set_xlabel('Distance from loop [um]', fontsize=14)
ax.set_ylabel('# of RNAs/bin', fontsize=14)
ax.tick_params(axis='both', which='major', labelsize=12)
ax.set_xlim(-50, 100)
fig.tight_layout()

# Save as raster (PNG) and vector (PDF) graphic
fig.savefig(path_save / 'line_plots_ci.png', dpi=300)
fig.savefig(path_save / 'line_plots_ci.pdf')

# >>> Generate separate plot for each of the specified data-sets
#     Each plot shows one un-aggregated line per result file
#     (units="file", estimator=None).

# Make sure the output folder exists, otherwise savefig fails.
path_save.mkdir(parents=True, exist_ok=True)

for data_set in data_sets:

    df_data_set = df_all[df_all['data_set'] == data_set]

    # Skip data-sets without any loaded file rather than saving an empty,
    # misleading figure.
    if df_data_set.empty:
        print(f'No results loaded for data-set "{data_set}", no plot created.')
        continue

    fig, ax = plt.subplots(figsize=(5, 4))
    sns.lineplot(x="dist_bin_um", y="n_rna_bin",
                 units="file", estimator=None,
                 data=df_data_set, ax=ax)
    ax.set_xlabel('Distance from loop [um]', fontsize=14)
    ax.set_ylabel('# of RNAs/bin', fontsize=14)
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.set_xlim(-50, 100)
    fig.tight_layout()

    # Save as raster (PNG) and vector (PDF) graphic
    fig.savefig(path_save / f'line_plots__{data_set}.png', dpi=300)
    fig.savefig(path_save / f'line_plots__{data_set}.pdf')