In [1]:
import pathlib as pl

import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.use('Agg')

# Provenance banner: names the upstream workflow rule whose output this
# notebook consumes. It is echoed verbatim at the top of the cell output.
desc = """
This notebook uses the output of the rule
--- 75_assm_stats.smk::aggregate_quast_reports
as input, and produces panels for figure 1.
"""
print(desc)

# Every resolve(strict=True) below raises if the path does not exist, so a
# missing directory or input table stops the notebook before any plotting.
exec_dir = pl.Path('.').resolve(strict=True)
wd_dir = exec_dir  # the working directory is simply the execution directory
out_dir = pl.Path(
    '/home/local/work/data/sig_chrY/paper/output/figures'
).resolve(strict=True)

# Report the directories in use, one "<label> <path>" line each.
for _label, _location in (
    ('Execution directory: ', exec_dir),
    ('Working directory: ', wd_dir),
    ('Output directory: ', out_dir),
):
    print(_label, _location)
print('=================================')

# Input: the aggregated QUAST report table inside the stats directory.
quast_table_dir = pl.Path('/home/local/work/data/sig_chrY/paper/stats/quast_reports').resolve(strict=True)
table_file = (
    quast_table_dir / 'SAMPLES.HIFIRW.ONTUL.na.chrY.quast-report.tsv'
).resolve(strict=True)

# Samples removed from the table before any plotting.
drop_samples = ['HG02666', 'HG01457', 'NA19384', 'NA18989', 'NA24385']

# Load the tab-separated QUAST report (first row is the header), discard the
# excluded samples and renumber the remaining rows from 0.
df = pd.read_csv(table_file, sep='\t', header=0)
is_dropped = df['sample'].isin(drop_samples)
df = df[~is_dropped].reset_index(drop=True)

# Sample ids assigned to the HPRC group; every other remaining sample is
# treated as HGSVC below. str.split() without arguments already ignores the
# leading/trailing newlines of the literal.
hprc_samples = """
HG03579
HG01952
HG01243
HG00673
HG03492
HG01358
HG01258
HG02572
HG02717
HG01928
HG01106
HG00621
HG01109
HG02486
HG03471
""".split()

is_hprc = df['sample'].isin(hprc_samples)
hprc = df[is_hprc].copy()
hgsvc = df[~is_hprc].copy()

# HGSVC samples plotted with their own marker ("HGSVC[hc]").
# NOTE(review): the four 'HC…' ids share their numeric part with four of the
# dropped 'HG…'/'NA…' ids - presumably high-coverage versions of those
# samples; confirm against the sample sheet.
high_cov = [
    'HG01890',
    'HC02666', 'HC01457', 'HC18989', 'HC19384',
    'NA19317', 'NA19347',
    'HG00358',
]

is_high_cov = hgsvc['sample'].isin(high_cov)
hgsvc_hc = hgsvc[is_high_cov].copy()
hgsvc_nm = hgsvc[~is_high_cov].copy()

# x position (in bp) of the single 'T2T-Y' reference marker. In the NG50 panel
# the same value is also used as the y position.
# NOTE(review): presumably the T2T chrY assembly length, with NG50 == length
# for a single-contig assembly - confirm.
T2T_Y_LENGTH_BP = 62460029
BP_PER_MBP = 1e6

# One (table, scatter style) pair per sample group, in legend order.
cohort_styles = (
    (hprc, dict(s=50, c='royalblue', label='HPRC', marker='o')),
    (hgsvc_hc, dict(s=75, c='limegreen', label='HGSVC[hc]', marker='x')),
    (hgsvc_nm, dict(s=50, c='limegreen', label='HGSVC', marker='o')),
)


def _scatter_assemblies(ax, cohorts, y_column, t2t_y_value, y_divisor=None):
    """Scatter every cohort and the T2T-Y reference point onto ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    cohorts : iterable of ``(DataFrame, dict)`` pairs; each table must have the
        columns ``assembly_length_bp`` and ``y_column``, the dict holds the
        keyword arguments forwarded to ``ax.scatter``.
    y_column : name of the column plotted on the y axis.
    t2t_y_value : y coordinate of the T2T-Y marker (already in plot units).
    y_divisor : if given, the y column is divided by this value before
        plotting; ``None`` plots the raw column values.
    """
    for table, style in cohorts:
        y_values = table[y_column]
        if y_divisor is not None:
            y_values = y_values / y_divisor
        # x axis is always the assembly length in Mbp
        ax.scatter(table['assembly_length_bp'] / BP_PER_MBP, y_values, **style)
    ax.scatter(
        [T2T_Y_LENGTH_BP / BP_PER_MBP],
        [t2t_y_value],
        s=50,
        c='red',
        label='T2T-Y',
        marker='s'
    )


def _label_axes(ax, y_label, legend_loc):
    """Apply the title, axis labels, tick size and legend shared by all panels."""
    ax.set_title('chrY assemblies', fontsize=18)
    ax.set_xlabel('Assembly length (Mbp)', fontsize=20)
    ax.set_ylabel(y_label, fontsize=20)
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.legend(loc=legend_loc, fontsize=18)


def _save_panel(fig, target_dir, fig_name):
    """Write ``fig`` to ``target_dir`` as ``<fig_name>.png`` (150 dpi) and ``<fig_name>.pdf``.

    The figure object is saved explicitly instead of going through
    ``plt.savefig`` so the result does not depend on which figure happens to
    be pyplot's "current" one.
    """
    fig.savefig(
        target_dir / f'{fig_name}.png',
        dpi=150, bbox_inches='tight', transparent=False
    )
    fig.savefig(
        target_dir / f'{fig_name}.pdf',
        bbox_inches='tight', transparent=False
    )


# ---- Panel: contig NG50 vs. assembly length (both in Mbp) ----
fig, ax = plt.subplots(figsize=(8,8))
fig_name = 'fig1_panel_assm_ng50-v-length'

_scatter_assemblies(
    ax, cohort_styles, 'contig_NG50',
    t2t_y_value=T2T_Y_LENGTH_BP / BP_PER_MBP,
    y_divisor=BP_PER_MBP,
)

# Dotted y = x reference line drawn behind the points (zorder=0); the axis
# limits below show only part of it.
ax.plot([0, 90], [0, 90], ls='dotted', c='grey', lw=1, zorder=0)
ax.set_xlim(35, 90)
ax.set_ylim(0, 70)

_label_axes(ax, 'Contig NG50 (Mbp)', legend_loc='best')
_save_panel(fig, out_dir, fig_name)

#################

# ---- Panel: number of contigs vs. assembly length (Mbp) ----
fig, ax = plt.subplots(figsize=(8,8))
fig_name = 'fig1_panel_nctg-v-length'

# Contig counts are plotted as-is (no unit conversion); T2T-Y sits at y = 1.
_scatter_assemblies(ax, cohort_styles, 'contigs_num', t2t_y_value=1)

# Sample-name annotations at fixed data coordinates.
# NOTE(review): presumably placed next to the outlier points of these two
# samples - re-check the positions whenever the input table changes.
ax.text(65, 205, 'NA19239', {'fontsize': 12})
ax.text(80, 120, 'HG00512', {'fontsize': 12})

_label_axes(ax, 'Contigs (N)', legend_loc=2)
_save_panel(fig, out_dir, fig_name)



This notebook uses the output of the rule
--- 75_assm_stats.smk::aggregate_quast_reports
as input, and produces panels for figure 1.

Execution directory:  /home/local/work/code/github/project-male-assembly/notebooks/plotting
Working directory:  /home/local/work/code/github/project-male-assembly/notebooks/plotting
Output directory:  /home/local/work/data/sig_chrY/paper/output/figures
