# Project Report
 
__Project Name__: {{ cookiecutter.project_name }}  
__Dataset Name__: {{ cookiecutter.dataset_name }}  
__Author__: {{ cookiecutter.author_name }}  
__Email__: {{ cookiecutter.email }}  
__Dataset__: {{ cookiecutter.dataset_name }}  
__Created Date__: {% now 'local', '%d/%m/%Y' %}  

## Outline

This is a Differential Binding Analysis workflow for ChIP-Seq data.

Our workflow is based on a set of Jupyter Notebooks and [CWL workflows](https://github.com/ncbi/cwl-ngs-workflows-cbb). 
The workflows execute the analysis using the following tools: 

 * FastQC, for pre-processing quality control
 * Trimmomatic, for reads trimming
 * BWA, for reads alignment
 * Phantompeakqualtools, for ChIP-Seq quality control
 * MACS2, for peak calling
 * IDR, for Irreproducible Discovery Rate (IDR) peak classification
 * Homer, for peak annotation
 * DiffBind, for differential binding analysis

### Workflow steps


 1. [Pre-processing QC](#1.-Pre-processing-QC)
 2. [Trimming](#2.-Trimming)
 3. [Alignments and Quantification](#3.-Alignments-and-Quantification)
     - [Alignment Quality Control](#3.1.-Alignment-QC)
 4. [Peak Calling](#4.-Peak-Calling)
     - [Irreproducible Discovery Rate](#4.1.-Irreproducible-Discovery-Rate) 
 5. [Differential binding Detection](#5.-Differential-binding-Detection)

### Requirements

 1. PM4NGS
 2. Poppler (https://poppler.freedesktop.org/)
 3. Full list of requirements: *requirements/conda-env-dependencies.yaml*
{% if cookiecutter.use_docker == 'y' %}
 4. Docker
{% endif %}   

In [None]:
%run ../config/init.py
from pm4ngs.jupyterngsplugin.markdown.fastqc import fastqc_table, fastqc_trimmomatic_table
from pm4ngs.jupyterngsplugin.markdown.alignment import alignment_table
from pm4ngs.jupyterngsplugin.markdown.alignment import reads_distribution_plot
from pm4ngs.jupyterngsplugin.markdown.phantompeakqualtools import qc_table
from pm4ngs.jupyterngsplugin.markdown.chipseq import peak_calling_table_with_qc
from pm4ngs.jupyterngsplugin.markdown.chipseq import idr_table
from pm4ngs.jupyterngsplugin.markdown.chipseq import diffbind_table
from pm4ngs.jupyterngsplugin.markdown.utils import hide_code_str
from pm4ngs.jupyterngsplugin.markdown.utils import info_table
from pm4ngs.jupyterngsplugin.utils.notebook import save_2_html
# Display the HTML returned by hide_code_str() as this cell's output.
# HTML is not imported in this cell; presumably it is provided by the
# ../config/init.py script run at the top of the cell -- confirm.
HTML(hide_code_str())

## 1. Pre-processing QC

In [None]:
# Every *.fastq.gz file found below the dataset folder is treated as a
# sample; the sample name is the file name with '.fastq.gz' removed.
data_dir = os.path.join(DATA, DATASET)
os.chdir(data_dir)
samples = sorted(
    file_name.replace('.fastq.gz', '')
    for _root, _dirs, file_names in os.walk('./')
    for file_name in file_names
    if file_name.endswith('.fastq.gz')
)

# Switch the working directory to NOTEBOOKS before rendering the section.
os.chdir(NOTEBOOKS)
display(Markdown(info_table('01 - Pre-processing QC', data_dir)))
display(Markdown('#### FastQC report\n'))

# The dataset folder is passed for both directory arguments of fastqc_table,
# which returns the per-sample data plus the Markdown text shown below.
samples_data, str_msg = fastqc_table(samples, data_dir, data_dir)
display(Markdown(str_msg))
del str_msg

## 2. Trimming

In [None]:
# Section header for the trimming step, pointing at the 'trimmomatic'
# results folder of this dataset.
os.chdir(NOTEBOOKS)
result_dir = os.path.join(RESULTS, DATASET, 'trimmomatic')
display(Markdown(info_table('02 - Samples trimming', result_dir)))

# fastqc_trimmomatic_table receives the samples_data built by the previous
# cell and returns the replacement samples_data together with the Markdown
# text that is displayed.
samples_data, str_msg = fastqc_trimmomatic_table(
    samples_data,
    samples,
    result_dir,
)
display(Markdown(str_msg))
del str_msg

## 3. Alignments and Quantification

In [None]:
# Section header for the alignment step, pointing at the 'alignments'
# results folder of this dataset.
os.chdir(NOTEBOOKS)
result_dir = os.path.join(RESULTS, DATASET, 'alignments')
display(Markdown(info_table('03 - Alignments and Quantification', result_dir)))
# The reference genome name below is a cookiecutter placeholder that is
# filled in when the project is generated from this template.
display(Markdown('### Reference genome\n**{{ cookiecutter.genome_name }}**\n\n'))
# keep_default_na=False stops pandas from turning empty/"NA"-like cells of
# the sample table into NaN.
sample_table_file = os.path.join(DATA, DATASET, 'sample_table.csv')
sample_table = pandas.read_csv(sample_table_file, keep_default_na=False)
# From here on the sample names come from the sample table's 'sample_name'
# column and replace the list built from the FASTQ file names.
samples = sample_table['sample_name'].unique()
# The cookiecutter template tags below keep exactly one of the two `method`
# assignments, depending on the sequencing technology chosen at generation.
{% if cookiecutter.sequencing_technology == 'paired-end' %}
method = 'BWA_paired'
{% else %}
method = 'BWA_single'
{% endif %}

# alignment_table returns the replacement samples_data together with the
# Markdown text that is displayed.
samples_data, str_msg =  alignment_table(samples_data, samples, result_dir, method) 
display(Markdown(str_msg))
del str_msg

### Distribution of Reads

In [None]:
# Plot the reads distribution for the samples using the `method` value set
# in the alignment cell. (18, 6) is presumably the figure size -- confirm
# against reads_distribution_plot.
reads_distribution_plot(samples_data, samples, (18, 6), method)
plt.show()
plt.close()

## 3.1. Alignment QC

### Phantompeakqualtools

See https://github.com/kundajelab/phantompeakqualtools for more detailed info about the Phantompeakqualtools parameters

In [None]:
# Build and display the QC table from the 'alignments' results folder.
# The two 450 values are presumably the image width and height -- confirm
# against qc_table.
str_msg = qc_table(
    samples,
    os.path.join(RESULTS, DATASET, 'alignments'),
    450,
    450,
)
display(Markdown(str_msg))
del str_msg

## 4. Peak Calling
### Phantompeakqualtools `-savp` PDF plots with MACS2-identified peaks for pooled samples

In [None]:
# Section header for the peak-calling step, pointing at the 'peak-calling'
# results folder of this dataset.
os.chdir(NOTEBOOKS)
result_dir = os.path.join(RESULTS, DATASET, 'peak-calling')
display(Markdown(info_table('04 - Peak Calling and IDR', result_dir)))

# The table combines the peak-calling results with files from the
# 'alignments' folder; the same img_size is passed for both size arguments.
img_size = 250
alignment_path = os.path.join(RESULTS, DATASET, 'alignments')
str_msg = peak_calling_table_with_qc(
    sample_table,
    alignment_path,
    result_dir,
    img_size,
    img_size,
)
display(Markdown(str_msg))
del str_msg

## 4.1. Irreproducible Discovery Rate

In [None]:
# Build and display the IDR table from the 'idr' results folder; the same
# img_size is passed for both size arguments.
os.chdir(NOTEBOOKS)
img_size = 450
str_msg = idr_table(
    sample_table,
    os.path.join(RESULTS, DATASET, 'idr'),
    img_size,
    img_size,
)
display(Markdown(str_msg))
del str_msg

## 5. Differential binding Detection

In [None]:
# Section header for the differential binding step, pointing at the
# 'diffbind' results folder of this dataset.
os.chdir(NOTEBOOKS)
result_dir = os.path.join(RESULTS, DATASET, 'diffbind')
display(Markdown(info_table('05 - Differential binding Detection', result_dir)))

# result_dir already holds the 'diffbind' path, so it is reused as-is; the
# same img_size is passed for both size arguments.
img_size = 450
str_msg = diffbind_table(
    sample_table,
    result_dir,
    img_size,
    img_size,
)
display(Markdown(str_msg))
del str_msg

In [None]:
# Run the export from the NOTEBOOKS directory so the notebook file name
# below resolves; save_2_html presumably writes an HTML copy of it -- confirm.
os.chdir(NOTEBOOKS)
save_2_html('00 - Project Report.ipynb')