In [None]:
%run ../config/init.py

## Setting workdir to `results/{{ cookiecutter.dataset_name }}/trimmomatic`

In [None]:
# Raw data for this dataset (FASTQ files and the sample sheet) is addressed
# through DATA/DATASET.
data_dir = os.path.join(DATA, DATASET)

# Hand the Trimmomatic results path to working_dir().
# NOTE(review): presumably working_dir() creates the directory and makes it the
# current working directory -- confirm in config/init.py.
result_dir = working_dir(
    os.path.join(RESULTS, DATASET, 'trimmomatic')
)

# The sample sheet sits next to the raw data; keep_default_na=False stops
# pandas from converting empty/"NA"-like cells into NaN.
sample_table_file = os.path.join(data_dir, 'sample_table.csv')
sample_table = pandas.read_csv(
    sample_table_file,
    keep_default_na=False,
)

# Preview the first rows of the sample sheet.
sample_table.head()

## Trimming and QC samples with Trimmomatic

Trimmomatic options should be adjusted according to the FastQC report. The default values used in this notebook are:

{% if cookiecutter.sequencing_technology is defined and cookiecutter.sequencing_technology == 'paired-end' %}
 * IlluminaClip: TruSeq3-PE.fa:2:30:10
{% else %}
 * IlluminaClip: TruSeq3-SE.fa:2:30:10
{% endif %}
 * Minlen: 25
 * Avgqual: 30
 * Leading: 30
 * Trailing: 30
 
For more information, see the [Trimmomatic](http://www.usadellab.org/cms/?page=trimmomatic) documentation.

### More options from the Trimmomatic CWL workflow
```
       [--avgqual AVGQUAL] 
       [--crop CROP]       
       [--headcrop HEADCROP]
       [--illuminaClip ILLUMINACLIP]
       [--leading LEADING]
       [--maxinfo MAXINFO]
       [--minlen MINLEN]
       [--phred PHRED]
       [--tophred33]
       [--tophred64]
       [--trailing TRAILING]
```

In [None]:
log_file = 'trimming.log'
# Edit these values accordingly with the FastQC report and Trimmomatic path for adapters

MINLEN = 25
AVGQUAL = 30
LEADING = 30
TRAILING = 30
{% if cookiecutter.sequencing_technology is defined and cookiecutter.sequencing_technology == 'paired-end' %}
TRIMMOMATIC_ADAPTER = 'TruSeq3-PE.fa:2:30:10'

trimming_yml = {
    'threads': 2,
    'illuminaClip': os.path.join(TRIMMOMATIC_ADAPTERS, TRIMMOMATIC_ADAPTER),
    'minlen': MINLEN,
    'avgqual': AVGQUAL,
    'leading': LEADING,
    'trailing': TRAILING,
    'input_files': []
}
for i, r in sample_table.iterrows():
    files = r['file'].split('|')
    r1 = os.path.join(DATA, DATASET, files[0])
    r2 = os.path.join(DATA, DATASET, files[1])
    if not os.path.exists(files[0]) or \
        not os.path.exists(files[1]):
        trimming_yml['input_files'].append([
            {'class': 'File', 'path': r1},
            {'class': 'File', 'path': r2}])
{% else %}
TRIMMOMATIC_ADAPTER = 'TruSeq3-SE.fa:2:30:10'

trimming_yml = {
    'threads': 2,
    'illuminaClip': os.path.join(TRIMMOMATIC_ADAPTERS, TRIMMOMATIC_ADAPTER),
    'minlen': MINLEN,
    'avgqual': AVGQUAL,
    'leading': LEADING,
    'trailing': TRAILING,
    'input_files': []
}

for i, r in sample_table.iterrows():
    f = os.path.join(DATA, DATASET, r['file'])
    if not os.path.exists(r['file']):
        trimming_yml['input_files'].append({'class': 'File', 'path': f})
{% endif %}

if trimming_yml['input_files']:
    write_to_yaml(trimming_yml, 'trimming.yml')  
{% if cookiecutter.sequencing_technology is defined and cookiecutter.sequencing_technology == 'paired-end' %}
    cmd_header = '{} {}/pre-processing/trimming-qc-pe.cwl trimming.yml > {} 2>&1 &'.format(
        CWLRUNNER, CWLWORKFLOWS, log_file)
{% else %}
    cmd_header = '{} {}/pre-processing/trimming-qc-se.cwl trimming.yml > {} 2>&1 &'.format(
        CWLRUNNER, CWLWORKFLOWS, log_file)
{% endif %}
    run_command(cmd_header)

### Checking command output
Re-execute the next cell until it prints: **Run completed**

In [None]:
# Inspect the log file written by the trimming command launched above.
# NOTE(review): check_cwl_command_log is defined outside this notebook
# (presumably in config/init.py); its exact output is not visible here.
check_cwl_command_log(log_file)
