# Parsing NovaSeq Interop Data

Goal of Novaseq QC tool is to automatically parse Novaseq QC Interop data without manual. Overall goal is for longitudinal study; provide users ability to search for failed data with associated reagent ID's and info (RIF). 

### User Input 
1. FCID
2. User Lab ID

### Output
1. Report
2. Push data into a database

### Acceptance Criteria

#### Q30
* S4 1.5 - 85%
* S2 1.0 - 75%
* S2 1.5 - 85%

#### Yield (G)
* S4 1.5 - 2400
* S2 1.0 - 1000
* S2 1.5 - 1000

# Functions

In [1]:
# Packages to Import
import pytz
import dateutil
import pandas as pd
import numpy as np
import csv
import math
import argparse
import re
import glob
import subprocess
from interop import py_interop_run
from interop import py_interop_metrics
from interop import py_interop_plot
from interop import py_interop_comm
from interop import py_interop_table
from interop import py_interop_run_metrics

def format_value(val):
    if hasattr(val, 'mean'):
        return val.mean()
    else:
        return val

from interop import py_interop_run_metrics, py_interop_run, py_interop_summary


def find_flowcell(fcid, paths = '/ghds/ivd/raw'):
    paths = list(paths.split(','))
    for path in paths:
        cmd = "ls -d " + path + "/*" + fcid
        ps = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        o, e = ps.communicate()
        output = o.decode("utf-8")
        fcid_path = output.split('\n')[0]
        if "No such file" in fcid_path:
            next
        else:
            return fcid_path
    return (0)

# def interop(run_folder):    
#     run_metrics = py_interop_run_metrics.run_metrics()
#     valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
#     py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
#     run_folder = run_metrics.read(run_folder, valid_to_load)
#     summary = py_interop_summary.run_summary()
#     py_interop_summary.summarize_run_metrics(run_metrics, summary)
    
def summary(run_folder):
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    run_folder = run_metrics.read(run_folder, valid_to_load)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    
    columns = ( ('Yield Total (G)', 'yield_g'), ('Projected Yield (G)', 'projected_yield_g'), ('% Aligned', 'percent_aligned'))
    rows = [('Non-Indexed Total', summary.nonindex_summary()), ('Total', summary.total_summary())]
    d = []
    for label, func in columns:
        d.append( (label, pd.Series([getattr(r[1], func)() for r in rows], index=[r[0] for r in rows])))
    df = pd.DataFrame.from_dict(d)

    return df

def read_metrics(run_folder):
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    run_folder = run_metrics.read(run_folder, valid_to_load)
    summary = py_interop_summary.run_summary()
    py_interop_summary.summarize_run_metrics(run_metrics, summary)
    
    read = 0
    columns = ( ('Lane', 'lane'), ('Tiles', 'tile_count'), ('Density (K/mm2)', 'density'))
    rows = [summary.at(read).at(lane) for lane in range(summary.lane_count())]
    d = []

    for label, func in columns:
        d.append( (label, pd.Series([format_value(getattr(r, func)()) for r in rows])))
    df = pd.DataFrame.from_dict(d)
    
    return df

def index(run_folder):
    run_metrics = py_interop_run_metrics.run_metrics()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_index_metrics_to_load(valid_to_load)
    run_folder = run_metrics.read(run_folder, valid_to_load)
    summary = py_interop_summary.index_flowcell_summary()
    py_interop_summary.summarize_index_metrics(run_metrics, summary)

    
    columns = ( ('Index Number', 'id'), ('Sample Id', 'sample_id'), ('Project', 'project_name'), ('Index 1 (I7)', 'index1'), ('Index 2 (I5)', 'index2'), ('% Reads Identified (PF)', 'fraction_mapped'))
    lane_summary = summary.at(0)

    d = []
    for label, func in columns:
        d.append( (label, pd.Series([getattr(lane_summary.at(i), func)() for i in range(lane_summary.size())], index=[lane_summary.at(i).id() for i in range(lane_summary.size())])))
    df = pd.DataFrame.from_items(d)
    print("\n".join([method for method in dir(lane_summary) if not method.startswith('_') and method not in ("set", "push_back", "reserve", "this", "resize", "clear", "sort")]))
    return df

def format_value(val):
    if hasattr(val, 'mean'):
        return val.mean()
    else:
        return val


def format_value_median(val):
    if hasattr(val, 'median'):
        return val.median()
    else:
        return val


def rmnan(input):
    if math.isnan(input):
        return ""
    else:
        return input

def surface(summary, input=None, read_index=0, lane_index=0, check_type=None):
    if check_type == "tile_count":
        return format_value(getattr(summary.at(read_index).at(lane_index), "tile_count")())
    elif check_type == "error_rate":
        return format_value(getattr(summary.at(read_index).at(lane_index), "error_rate")())
    elif check_type == "error_rate_75":
        return format_value(getattr(summary.at(read_index).at(lane_index), "error_rate_75")())
    elif check_type == "error_rate_100":
        return format_value(getattr(summary.at(read_index).at(lane_index), "error_rate_100")())
    elif check_type == "error_rate_35":
        return format_value(getattr(summary.at(read_index).at(lane_index), "error_rate_35")())
    elif check_type == "percent_gt_q30":
        return format_value_median(getattr(summary.at(read_index).at(lane_index), "percent_gt_q30")())
    elif check_type == "density":
        return round(format_value(getattr(summary.at(read_index).at(lane_index), "density")()) / (1e+3))
    elif check_type == "percent_pf":
        return round(format_value(getattr(summary.at(read_index).at(lane_index), "percent_pf")()), 2)
    elif check_type == "density_pf":
        return format_value(getattr(summary.at(read_index).at(lane_index), "density_pf")())
    elif check_type == "phasing":
        return format_value_median(getattr(summary.at(read_index).at(lane_index), "phasing")())
    elif check_type == "cluster_count_pf":
        return format_value(getattr(summary.at(read_index).at(lane_index), "cluster_count_pf")())
    elif check_type == "prephasing":
        return format_value(getattr(summary.at(read_index).at(lane_index), "prephasing")())
    elif check_type == "first_cycle_intensity":
        return format_value(getattr(summary.at(read_index).at(lane_index), "first_cycle_intensity")())
    elif check_type == "yield_g":
        return round(format_value(getattr(summary.at(read_index).at(lane_index), "yield_g")()), 1)
    elif check_type == "reads":
        return round(format_value(getattr(summary.at(read_index).at(lane_index), 'reads')()) / (1e+6), 2)
    elif check_type == "reads_pf":
        return round(format_value(getattr(summary.at(read_index).at(lane_index), "reads_pf")()) / (1e+6), 2)
    elif check_type == "percent_aligned":
        return format_value(getattr(summary.at(read_index).at(lane_index), "percent_aligned")())
    elif check_type == "lane":
        return format_value(getattr(summary.at(read_index).at(lane_index), "lane")())
    else:
        return ""

def interop(fcid_path):
    run_info = py_interop_run.info()
    run_info.read(fcid_path)
    run_metrics = py_interop_run_metrics.run_metrics()
    summary = py_interop_summary.run_summary()
    valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
    py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
    run_folder_path = fcid_path

    try:
        run_metrics.read(run_folder_path, valid_to_load)
    except Exception as ex:
        print("Skipping - cannot read RunInfo.xml: %s - %s" % (run_folder_path, str(ex)))

    py_interop_summary.summarize_run_metrics(run_metrics, summary)

    read_lengthrows = [
        ("Read %s%d" % ("(I)" if summary.at(i).read().is_index() else " ", summary.at(i).read().number()),
         summary.at(i).summary()) for i in range(summary.size())]
    read_title = [i[0] for i in read_lengthrows]
    columns_read = (
        ("Lane", "lane"), ('Tiles', 'tile_count'), ("Density (K/mm2)", 'density'), ("Cluster PF(%)", "percent_pf"),
        ("Phas/Prephas", "phasing"),
        ("Reads(M)", "reads"), ("Reads PF(M)", "reads_pf"), ("%>=Q30", "percent_gt_q30"),
        ("Yield(G)", "yield_g"), ("Aligned(%)", "percent_aligned"), ("Error Rate", "error_rate"),
        ("Error Rate 35 cycle(%)", "error_rate_35"), ("Error Rate 75 cycle(%)", "error_rate_75"),
        ("Error Rate 100 cycle(%)", "error_rate_100"), ("Intensity Cycle 1", "first_cycle_intensity"))

    output = pd.DataFrame()

    for read_id in range(len(read_title)):
        d_read = []

        for lane_id in range(summary.lane_count()):
            lane_read = []
            for label, func in columns_read:
                add_info = pd.Series(surface(summary, read_index=read_id, lane_index=lane_id, check_type=func))
                if label == 'Tiles':
                    lane_read.append((label, 2 * float(add_info)))
                else:
                    lane_read.append((label, add_info))
            lane_read_tmp = pd.DataFrame.from_dict(lane_read)
            d_read.append(lane_read_tmp)


        result = pd.concat(d_read)

        # tranpose data and combine
        header = np.transpose(d_read[0]).iloc[0]
        start = 0 
        end = len(header)
        df = pd.DataFrame(columns=header)

        for lane_id in range(summary.lane_count()):
            tmp = np.transpose(result[start:end])
            tmp = tmp.iloc[-1]
            tmp = np.transpose(pd.DataFrame(tmp))
            tmp.columns = header
            df = pd.concat([df, tmp], ignore_index=True)
            start = start + len(header)
            end = end + len(header)
        df['Read'] = read_title[read_id]

        output = pd.concat([output, df], ignore_index=True)

    output['Date'] = run_info.date()
    output['Flowcell'] = run_info.flowcell_id()
    output = output[['Date', 'Flowcell', 'Read', *header]]
    
    return output

# How to Use

In [2]:
fcid = 'H2YKTBGXK'
folder = find_flowcell(fcid)
test = interop(folder)
test

Unnamed: 0,Date,Flowcell,Read,Lane,Tiles,Density (K/mm2),Cluster PF(%),Phas/Prephas,Reads(M),Reads PF(M),%>=Q30,Yield(G),Aligned(%),Error Rate,Error Rate 35 cycle(%),Error Rate 75 cycle(%),Error Rate 100 cycle(%),Intensity Cycle 1
0,211214,H2YKTBGXK,Read 1,0 1 dtype: int64,432,0 319 dtype: int64,0 64.35 dtype: float64,0 0.363695 dtype: float64,0 207.01 dtype: float64,0 133.13 dtype: float64,0 75.505409 dtype: float64,0 19.9 dtype: float64,0 0.0 dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 5716.884277 dtype: float64
1,211214,H2YKTBGXK,Read 1,0 2 dtype: int64,432,0 313 dtype: int64,0 65.45 dtype: float64,0 0.353739 dtype: float64,0 203.18 dtype: float64,0 132.89 dtype: float64,0 76.559868 dtype: float64,0 19.9 dtype: float64,0 0.0 dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 5673.513672 dtype: float64
2,211214,H2YKTBGXK,Read 1,0 3 dtype: int64,432,0 317 dtype: int64,0 63.42 dtype: float64,0 0.35331 dtype: float64,0 205.56 dtype: float64,0 130.28 dtype: float64,0 74.534813 dtype: float64,0 19.5 dtype: float64,0 0.0 dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 4518.352051 dtype: float64
3,211214,H2YKTBGXK,Read 1,0 4 dtype: int64,432,0 316 dtype: int64,0 63.23 dtype: float64,0 0.339912 dtype: float64,0 205.01 dtype: float64,0 129.55 dtype: float64,0 75.459961 dtype: float64,0 19.4 dtype: float64,0 0.0 dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 4468.870605 dtype: float64
4,211214,H2YKTBGXK,Read (I)2,0 1 dtype: int64,432,0 319 dtype: int64,0 64.35 dtype: float64,0 NaN dtype: float64,0 207.01 dtype: float64,0 133.13 dtype: float64,0 88.97049 dtype: float64,0 0.7 dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 5611.912109 dtype: float64
5,211214,H2YKTBGXK,Read (I)2,0 2 dtype: int64,432,0 313 dtype: int64,0 65.45 dtype: float64,0 NaN dtype: float64,0 203.18 dtype: float64,0 132.89 dtype: float64,0 89.150581 dtype: float64,0 0.7 dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 5354.486328 dtype: float64
6,211214,H2YKTBGXK,Read (I)2,0 3 dtype: int64,432,0 317 dtype: int64,0 63.42 dtype: float64,0 NaN dtype: float64,0 205.56 dtype: float64,0 130.28 dtype: float64,0 88.818657 dtype: float64,0 0.6 dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 4347.203613 dtype: float64
7,211214,H2YKTBGXK,Read (I)2,0 4 dtype: int64,432,0 316 dtype: int64,0 63.23 dtype: float64,0 NaN dtype: float64,0 205.01 dtype: float64,0 129.55 dtype: float64,0 88.341454 dtype: float64,0 0.6 dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 4187.402832 dtype: float64
8,211214,H2YKTBGXK,Read 3,0 1 dtype: int64,432,0 319 dtype: int64,0 64.35 dtype: float64,0 0.316656 dtype: float64,0 207.01 dtype: float64,0 133.13 dtype: float64,0 75.358383 dtype: float64,0 19.9 dtype: float64,0 0.0 dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 5408.852051 dtype: float64
9,211214,H2YKTBGXK,Read 3,0 2 dtype: int64,432,0 313 dtype: int64,0 65.45 dtype: float64,0 0.30762 dtype: float64,0 203.18 dtype: float64,0 132.89 dtype: float64,0 76.447197 dtype: float64,0 19.9 dtype: float64,0 0.0 dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 NaN dtype: float64,0 5262.824219 dtype: float64


In [None]:

sum(test['%>=Q30'])/len(test['%>=Q30'])

In [12]:
from interop import py_interop_run_metrics, py_interop_run, py_interop_summary
run_metrics = py_interop_run_metrics.run_metrics()
run_metrics.read(folder)

# Testing: Summary Data

In [4]:
# interop summary
input = folder

return_info = {}

run_metrics = py_interop_run_metrics.run_metrics()
summary = py_interop_summary.run_summary()
valid_to_load = py_interop_run.uchar_vector(py_interop_run.MetricCount, 0)
py_interop_run_metrics.list_summary_metrics_to_load(valid_to_load)
run_folder_path = input

try:
    run_metrics.read(run_folder_path, valid_to_load)
except Exception as ex:
    print("Skipping - cannot read RunInfo.xml: %s - %s" % (run_folder_path, str(ex)))
py_interop_summary.summarize_run_metrics(run_metrics, summary)

# print(summary.size(), summary.lane_count(), summary.surface_count())

columns = (
    ('Yield Total (G)', "yield_g"), ('Projected Yield (G)', "projected_yield_g"), ("Error Rate", "error_rate"),
    ('% Aligned', "percent_aligned"), ("Intensity Cycle 1", "first_cycle_intensity"), ("%>=Q30", "percent_gt_q30"))
read_lengthrows = [
    ("Read %s%d" % ("(I)" if summary.at(i).read().is_index() else " ", summary.at(i).read().number()),
     summary.at(i).summary()) for i in range(summary.size())]

read_title = [i[0] for i in read_lengthrows]
summary_title = pd.DataFrame({"Run Summary": []})

# error_rate first_cycle_intensity percent_aligned percent_gt_q30 projected_yield_g yield_g
rows = [('Total', summary.total_summary())]
d = []

for label, func in columns:
    d.append((label, pd.Series([getattr(r[1], func)() for r in rows], index=[r[0] for r in rows])))
rows = [("Read %s%d" % ("(I)" if summary.at(i).read().is_index() else " ", summary.at(i).read().number()),
         summary.at(i).summary()) for i in range(summary.size())]

d_read = [] 
for label, func in columns:
    d_read.append((label, pd.Series([getattr(r[1], func)() for r in rows], index=[r[0] for r in rows])))
columns_list = ['Yield Total (G)', 'Projected Total Yield (G)', '% Aligned', "Error Rate",
                "Intensity Cycle 1", "%>=Q30"]

df_read = pd.DataFrame.from_dict(d_read)
df = pd.DataFrame.from_dict(d)  # total
d_read.append(d)  
return_summary_data = []


for index in range(len(read_title)):
    tmp = pd.DataFrame()
    for index2 in range(len(df_read.values)):      
        tmp["Level"] = read_title[index]
        tmp[columns_list[index2]] = df_read.values[index2][index]
    return_summary_data.append(tmp)
tmp = pd.DataFrame()
tmp["Level"] = "Total"

result = pd.concat([df_read, df], ignore_index=True)
for index in range(len(df.values)):
    tmp[columns_list[index]] = df.values[index][1]
return_summary_data.append(tmp)



In [5]:
header = np.transpose(df[0])
start = 0 
end = len(header)
df2 = pd.DataFrame(columns=header)

for lane_id in range(len(read_title)):
    tmp = np.transpose(result[start:end])
    tmp.columns = header
    tmp = tmp.iloc[-1]
    
    tmp = np.transpose(pd.DataFrame(tmp))
    
    df2 = pd.concat([df2, tmp], ignore_index=True)
    start = start + len(header)
    end = end + len(header)

# Testing: RIF Data

```
  <RfidsInfo>
    <FlowCellSerialBarcode>HJCN2DSX3</FlowCellSerialBarcode>
    <FlowCellPartNumber>20015843</FlowCellPartNumber>
    <FlowCellLotNumber>20620168</FlowCellLotNumber>
    <FlowCellExpirationdate>01/22/2023 00:00:00</FlowCellExpirationdate>
    <FlowCellStartDate>06/01/2022 14:00:00</FlowCellStartDate>
    <FlowCellNumberOfReuseRemaining>1</FlowCellNumberOfReuseRemaining>
    <FlowCellSupportedModes>HTWashOnly;S4</FlowCellSupportedModes>
    <FlowCellMode>S4</FlowCellMode>
    <FlowCellConsumableVersion>1</FlowCellConsumableVersion>
    <FlowCellRssi>2</FlowCellRssi>
    <LibraryTubeSerialBarcode>NV0532777-LIB</LibraryTubeSerialBarcode>
    <LibraryTubeSupportedModes>Universal</LibraryTubeSupportedModes>
    <LibraryTubePartNumber>20005221</LibraryTubePartNumber>
    <LibraryTubeLotNumber>1000011889</LibraryTubeLotNumber>
    <LibraryTubeExpirationdate>12/31/2169 00:00:00</LibraryTubeExpirationdate>
    <LibraryTubeStartDate>06/01/2022 14:00:00</LibraryTubeStartDate>
    <LibraryTubeRssi>4</LibraryTubeRssi>
    <SbsSerialBarcode>NV3855808-RGSBS</SbsSerialBarcode>
    <SbsSupportedModes>S4</SbsSupportedModes>
    <SbsPartNumber>20031051</SbsPartNumber>
    <SbsLotNumber>20613243</SbsLotNumber>
    <SbsExpirationdate>01/03/2023 00:00:00</SbsExpirationdate>
```

In [259]:
# RIF data

%%bash
folder=/ghds/ivd/raw/220601_A00770_0503_AHJCN2DSX3/
run_param=$(echo $folder"RunParameters.xml")
grep FlowCellExpirationdate $run_param | awk -F '>|<| ' '{print $7}'
grep FlowCellMode $run_param | awk -F '>|<' '{print $3}'

01/22/2023
S4


# Testing: Report

In [None]:
def generate_report():
    
    # Version Number
    if os.path.exists(f"{SCRIPT_DIR}/../VERSION.txt"):
        with open(f"{SCRIPT_DIR}/../VERSION.txt", "r") as file:
            result = file.readline().strip().split(" ")
            GIT_VERSION = result[0]
            if len(result) == 2:
                VERSION = result[1]
            else:
                VERSION = ''
    else:
        # EXTRACT MOST RECENT GIT COMMIT HASH FOR SCRIPT
        result = subprocess.run(["git", "rev-parse", "--short", "HEAD"], stdout=subprocess.PIPE)
        GIT_VERSION = result.stdout.decode("utf-8").strip()

        # EXRACT GIT TAG (SCRIPT VERSION) FOR MOST RECENT GIT COMMIT HASH
        result = subprocess.run(["git", "tag", "--list", "--contains", GIT_VERSION], stdout=subprocess.PIPE)
        tag = result.stdout.decode("utf-8").strip().split("/")[-1]
        result = subprocess.run(["git", "rev-list", "--count", "HEAD"], stdout=subprocess.PIPE)
        n_builds = result.stdout.decode("utf-8").strip()
        VERSION = tag + "-" + n_builds + "-" + GIT_VERSION

    now = datetime.now()
    TODAY_STRING = now.strftime("%m/%d/%Y")

    
    # ---Build Results Dataframes---#
    qc_report = pd.DataFrame(
        {"Metric": ["Yield Total (G)", "Q30", "Overall Qualification"],
         "Outcome": "NA"
         })

    # ---RIFID---#
    rif_df = pd.DataFrame({"Lot": lot,
                           "Metric": ["Cluster Density", "Cluster Passing Filter"],
                           "Threshold": [f"{cd_min_op}{cd_min}K/mm^2", f"{pf_op}{pf_cutoff}%"],
                           "Value": [f"{r_dict['seq_cd']}", f"{r_dict['seq_cf']}"],
                           "Outcome": "NA"})

    seq_df.loc[0, 'Outcome'] = "Pass" if cd_op(int(r_dict['seq_cd']), int(cd_min)) else "Fail"
    seq_df.loc[1, 'Outcome'] = "Pass" if p_op(int(r_dict['seq_cf']), int(pf_cutoff)) else "Fail"
    seq_pass = seq_df['Outcome'] == 'Pass'
    
    qc_report.loc[0]['Outcome'] = "Pass" if seq_pass.all() else "Fail"

    
    # ---OVERALL METRICS---#
    # Overall Qualification eioqc_passfail
    overall_pass = qc_report.loc[:3]['Outcome'] == 'Pass'
    qc_report.loc[4]['Outcome'] = 'Pass' if overall_pass.all() else 'Fail'

    header = f"Script name:,{SCRIPT_NAME}\n" \
             f"Script version:,{VERSION}\n" \
             f"Script github commit version:,{str(GIT_VERSION)}\n" \
             f"Date of analysis:,{TODAY_STRING}\n" \
             f"Flowcell ID:, {fcid}\n" \
             f"Flowcell Mode:, {fcid_mode}\n" \
             f"Flowcell filepath:,{fcid_path}\n" \
             f"Output filepath:,{output_file}\n\n" \
             f"Summary of qualification status:\n"
    footer = f"\nPerformed by:\nPerformed date:\nVerified by:\nVerified date:\n"

    # ---PRINT OUTPUTS---#
    with open(output_file, 'w') as fout:
        fout.write(header)
    qc_report.to_csv(output_file, sep=",", mode="a", index=False)

    # ---SEQUENCING OUTPUT---#
    with open(output_file, "a") as fout:
        fout.write('\nSequencing metric:\n')
    seq_df.to_csv(output_file, sep=",", mode="a", index=False)

    with open(output_file, "a") as fout:
        fout.write(footer)

    output_file = output_file.replace("^/ghess/", "/ghds/")
    output_csv = f"{url}{output_file}"
    print(f"\nOutput URL: {url}{output_path}")
    print(f"Report Output CSV: {output_csv}")

    return (0)