# coproID report

In [None]:
import datetime

# Echo every line of version.txt, then stamp the report with the current time.
with open("version.txt", "r") as version_file:
    version_lines = [raw_line.rstrip() for raw_line in version_file]
for version in version_lines:
    print(f"Version {version}")
generated_at = datetime.datetime.now()
print(f"Report generated on {generated_at}")

[![](coproid_logo.png)](https://github.com/nf-core/coproID)

## Introduction
[coproID](https://github.com/nf-core/coproID) is a pipeline to identify the source of coprolites and, more generally, of metagenomic samples.

If you are reading these lines, coproID finished running successfully, and you can find your results below.  
You can find more information about the different result files in the coproID documentation: [coproid.readthedocs.io](https://coproid.readthedocs.io/en/latest/output.html)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from IPython.display import display, Markdown, Latex, HTML
import base64
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource
from bokeh.transform import factor_cmap
from bokeh.palettes import Set1
from bokeh.models.tools import HoverTool
from bokeh.layouts import widgetbox
from bokeh.models.widgets import Button
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn
from bokeh.models import CustomJS
import ipywidgets as widgets
from plotnine import *
import warnings
warnings.simplefilter('ignore')

In [None]:
def coproid_summary_plot(df):
    """Draw the two-organism coproID summary plot and return the organisms found.

    Parameters
    ----------
    df : str or path-like
        Path to the coproID result CSV. The first column is read as the index
        and is used as the sample name on the plot. Organisms are discovered
        from the columns whose name contains
        ``normalized_bp_proportion_aligned_``; for each of them a
        ``coproID_proba_<organism>`` column is read when plotting.

    Returns
    -------
    list of str
        Organism identifiers (underscores kept) in column order. The list is
        returned whether or not a plot could be drawn, so it can always be
        handed on to other functions.

    Notes
    -----
    The plot opposes the probabilities of exactly two organisms, so it is only
    drawn when exactly two organisms are present; any other count skips the
    plot instead of failing on a missing second organism.
    """
    column_prefix = "normalized_bp_proportion_aligned_"
    df = pd.read_csv(df, index_col=0)
    organisms = [col.replace(column_prefix, "") for col in df.columns if column_prefix in col]
    organisms_clean = [name.replace("_", " ") for name in organisms]

    if len(organisms) != 2:
        # Nothing sensible to draw on a 2D probability plane.
        return organisms

    first, second = organisms
    first_clean, second_clean = organisms_clean
    proba_first = f"coproID_proba_{first}"
    proba_second = f"coproID_proba_{second}"

    display(Markdown("### coproID summary plot"))

    # Faint quadrant captions: bottom-left, bottom-right, top-right, top-left.
    species_text = pd.DataFrame({
        'x': [0.25, 0.75, 0.75, 0.25],
        'y': [0.25, 0.25, 0.75, 0.75],
        'text': ['Unknown', first_clean, 'Unknown', second_clean],
    })

    df['samp_name'] = df.index
    df['coproID_prediction'] = 'Unknown'
    # Applied in this order on purpose: when both probabilities exceed 0.5,
    # the second organism wins.
    df['coproID_prediction'] = np.where(df[proba_first] > 0.5, first_clean, df['coproID_prediction'])
    df['coproID_prediction'] = np.where(df[proba_second] > 0.5, second_clean, df['coproID_prediction'])

    p = ggplot(df, aes(x = proba_first, y = proba_second))
    p = p + geom_point(aes(color='coproID_prediction'), size=2)
    p = p + geom_label(aes(label="samp_name", color='coproID_prediction'), size=8, nudge_x = 0.02, ha='left', va='bottom')
    p = p + theme_classic() + labs(x=f"coproID proba {first_clean}", y = f"coproID proba {second_clean}")
    p = p + geom_text(data=species_text, mapping=aes(x='x', y='y', label='text'), alpha=0.3, color='grey')
    p = p + geom_hline(yintercept=0.5, linetype='dashed', alpha=0.1)
    p = p + geom_vline(xintercept=0.5, linetype='dashed', alpha=0.1)
    p = p + scale_color_manual(name='Predicted Organism', values = {first_clean:'#ef7576', second_clean:'#c194c8', 'Unknown':'#a2a3a1'})
    p = p + coord_cartesian(xlim=[0,1], ylim=[0,1])
    p.draw()
    return organisms

In [None]:
def plot_bokeh(df, organisms = None):
    """Show an interactive scatter plot of the embedding, coloured by label.

    Parameters
    ----------
    df : str or path-like
        Path to the embedding CSV. The first column is read as the index.
        Columns ``PC1``/``PC2`` are renamed to ``DIM1``/``DIM2`` if present;
        the plot reads ``DIM1``, ``DIM2`` and ``labels``, and the hover tool
        additionally references ``name``. Missing values are replaced by the
        string ``'sink'``.
    organisms : iterable of str, optional
        Labels that should receive the leading palette colours. ``None``
        (the default) is treated as an empty collection.

    Notes
    -----
    Labels listed in ``organisms`` take colours from the start of the Set1
    palette; all remaining labels continue from the third colour onwards (or
    later, if more than two organisms were given). The palette is cycled when
    there are more labels than colours instead of raising.
    """
    # A None default avoids sharing one mutable list between calls and lets
    # callers pass through an "unknown" result without crashing.
    organisms = [] if organisms is None else list(organisms)

    d = pd.read_csv(df, index_col = 0).fillna('sink')
    d = d.rename(columns={'PC1':'DIM1', 'PC2':'DIM2'})

    palette = Set1[9]
    labels = sorted(set(d['labels']))
    orga_in_endo = [i for i in labels if i in organisms]
    orga_not_in_endo = [i for i in labels if i not in organisms]

    colors = {}
    for idx, label in enumerate(orga_in_endo):
        colors[label] = palette[idx % len(palette)]
    # The first two palette slots stay reserved for the organisms, even when
    # fewer than two of them occur in the data.
    offset = max(2, len(orga_in_endo))
    for idx, label in enumerate(orga_not_in_endo):
        colors[label] = palette[(offset + idx) % len(palette)]

    d['colors'] = [colors[i] for i in d['labels']]

    TOOLS="pan,wheel_zoom,zoom_in,zoom_out,box_zoom,reset,save"
    source = ColumnDataSource(d)
    p = figure(tools=TOOLS)
    # NOTE(review): `legend=` is kept as in the original; newer bokeh releases
    # may expect `legend_field=` or `legend_group=` - confirm against the pinned version.
    p.scatter(x = 'DIM1', y='DIM2', color='colors', alpha = 0.6, size = 6, legend='labels', source=source)
    hover = HoverTool()
    hover.tooltips = [("Organism", "@labels"),('Sample',"@name")]
    p.add_tools(hover)
    output_notebook(hide_banner=True)
    show(p)

In [None]:
def bokeh_table(df):
    """Show a "Download" button that exports the given CSV table from the browser.

    Parameters
    ----------
    df : str or path-like
        Path to a CSV file. The first column is read as the index and is also
        copied into a leading ``sample`` column; ``PC1``/``PC2`` are renamed
        to ``DIM1``/``DIM2`` if present.

    Notes
    -----
    Clicking the button runs a JavaScript callback that serialises every
    column of the data source to CSV text and triggers a download named
    ``coproID_result.csv``. Values are joined with plain commas, without
    quoting.
    """
    d = pd.read_csv(df, index_col=0)
    d = d.rename(columns={'PC1':'DIM1', 'PC2':'DIM2'})
    d.insert(0, "sample", d.index)
    source = ColumnDataSource(d)

    columns = [TableColumn(field=name, title=name) for name in d.columns] # bokeh columns
    # NOTE(review): the table widget is built but, as before, only the button
    # is passed to show() - confirm whether the table should be displayed too.
    data_table = DataTable(columns=columns, source=source) # bokeh table

    button = Button(label="Download", button_type="success")

    javaScript="""
    function table_to_csv(source) {
        const columns = Object.keys(source.data)
        const nrows = source.get_length()
        const lines = [columns.join(',')]

        for (let i = 0; i < nrows; i++) {
            let row = [];
            for (let j = 0; j < columns.length; j++) {
                const column = columns[j]
                row.push(source.data[column][i].toString())
            }
            lines.push(row.join(','))
        }
        return lines.join('\\n').concat('\\n')
    }


    const filename = 'coproID_result.csv'
    const filetext = table_to_csv(source)
    const blob = new Blob([filetext], { type: 'text/csv;charset=utf-8;' })

    //addresses IE
    if (navigator.msSaveBlob) {
        navigator.msSaveBlob(blob, filename)
    } else {
        const link = document.createElement('a')
        link.href = URL.createObjectURL(blob)
        link.download = filename
        link.target = '_blank'
        link.style.visibility = 'hidden'
        link.dispatchEvent(new MouseEvent('click'))
    }
    """
    # Register the callback as a click handler; assigning a CustomJS object to
    # `js_event_callbacks` (a mapping of event name -> callbacks) does not do that.
    button.js_on_click(CustomJS(args=dict(source=source), code=javaScript))
    output_notebook(hide_banner=True)
    show(button)

In [None]:
# Path of the embedding CSV that is passed to plot_bokeh further down.
umap = "sourcepredict_embedding.csv"

## coproID summary
### Table

In [None]:
# Path of the coproID result table; it is also handed to coproid_summary_plot below.
d = "coproID_result.csv"
# The first CSV column becomes the DataFrame index.
df = pd.read_csv(d, index_col=0)
# Bare expression so the notebook renders the table as the cell output.
df

In [None]:
def create_download_link(df=df, title = "Download table as CSV", filename = "coproid_result.csv", index = True):
    """Build an HTML link that downloads a DataFrame as a CSV file.

    The table is serialised to CSV, base64-encoded and embedded in a
    ``data:`` URI, so the link works without any file on disk.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to export. Defaults to the ``df`` object that exists when this
        function is defined.
    title : str
        Text shown for the link.
    filename : str
        File name suggested to the browser for the download.
    index : bool
        Whether the DataFrame index is written as the first CSV column.
        Defaults to True because the result table is loaded with
        ``index_col=0``: leaving the index out would drop that first column
        from the download. Pass False to export the data columns only.

    Returns
    -------
    IPython.display.HTML
        The rendered anchor element.
    """
    csv_text = df.to_csv(index=index)
    payload = base64.b64encode(csv_text.encode()).decode()
    link = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(link.format(payload=payload, title=title, filename=filename))

In [None]:
# Bare expression so the notebook renders the returned download link.
create_download_link(df)


In [None]:
# The return value is kept because it is passed to plot_bokeh below.
orga = coproid_summary_plot(d)

## Microbiome profile embedding

In [None]:
# Embedding scatter plot, using the value returned by coproid_summary_plot.
plot_bokeh(umap, organisms=orga)

## Damage plots

In [None]:

# Collect the damage frequency tables from the working directory. File names
# are expected to look like "<sample>_otu_<species><extension>".
fwd_ext = ".5pCtoT_freq.txt"
rev_ext = ".3pGtoA_freq.txt"
allplots = [i for i in os.listdir(".") if "_freq.txt" in i]
# Sorted (rather than raw set order) so the plots appear in the same order on
# every run of the report.
samples = sorted({i.split("_otu_")[0] for i in allplots})
# Drop the last two dot-separated parts (the extension), then keep what
# follows the "_otu_" separator.
species = sorted({"_".join(".".join(i.split(".")[0:-2]).split("_otu_")[1:]) for i in allplots})

In [None]:
def plot_damage(fwd, rev, sample, organism):
    """Plot the 5' C>T and 3' G>A frequency profiles of one sample/species pair.

    Parameters
    ----------
    fwd : str or path-like
        Tab-delimited file with ``pos`` and ``5pC>T`` columns; the first three
        lines are skipped.
    rev : str or path-like
        Tab-delimited file with ``pos`` and ``3pG>A`` columns; the first three
        lines are skipped.
    sample : str
        Sample name, used in the heading and the figure title.
    organism : str
        Species name; underscores are shown as spaces.

    Raises
    ------
    FileNotFoundError
        If either input file does not exist. Both files are read before any
        figure is created.
    """
    f = pd.read_csv(fwd, skiprows=3, delimiter="\t")
    r = pd.read_csv(rev, skiprows=3, delimiter="\t")
    # Positions on the right-hand panel are negated, so that larger distances
    # sit further to the left.
    rev_pos = -r['pos']
    species_label = organism.replace('_', ' ')

    fig, (ax_fwd, ax_rev) = plt.subplots(1, 2, figsize=(18,3))
    fig.suptitle(f"Sample: {sample} - Species: {species_label}", fontsize="x-large", fontweight='bold', y = 1.2)
    display(Markdown(f"- **Sample**: {sample} - **Species**: *{species_label}*"))

    ax_fwd.plot(f['pos'], f['5pC>T'])
    ax_fwd.set_title('5pC>T')
    ax_fwd.set_ylabel('Frequency')
    ax_fwd.set_xticks(f['pos'])

    ax_rev.plot(rev_pos, r['3pG>A'], color = 'red')
    ax_rev.set_title('3pG>A')
    ax_rev.set_ylabel('Frequency')
    ax_rev.set_xticks(rev_pos)
    plt.show()
    

In [None]:
# Try every sample/species combination. Not every pair necessarily has files,
# so a missing pair is reported and skipped rather than treated as an error.
for sa in samples:
    for sp in species:
        fwd_path = f"{sa}_otu_{sp}{fwd_ext}"
        rev_path = f"{sa}_otu_{sp}{rev_ext}"
        try:
            plot_damage(fwd_path, rev_path, sa, sp)
        except FileNotFoundError:
            # Report the paths that were actually looked up.
            print(f"No damage profile found: {fwd_path} / {rev_path}")