<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Setup" data-toc-modified-id="Setup-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Setup</a></span></li><li><span><a href="#Get-Well-Data-from-the-Database" data-toc-modified-id="Get-Well-Data-from-the-Database-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Get Well Data from the Database</a></span></li><li><span><a href="#Get-a-List-of-Plates" data-toc-modified-id="Get-a-List-of-Plates-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Get a List of Plates</a></span></li><li><span><a href="#Get-Wells-for-Each-Plate" data-toc-modified-id="Get-Wells-for-Each-Plate-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Get Wells for Each Plate</a></span></li><li><span><a href="#Get-Samples-Missing-from-the-Taxonomy" data-toc-modified-id="Get-Samples-Missing-from-the-Taxonomy-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Get Samples Missing from the Taxonomy</a></span></li><li><span><a href="#Get-Family-and-Genus-Coverage" data-toc-modified-id="Get-Family-and-Genus-Coverage-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Get Family and Genus Coverage</a></span></li><li><span><a href="#Generate-Report" data-toc-modified-id="Generate-Report-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Generate Report</a></span></li></ul></div>

# Setup

In [1]:
from pathlib import Path
from datetime import datetime

import pandas as pd
from jinja2 import Environment, FileSystemLoader

import lib.db as db
import lib.util as util

In [2]:
# Connection handed to every query function below.
# NOTE(review): presumably opens the project database - confirm in lib.db.
CXN = db.connect()

# Captured once so the dated file name and the `now` value passed to the
# template come from the same instant.
NOW = datetime.now()

# Both directories are relative to the notebook's working directory.
OUTPUT_DIR = Path('..', 'output')
TEMPLATE_DIR = str(Path('reports'))  # passed to FileSystemLoader as a plain string

REPORT_TEMPLATE = 'sample_plates_report.html'
REPORT_NAME = f'sample_plates_report_{NOW.strftime("%Y-%m-%d")}.html'
REPORT_PATH = OUTPUT_DIR.joinpath(REPORT_NAME)

# Get Well Data from the Database

In [3]:
def get_wells(cxn):
    """Read every well, annotated with taxon and picogreen data when available.

    Both lookups are LEFT JOINs, so a well is returned even when its
    ``sample_id`` has no row in ``taxon_ids`` or ``picogreen``; the columns
    coming from the unmatched table are NULL (NaN/None in the data frame).
    Keeping wells without a ``taxon_ids`` match is what lets ``get_missing``
    find them: an inner join would have filtered them out before that check.

    Args:
        cxn: Database connection accepted by ``pandas.read_sql``.

    Returns:
        pandas.DataFrame with all ``wells`` columns plus ``picogreen_id``,
        ``scientific_name`` and ``ng_microliter_mean``, ordered by
        ``local_no``, ``row``, ``col``.
    """
    sql = """
        SELECT wells.*, picogreen_id, scientific_name, ng_microliter_mean
          FROM wells
     LEFT JOIN taxon_ids ON (wells.sample_id = taxon_ids.id)
     LEFT JOIN picogreen USING (sample_id)
        ORDER BY local_no, row, col
    """
    return pd.read_sql(sql, cxn)

# Get a List of Plates

In [4]:
def get_plates(wells):
    """Reduce the wells table to one row per distinct plate description.

    Args:
        wells: Data frame containing at least the plate-level columns
            listed below.

    Returns:
        pandas.DataFrame holding only the plate-level columns, with
        duplicate rows removed (the first occurrence of each is kept).
    """
    plate_fields = ['plate_id', 'entry_date', 'local_id', 'protocol', 'notes']
    return wells[plate_fields].drop_duplicates()

# Get Wells for Each Plate

In [5]:
def get_plate_wells(wells):
    """Map each plate to the list of its well records.

    Wells are grouped by ``local_no``; each group is stored under the
    ``plate_id`` of its first row. Missing values are replaced by empty
    strings before the rows are converted to dictionaries.

    Args:
        wells: Data frame with ``local_no`` and ``plate_id`` columns.

    Returns:
        dict of ``plate_id`` -> list of per-well dictionaries.
    """
    return {
        rows['plate_id'].iloc[0]: rows.fillna('').to_dict(orient='records')
        for _, rows in wells.groupby('local_no')
    }

# Get Samples Missing from the Taxonomy

In [6]:
def get_missing(cxn, wells):
    """Select the wells whose sample ID does not appear in ``taxon_ids``.

    Args:
        cxn: Database connection accepted by ``pandas.read_sql``.
        wells: Data frame with a ``sample_id`` column.

    Returns:
        The rows of ``wells`` whose ``sample_id`` is not among the ``id``
        values of the ``taxon_ids`` table. The result can only be non-empty
        if ``wells`` itself contains such rows.
    """
    known = pd.read_sql('SELECT * FROM taxon_ids', cxn)
    is_missing = ~wells['sample_id'].isin(known['id'])
    return wells.loc[is_missing]

# Get Family and Genus Coverage

In [7]:
def get_genus_coverage(cxn):
    """Summarize taxonomy coverage per genus, per family, and overall.

    For every group the result holds two non-null counts taken from the
    ``taxonomy`` table: ``total`` (rows with a ``scientific_name``) and
    ``imaged`` (rows with a ``file_names`` value), plus ``percent`` =
    ``imaged / total * 100``.

    Three levels of grouping are stacked into one table:
      * (family, genus) - one row per genus;
      * (family, '')    - one subtotal row per family;
      * ('~Total~', '') - a single grand-total row.

    Rows are ordered by family then genus, so each family subtotal (empty
    genus) comes directly before that family's genera and '~Total~' sorts
    after ordinary capitalized family names.

    Args:
        cxn: Database connection accepted by ``pandas.read_sql``.

    Returns:
        pandas.DataFrame indexed by (family, genus) with columns ``total``,
        ``imaged``, ``family``, ``genus`` and ``percent``.
    """
    taxonomy = pd.read_sql('SELECT * FROM taxonomy', cxn)
    taxonomy = taxonomy.rename(
        columns={'scientific_name': 'total', 'file_names': 'imaged'})
    taxonomy = taxonomy[['family', 'genus', 'total', 'imaged']]

    keys = ['family', 'genus']

    genera = taxonomy.groupby(keys).count()

    # Blank out the genus so grouping collapses to one row per family.
    # .assign returns a new frame, avoiding assignment into a slice.
    by_family = taxonomy.assign(genus='')
    families = by_family.groupby(keys).count()

    # Collapse everything into a single grand-total group.
    overall = by_family.assign(family='~Total~')
    total = overall.groupby(keys).count()

    # Sort on the (family, genus) index *before* copying the levels into
    # columns: once 'family'/'genus' exist as both index levels and column
    # labels, sort_values() on those names is ambiguous and raises
    # ValueError in current pandas.
    coverage = pd.concat([families, genera, total]).sort_index()
    coverage['family'] = coverage.index.get_level_values('family')
    coverage['genus'] = coverage.index.get_level_values('genus')
    coverage['percent'] = coverage['imaged'] / coverage['total'] * 100.0

    return coverage

# Generate Report

In [8]:
def print_report(cxn):
    """Render the sample-plates HTML report and write it to ``REPORT_PATH``.

    The template ``REPORT_TEMPLATE`` is loaded from ``TEMPLATE_DIR`` and
    rendered with:
      * ``now``     - the module-level ``NOW`` timestamp;
      * ``wells``   - per-plate well records from ``get_plate_wells``;
      * ``plates``  - plate records from ``get_plates``;
      * ``genera``  - coverage records from ``get_genus_coverage``;
      * ``missing`` - records from ``get_missing``.

    Args:
        cxn: Database connection passed through to the query helpers.

    Returns:
        None. The rendered report is written to ``REPORT_PATH``,
        replacing any existing file of the same name.
    """
    env = Environment(loader=FileSystemLoader(TEMPLATE_DIR))
    template = env.get_template(REPORT_TEMPLATE)

    # Query the wells once; three of the template inputs derive from them.
    wells = get_wells(cxn)

    report = template.render(
        now=NOW,
        wells=get_plate_wells(wells),
        plates=get_plates(wells).to_dict(orient='records'),
        genera=get_genus_coverage(cxn).to_dict(orient='records'),
        missing=get_missing(cxn, wells).to_dict(orient='records'))

    # Write with an explicit encoding: the platform default may be unable to
    # encode non-ASCII characters in the rendered text (e.g. cp1252).
    with open(REPORT_PATH, 'w', encoding='utf-8') as out_file:
        out_file.write(report)

In [9]:
# Build the report from the shared connection; the HTML is written to REPORT_PATH.
print_report(CXN)