<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Setup" data-toc-modified-id="Setup-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Setup</a></span></li><li><span><a href="#Get-the-Sample-Plates-from-the-Google-Sheet" data-toc-modified-id="Get-the-Sample-Plates-from-the-Google-Sheet-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Get the Sample Plates from the Google Sheet</a></span></li><li><span><a href="#Get-the-Picogreen-Data-from-the-Google-Sheet" data-toc-modified-id="Get-the-Picogreen-Data-from-the-Google-Sheet-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Get the Picogreen Data from the Google Sheet</a></span></li><li><span><a href="#Output-the-Sample-Data" data-toc-modified-id="Output-the-Sample-Data-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Output the Sample Data</a></span></li></ul></div>

# Setup

In [1]:
from pathlib import Path

import pandas as pd

import lib.db as db
import lib.util as util
import lib.google as google

In [2]:
# Data directories, relative to the notebook's working directory.
# INTERIM_DATA receives the raw CSV exports of the Google sheets;
# PROCESSED_DATA receives the final CSV files written at the end.
INTERIM_DATA = Path('..', 'data', 'interim')
PROCESSED_DATA = Path('..', 'data', 'processed')

# Get the Sample Plates from the Google Sheet

Get the entered data from the sample_plates Google sheet.

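Each plate occupies a fixed block of 14 sheet rows: six rows of plate-level fields followed by the eight plate rows (A–H), each spanning the 12 plate columns: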
```
                        Plate column 1  ...     Plate column 12
plate_id:UUID
entry_date:ISO_Date
local_id:Text
protocol:Text
notes:Text
results:Text
Plate row A                UUID?          ...     UUID?
    .                        .            ...       .
    .                        .            ...       .
    .                        .            ...       .
Plate row H                UUID?          ...     UUID?
```

In [3]:
# Export the sample_plates sheet to a CSV file in the interim directory and
# load that file into a data frame.
csv_name = 'sample_plates.csv'
csv_path = INTERIM_DATA / csv_name

# Stride used by the strided slicing below: the number of consecutive sheet
# rows taken up by one plate (see the layout in the markdown above).
step = 14

with open(csv_path, 'wb') as csv_out:
    google.export_sheet_csv('sample_plates', csv_out)

sample_plates = pd.read_csv(csv_path)

# Drop rows with nothing in the 'Plate ID' column and renumber from zero so
# that every remaining row sits at a fixed offset within its plate.
has_data = sample_plates['Plate ID'].notna()
sample_plates = sample_plates.loc[has_data].reset_index(drop=True)

# Collect the per plate information into one data frame. Taking every
# `step`-th row of the first sheet column, starting at offsets 0-5, yields one
# single-column frame per plate-level field; placed side by side they give one
# row per plate with positional column labels 0-5.
plates = pd.concat(
    [
        sample_plates.iloc[offset::step, [0]].reset_index(drop=True)
        for offset in range(6)
    ],
    axis=1,
    ignore_index=True)

# Build one record per well. For every plate row (A-H) and plate column (1-12)
# the matching sheet cell is pulled from each plate with a strided slice, then
# tagged with its row letter and column number and joined to the per plate
# information so each well carries its plate's fields.
row_start = 6  # offset of plate row A within a plate's block of sheet rows
rows = 'ABCDEFGH'
wells = []
for row_offset, row_letter in enumerate(rows):
    row = row_start + row_offset
    for col in range(1, 13):
        well = (
            sample_plates.iloc[row::step, col]
            .reset_index(drop=True)
            .to_frame()
        )
        well['row'] = row_letter
        well['col'] = col
        # ignore_index=True relabels the columns 0-8; they are named later.
        wells.append(pd.concat([plates, well], axis=1, ignore_index=True))

# Stack the 96 per-position frames into a single frame of wells.
wells = pd.concat(wells, axis=0, ignore_index=True)
# Replace the positional column labels left by the concatenation with names.
wells = wells.rename(
    columns={
        0: 'plate_id',
        1: 'entry_date',
        2: 'local_id',
        3: 'protocol',
        4: 'notes',
        5: 'results',
        6: 'sample_id',
        7: 'row',
        8: 'col'})

# Sequential well number counted along each plate row: A1 -> 1 ... H12 -> 96.
# Computed on whole columns, so it also works when the frame has no rows.
row_index = wells['row'].str.upper().map('ABCDEFGH'.find)
wells['well_no'] = row_index * 12 + wells['col']

# Numeric part of the local identifier (every non-digit run is removed).
# regex=True must be explicit: since pandas 2.0 str.replace treats the pattern
# as a literal string by default, which would leave the text untouched and
# make the integer conversion fail.
wells['local_no'] = (
    wells['local_id'].str.replace(r'\D+', '', regex=True).astype('int'))

# Well label: row letter followed by the zero padded column, e.g. 'A01'.
wells['well'] = wells['row'] + wells['col'].map('{:02d}'.format)

wells.head()

Unnamed: 0,plate_id,entry_date,local_id,protocol,notes,results,sample_id,row,col,well_no,local_no,well
0,00f5f483-3657-40de-8aad-7639c6b8e74a,2018-01-11,Local identifier: NITFIX_1,Protocol: Protocol_NitFix_1,"Notes: OSU SAMPLES. Failed grinding,low yield,...",Quantification NA,x,A,1,1,1,A01
1,02b81f81-0fc3-45de-9ad4-0d85eb8d5c94,2018-01-17,Local identifier: NITFIX_2,Protocol,"Notes: OSU SAMPLES. Tube and cap failure, samp...",Quantification NA,x,A,1,1,2,A01
2,031fc196-3587-477d-8bd2-4a9f5167be4d,2018-01-18,Local identifier:NITFIX_3,Protocol,Notes: OSU SAMPLES,Quantification 3/5,ade73b3b-79de-407d-b9d2-6c4f850309bc,A,1,1,3,A01
3,037a4923-94f1-4134-b6dc-b36478e37bcc,2018-01-19,Local identifier: NITFIX_4,Protocol,"Notes:Contamination, Samples Discarded",Quantification NA,x,A,1,1,4,A01
4,04a4aca9-a339-40f6-b2f0-047b1513e4de,2018-01-23,Local identifier: NITFIX_5,Protocol,Notes: OSU SAMPLES.,Quantification 3/5,b5bc9a61-1be8-4d9c-9722-6ebe5fa0f244,A,1,1,5,A01


# Get the Picogreen Data from the Google Sheet

In [4]:
# Export the picogreen sheet to a CSV file in the interim directory.
csv_path = INTERIM_DATA / 'picogreen.csv'
with open(csv_path, 'wb') as csv_out:
    google.export_sheet_csv('picogreen_2_14_2_15', csv_out)

# Column names applied to the loaded data in place of the sheet's own header.
picogreen_columns = [
    'picogreen_id',
    'well',
    'rfu',
    'ng_microliter',
    'ng_microliter_mean',
    'quant_method',
    'quant_date',
    'sample_id',
]

# header=0 marks the first line of the file as a header row to be replaced by
# the names given above.
picogreen = pd.read_csv(csv_path, header=0, names=picogreen_columns)

picogreen.head()

Unnamed: 0,picogreen_id,well,rfu,ng_microliter,ng_microliter_mean,quant_method,quant_date,sample_id
0,13_01,A1,195.286,9.645493,10.189264,picogreen,2_15_18,c84c6871-887f-479e-bf1e-ff1c68b1c490
1,,B1,217.399,10.733035,,picogreen,2_15_18,
2,13_02,A2,1149.629,56.581164,65.824389,picogreen,2_15_18,e58072c6-ce0f-4029-9246-9756c391d944
3,,B2,1525.514,75.067614,,picogreen,2_15_18,
4,13_03,A3,337.331,16.631427,14.516884,picogreen,2_15_18,d07a86d9-dab9-45c9-a547-36b15ccc1dd7


# Output the Sample Data

In [5]:
# Connection obtained from the project's lib.db helper; the cells below hand
# it to DataFrame.to_sql as the destination for the output tables.
CXN = db.connect()

In [6]:
# Write the wells to the database (replacing any existing table of the same
# name) and to a CSV file in the processed data directory.
name = 'wells'
wells_csv = PROCESSED_DATA / f'{name}.csv'
wells.to_sql(name=name, con=CXN, if_exists='replace', index=False)
wells.to_csv(path_or_buf=wells_csv, index=False)

In [7]:
# Write the picogreen data to the database (replacing any existing table of
# the same name) and to a CSV file in the processed data directory.
name = 'picogreen'
picogreen_csv = PROCESSED_DATA / f'{name}.csv'
picogreen.to_sql(name=name, con=CXN, if_exists='replace', index=False)
picogreen.to_csv(path_or_buf=picogreen_csv, index=False)