In [28]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# import catheat
import openpyxl as xl
from openpyxl.utils.dataframe import dataframe_to_rows
import logging

In [29]:
def is_number(s):
    """Return True if ``s`` can be converted by ``float()``, otherwise False.

    Parameters
    ----------
    s : object
        Value to test (typically a tag read from the sample sheet).

    Returns
    -------
    bool
        True when ``float(s)`` succeeds. Note that this includes float NaN
        and strings such as ``"nan"`` or ``"inf"``, which ``float()`` accepts.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: non-numeric text; TypeError: None, lists and other
        # objects float() cannot take. Both mean "not a number" here.
        return False
    return True

In [30]:
# Columns written to the "_Mastersheet" tab, in output order
# (used as the default `cats` of makeCleanMasterSheet).
mastersheetCats = [
    'Tag', 'Cage', 'Ear', 'Sex', 'Color', 'Genotype',
    'DOB', 'DOD', 'Father', 'Mother', 'Lineage', 'PlatePos',
]

In [31]:
def parseFrame(fn, locs_fn='96-wellLocs.pkl'):
    """Read a sample sheet and merge it onto the table of plate well positions.

    Parameters
    ----------
    fn : str or path-like
        Excel sample sheet. The code below reads the columns ``Plate``,
        ``Row``, ``Column``, ``Tag``, ``Parents`` and ``Lineage`` (or the
        older name ``Strain``, which is renamed to ``Lineage``).
    locs_fn : str or path-like, optional
        Pickled DataFrame of well positions; it must contain ``Column`` and
        ``Row`` columns. Defaults to the file name the notebook has always used.

    Returns
    -------
    pandas.DataFrame
        Outer merge of the well-position table with the sample sheet, with
        the derived columns ``Father``, ``Mother``, ``TagClean``,
        ``PlatePos`` and ``Info`` added to the sheet rows.

    Raises
    ------
    ValueError
        If any ``Parents`` entry contains more than one "x", so that it
        cannot be split unambiguously into father and mother.
    """
    # dropna(how='all') removes rows that are completely empty. Column and
    # Tag are forced to text so they are not parsed as numbers.
    df = pd.read_excel(fn, dtype={'Column': str, 'Tag': str}).dropna(how='all')
    if 'Strain' in df.columns:  # not really necessary anymore
        df = df.rename(columns={'Strain': 'Lineage'})
    df['Column'] = df['Column'].str.zfill(2)
    df['Lineage'] = df['Lineage'].fillna("--")

    # expand=True yields as many columns as the longest split, so pin the
    # result to exactly two columns before assigning Father / Mother.
    parents = df['Parents'].str.split("x", expand=True)
    if parents.shape[1] > 2:
        raise ValueError(
            "Parents entries must look like '<father>x<mother>'; "
            "found an entry with more than one 'x'")
    parents = parents.reindex(columns=[0, 1])  # missing mother -> NaN
    df['Father'] = parents[0]
    df['Mother'] = parents[1]

    # Purely numeric tags get a "t" prefix; other tags are kept as they are.
    df['TagClean'] = df['Tag'].apply(
        lambda tag: "t" + str(tag) if is_number(tag) else tag)
    df['PlatePos'] = [
        "{}-{}{}".format(plate, row, col)
        for plate, row, col in zip(df['Plate'], df['Row'], df['Column'])
    ]
    df['Info'] = [
        '\n'.join([str(tag), str(lineage)])
        for tag, lineage in zip(df['Tag'], df['Lineage'])
    ]

    # NOTE(review): unpickling can execute arbitrary code; only load a
    # well-position file that comes from a trusted source.
    locs = pd.read_pickle(locs_fn).sort_values(
        ['Column', 'Row']).reset_index(drop=True)  # impt standard plate indexing
    # No `on=` is given, so the merge joins on every column name the two
    # frames share; how='outer' keeps wells that have no sample.
    df = locs.merge(df, how='outer')
#     logging.log(msg = "Returned parsed sample sheet", level = 0)
    return df

In [32]:
# def makeExcel(fn, wb, df):
#     '''Takes the filename for naming the sheet and a pyxl workbook'''
#     sheetname = fn.rstrip('.xlsx') + '_CleanLayout'
#     wb = xl.load_workbook(fn)
#     ws = wb.create_sheet(sheetname)    
#     layout = df.pivot(index='Row', columns='Column',
#                       values=['Info']).fillna("empty")
#     rows = dataframe_to_rows(layout['Info'],
#                              index=True,
#                              header=True)
#     for r in rows:
#         ws.append(r)
#     for cell in ws['A'] + ws[1]:
#         cell.style = 'Pandas'
#     wb.save(fn)
#     return fn

In [33]:
def makeCleanLayout(fn, wb, df):
    """Add a "<name>_CleanLayout" sheet showing ``Info`` on a Row x Column grid.

    Parameters
    ----------
    fn : str
        Workbook file name; only used to build the sheet title.
    wb : openpyxl workbook
        Workbook that receives the new sheet.
    df : pandas.DataFrame
        Must contain ``Row``, ``Column`` and ``Info`` columns.

    Returns
    -------
    The same workbook ``wb``, with the new sheet added.
    """
    # str.rstrip('.xlsx') strips any trailing run of the characters
    # ".", "x", "l", "s" (so "samples.xlsx" would become "sample");
    # remove the literal suffix instead.
    stem = fn[:-len('.xlsx')] if fn.endswith('.xlsx') else fn
    ws = wb.create_sheet(stem + '_CleanLayout')
    layout = df.pivot(index='Row', columns='Column',
                      values=['Info']).fillna("empty")
    for r in dataframe_to_rows(layout['Info'], index=True, header=True):
        ws.append(r)
    # Apply the 'Pandas' named style to the first column and the first row.
    for cell in ws['A'] + ws[1]:
        cell.style = 'Pandas'
#     logging.log(msg = "Returning clean layout")
    return wb

In [34]:
def makeCleanSampleList(fn, wb, df):
    """Add a "<name>_CleanSampleList" sheet listing one sample per row.

    Parameters
    ----------
    fn : str
        Workbook file name; only used to build the sheet title.
    wb : openpyxl workbook
        Workbook that receives the new sheet.
    df : pandas.DataFrame
        Must contain ``Plate``, ``Row``, ``Column``, ``TagClean`` and
        ``Lineage`` columns.

    Returns
    -------
    The same workbook ``wb``, with the new sheet added.
    """
    # str.rstrip('.xlsx') strips a set of characters, not the suffix
    # (e.g. "wells.xlsx" -> "we"); remove the literal extension instead.
    stem = fn[:-len('.xlsx')] if fn.endswith('.xlsx') else fn
    ws = wb.create_sheet(stem + "_CleanSampleList")
    cleanSamples = df[['Plate', 'Row', 'Column', 'TagClean', 'Lineage']]
    for r in dataframe_to_rows(cleanSamples, index=False, header=True):
        ws.append(r)
    return wb

In [35]:
def makeCleanMasterSheet(fn, wb, df, cats=mastersheetCats):
    """Add a "<name>_Mastersheet" sheet holding the columns listed in ``cats``.

    Parameters
    ----------
    fn : str
        Workbook file name; only used to build the sheet title.
    wb : openpyxl workbook
        Workbook that receives the new sheet.
    df : pandas.DataFrame
        Must contain every column named in ``cats``.
    cats : list of str, optional
        Columns to export, in output order. Defaults to ``mastersheetCats``.

    Returns
    -------
    The same workbook ``wb``, with the new sheet added.
    """
    # str.rstrip('.xlsx') strips a set of characters, not the suffix
    # (e.g. "samples.xlsx" -> "sample"); remove the literal extension instead.
    stem = fn[:-len('.xlsx')] if fn.endswith('.xlsx') else fn
    ws = wb.create_sheet(stem + "_Mastersheet")
    master = df[cats].fillna("")  # missing values are written as empty text
    for r in dataframe_to_rows(master, index=False, header=True):
        ws.append(r)
    return wb


In [36]:
def addHeaders(wb, operator = "KL"):
    """Set the left odd-page header of every sheet to tab name, operator and date.

    Parameters
    ----------
    wb : openpyxl workbook
        Workbook whose sheets are updated in place.
    operator : str, optional
        Initials written into the header. Previously this argument was
        ignored and "KL" was always written.

    Returns
    -------
    The same workbook ``wb``.
    """
    # Built in one piece: a backslash continuation inside a string literal
    # would embed the next source line's indentation in the header text.
    header_text = "&[Tab] {} - &[Date]".format(operator)
    for sheet in wb.sheetnames:
        wb[sheet].oddHeader.left.text = header_text
    return wb

In [37]:
def process(fn, wb, df):
    """Add all clean sheets and headers to ``wb`` and save it to ``fn``.

    Parameters
    ----------
    fn : str
        Workbook file name; used for the sheet titles and as the save path.
    wb : openpyxl workbook
        Workbook to extend; it is modified in place.
    df : pandas.DataFrame
        Parsed sample sheet, passed on to each sheet builder.

    Returns
    -------
    The same workbook ``wb`` after it has been saved.
    """
    wb = makeCleanLayout(fn, wb, df)
    wb = makeCleanSampleList(fn, wb, df)
    wb = makeCleanMasterSheet(fn, wb, df)
    wb = addHeaders(wb)
    # logging.log() takes the level as its first argument; calling it with
    # only a message raises TypeError before the workbook is saved.
    logging.log(logging.INFO, "Saving file")
    wb.save(fn)
    return wb

In [38]:
# Sample sheet to process. The cells below read this workbook and later
# save the generated sheets back to the same path.
fn = 'T1129040_20-12-03.xlsx'

In [39]:
# Open the workbook with openpyxl (to add sheets to it) and, separately,
# parse the same file into a DataFrame with parseFrame.
wb = xl.load_workbook(fn)
df = parseFrame(fn)
# fn = "testoutput.xlsx"

In [40]:
# Add the three generated sheets and the page headers to the workbook.
# This is the same call sequence as the body of process(), without its
# logging call and save; saving happens in the next cell.
wb = makeCleanLayout(fn, wb, df)
wb = makeCleanSampleList(fn, wb, df)
wb = makeCleanMasterSheet(fn, wb, df)
wb = addHeaders(wb)




In [41]:
# Writes to the same path the workbook was loaded from, i.e. the input
# file is overwritten.
# NOTE(review): re-running the load/build cells after this save will
# presumably add the generated sheets a second time - confirm before re-running.
wb.save(fn)

In [26]:
# Batch version of the cells above: load, parse, add the three generated
# sheets and save, once per file name in `fns`.
# NOTE(review): `fns` is not defined in any visible cell and this cell's
# execution count (26) is lower than the cells above it, so it will raise
# NameError on Restart & Run All unless `fns` is defined first.
# NOTE(review): unlike the single-file cells, this loop does not call
# addHeaders - confirm whether that is intentional.
# The loop also rebinds the notebook-level names fn, wb and df.
for fn in fns:
    wb = xl.load_workbook(fn)
    df = parseFrame(fn)
    wb = makeCleanLayout(fn, wb, df)
    wb = makeCleanSampleList(fn, wb, df)
    wb = makeCleanMasterSheet(fn, wb, df)
    wb.save(fn)