In [31]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# import catheat
import openpyxl as xl
from openpyxl.utils.dataframe import dataframe_to_rows
import logging

In [32]:
# fn =
# fn = 'T979199.xlsx'
# fn = 'T962919_2020-01-23.xlsx'
# fn = 'T1004640_2-14-20.xlsx'
# fn = 'T965----05_2020-02-28.xlsx'
# fn = '20200323_plateT965523.xlsx'

In [33]:
# Columns written to the "_Mastersheet" tab, in output order
# (default `cats` of makeCleanMasterSheet).
mastersheetCats = [
    'Tag', 'Cage', 'Ear', 'Sex', 'Color', 'Genotype',
    'DOB', 'DOD',
    'Father', 'Mother', 'Lineage', 'PlatePos',
]

In [34]:
def parseFrame(fn, locsFn='96-wellLocs.pkl'):
    '''Read a plate sample sheet and merge it onto the plate-location table.

    Parameters
    ----------
    fn : str
        Path of the Excel file to read. 'Column' and 'Tag' are read as
        strings; rows that are entirely empty are dropped. The sheet must
        provide 'Plate', 'Row', 'Column', 'Tag', 'Parents' and either
        'Lineage' or 'Strain' (which is renamed to 'Lineage').
    locsFn : str, optional
        Path of the pickled location table. It is sorted by 'Column' then
        'Row' before merging. Defaults to the file the notebook has always
        used.

    Returns
    -------
    pandas.DataFrame
        Outer merge of the location table with the sample sheet (joined on
        whatever column names the two frames share), with these derived
        columns added to the sample rows:

        * 'Father', 'Mother' -- 'Parents' split at the first "x".
        * 'TagClean' -- 'Tag' prefixed with "t" when it is purely numeric.
        * 'PlatePos' -- "<Plate>-<Row><Column>" with a zero-padded column.
        * 'Info' -- Tag and Lineage joined by a newline.
    '''
    df = pd.read_excel(fn, dtype={'Column': str, 'Tag': str}).dropna(how='all')
    if 'Strain' in df.columns:
        df = df.rename(columns={'Strain': 'Lineage'})

    df['Column'] = df['Column'].str.zfill(2)
    df['Lineage'] = df['Lineage'].fillna("--")

    # Split at the FIRST "x" only and force exactly two result columns, so a
    # name that itself contains "x" (or a sheet where no row has an "x")
    # cannot produce a column-count mismatch on assignment.
    parents = (df['Parents'].str.split("x", n=1, expand=True)
               .reindex(columns=[0, 1]))
    df['Father'] = parents[0]
    df['Mother'] = parents[1]

    # A partially filled row can have a missing Tag (NaN/None); only real
    # strings are tested with isnumeric(), everything else passes through.
    df['TagClean'] = df['Tag'].apply(
        lambda tag: "t" + tag
        if isinstance(tag, str) and tag.isnumeric() else tag)

    df['PlatePos'] = [
        "{}-{}{}".format(plate, row, col)
        for plate, row, col in zip(df['Plate'], df['Row'], df['Column'])
    ]
    df['Info'] = [
        '\n'.join([str(tag), str(lineage)])
        for tag, lineage in zip(df['Tag'], df['Lineage'])
    ]

    # SECURITY: unpickling can execute arbitrary code -- only point locsFn at
    # a file you created yourself.
    locs = pd.read_pickle(locsFn).sort_values(
        ['Column',
         'Row']).reset_index(drop=True)  # impt standard plate indexing
    return locs.merge(df, how='outer')

In [35]:
# def makeExcel(fn, wb, df):
#     '''Takes the filename for naming the sheet and a pyxl workbook'''
#     sheetname = fn.rstrip('.xlsx') + '_CleanLayout'
#     wb = xl.load_workbook(fn)
#     ws = wb.create_sheet(sheetname)    
#     layout = df.pivot(index='Row', columns='Column',
#                       values=['Info']).fillna("empty")
#     rows = dataframe_to_rows(layout['Info'],
#                              index=True,
#                              header=True)
#     for r in rows:
#         ws.append(r)
#     for cell in ws['A'] + ws[1]:
#         cell.style = 'Pandas'
#     wb.save(fn)
#     return fn

In [36]:
def makeCleanLayout(fn, wb, df):
    '''Append a "<name>_CleanLayout" sheet showing the plate as a Row x Column grid.

    Parameters
    ----------
    fn : str
        Workbook filename; used only to build the sheet title (a trailing
        ".xlsx" is removed first).
    wb : openpyxl workbook
        Workbook that receives the new sheet. It is modified in place.
    df : pandas.DataFrame
        Frame with 'Row', 'Column' and 'Info' columns; each (Row, Column)
        pair must be unique for the pivot to succeed.

    Returns
    -------
    The same workbook object, for chaining.
    '''
    # str.rstrip('.xlsx') strips any trailing run of the characters
    # '.', 'x', 'l', 's' (so "samples.xlsx" became "sample"); remove the
    # literal suffix instead.
    suffix = '.xlsx'
    stem = fn[:-len(suffix)] if fn.endswith(suffix) else fn
    ws = wb.create_sheet(stem + '_CleanLayout')

    # Wells without a sample have no Info after the pivot; label them.
    layout = df.pivot(index='Row', columns='Column',
                      values=['Info']).fillna("empty")
    for r in dataframe_to_rows(layout['Info'], index=True, header=True):
        ws.append(r)

    # Apply the 'Pandas' named style to the first column and first row,
    # i.e. the index and header cells written above.
    for cell in ws['A'] + ws[1]:
        cell.style = 'Pandas'
    return wb

In [37]:
def makeCleanSampleList(fn, wb, df):
    '''Append a "<name>_CleanSampleList" sheet listing one sample per row.

    Parameters
    ----------
    fn : str
        Workbook filename; used only to build the sheet title (a trailing
        ".xlsx" is removed first).
    wb : openpyxl workbook
        Workbook that receives the new sheet. It is modified in place.
    df : pandas.DataFrame
        Frame containing 'Plate', 'Row', 'Column', 'TagClean' and 'Lineage'.

    Returns
    -------
    The same workbook object, for chaining.
    '''
    # str.rstrip('.xlsx') strips any trailing run of the characters
    # '.', 'x', 'l', 's' (so "samples.xlsx" became "sample"); remove the
    # literal suffix instead.
    suffix = '.xlsx'
    stem = fn[:-len(suffix)] if fn.endswith(suffix) else fn
    # NOTE(review): Excel caps sheet titles at 31 characters; stems longer
    # than 15 characters exceed that with this suffix -- confirm how the
    # installed openpyxl version reacts (warning vs. error).
    ws = wb.create_sheet(stem + "_CleanSampleList")

    cleanSamples = df[['Plate', 'Row', 'Column', 'TagClean', 'Lineage']]
    for r in dataframe_to_rows(cleanSamples, index=False, header=True):
        ws.append(r)
    return wb

In [38]:
def makeCleanMasterSheet(fn, wb, df, cats=mastersheetCats):
    '''Append a "<name>_Mastersheet" sheet holding the selected columns of df.

    Parameters
    ----------
    fn : str
        Workbook filename; used only to build the sheet title (a trailing
        ".xlsx" is removed first).
    wb : openpyxl workbook
        Workbook that receives the new sheet. It is modified in place.
    df : pandas.DataFrame
        Frame that contains every column named in `cats`.
    cats : list of str, optional
        Columns to export, in output order. Defaults to `mastersheetCats`.

    Returns
    -------
    The same workbook object, for chaining.
    '''
    # str.rstrip('.xlsx') strips any trailing run of the characters
    # '.', 'x', 'l', 's' (so "samples.xlsx" became "sample"); remove the
    # literal suffix instead.
    suffix = '.xlsx'
    stem = fn[:-len(suffix)] if fn.endswith(suffix) else fn
    ws = wb.create_sheet(stem + "_Mastersheet")

    # Missing values are written as empty strings.
    master = df[cats].fillna("")
    for r in dataframe_to_rows(master, index=False, header=True):
        ws.append(r)
    return wb


In [39]:
def addHeaders(wb, operator = "KL"):
    '''Set the left odd-page header of every sheet in the workbook.

    Parameters
    ----------
    wb : openpyxl workbook
        Workbook whose sheets are updated in place.
    operator : str, optional
        Initials stamped into the header before the date. Previously this
        argument was accepted but ignored and "KL" was always written; the
        default keeps that output unchanged.

    Returns
    -------
    The same workbook object, for chaining.
    '''
    # "&[Tab]" and "&[Date]" are header field placeholders (sheet name and
    # date); the wide run of spaces separates the tab name from the
    # operator/date stamp, as in the original literal.
    headerText = "&[Tab] " + " " * 44 + "{} - &[Date]".format(operator)
    for sheet in wb.sheetnames:
        wb[sheet].oddHeader.left.text = headerText
    return wb

In [40]:
# Workbook processed by the single-file cells that follow.
fn = 'T965521_20-05-08.xlsx'

In [41]:
# Open the existing workbook (the generated sheets are added to it) and
# parse the same file into the merged sample/location DataFrame.
wb = xl.load_workbook(fn)
df = parseFrame(fn)
# fn = "testoutput.xlsx"

In [42]:
# Add the three generated sheets, then stamp the page header on every sheet.
# Each helper returns the workbook it was given, so `wb` is simply re-bound.
wb = makeCleanLayout(fn, wb, df)
wb = makeCleanSampleList(fn, wb, df)
wb = makeCleanMasterSheet(fn, wb, df)
wb = addHeaders(wb)



In [44]:
# Save to `fn`; unless `fn` was re-bound after loading, this overwrites the
# input workbook in place.
wb.save(fn)

In [None]:
# Workbooks handled by the batch loop in the next cell.
fns = ['T979159_2020-01-27.xlsx', 'T979199.xlsx', 'T962919_2020-01-23.xlsx', 'T1004640_2-14-20.xlsx']

In [None]:
# Batch run: for each workbook, add the layout and sample-list sheets and
# save back to the same path (the input file is overwritten).
# NOTE(review): unlike the single-file cells, this loop does not call
# makeCleanMasterSheet or addHeaders -- confirm whether that is intentional.
# The loop also re-binds the notebook-level `fn`, `wb` and `df`.
for fn in fns:
    wb = xl.load_workbook(fn)
    df = parseFrame(fn)
    wb = makeCleanLayout(fn, wb, df)
    wb = makeCleanSampleList(fn, wb, df)
    wb.save(fn)