# Back-up file

In [None]:
import os
import pg8000
import pandas as pd
from pandas import ExcelWriter
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from datetime import datetime
from single_cell_ephys.lims_funcs import limsquery
#from single_cell_ephys.file_funcs import validated_input
import warnings
warnings.filterwarnings('ignore')

In [None]:
def get_lims_dataframe(query):
    """Run a LIMS query and return its rows as a pandas DataFrame.

    Parameters
    ----------
    query : string
        Query text handed unchanged to limsquery.

    Returns
    -------
    data_df : pandas DataFrame
        One row per result, with columns taken from the keys of the first
        result. An empty DataFrame (no columns) is returned, and a message is
        printed, when the query yields no rows.
    """
    result = limsquery(query)
    try:
        # Only this lookup can signal "no rows"; keep the try body minimal.
        columns = result[0].keys()
    except IndexError:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print("Could not find results for your query.")
        return pd.DataFrame()
    return pd.DataFrame(data=result, columns=columns)

In [None]:
def user_df(P_number, df):
    """Save an Excel sheet of all patched cells for a specified user in the current directory.

    The output file is always named 'user_dataframe.xlsx' (sheet 'Sheet1').

    Parameters
    ----------
    P_number : A string. Corresponds to the user's P number in the form of P1, P2, P3, etc.
        Rows are kept when their 'patched_cell_container' value contains it.
    df : a pandas dataframe
        Must contain the columns 'name', 'patched_cell_container', 'date',
        'rna_qc', 'go_no_go_63x', 'link_20x' and 'dendrite_type'.

    Returns
    -------
    None
    """
    belongs_to_user = df["patched_cell_container"].str.contains(P_number)
    # sort_values returns a new frame, so the selection taken from df is never
    # modified in place.
    user = df[belongs_to_user].sort_values("patched_cell_container", axis=0)
    user = user.loc[:, ['name', 'patched_cell_container', 'date', 'rna_qc',
                        'go_no_go_63x', 'link_20x', 'dendrite_type']]
    # Passing the path (rather than an ExcelWriter that is never saved) lets
    # pandas create, write and close the workbook, so the file reaches disk.
    user.to_excel('user_dataframe.xlsx', sheet_name='Sheet1')



In [None]:
def cell_count(P_number, df):
    """Count the cells in a dataframe that belong to a specified user.

    Parameters
    ----------
    P_number : A string. The user's P number (P1, P2, P3, etc.); a row belongs
        to the user when its 'patched_cell_container' value contains it.
    df : a pandas dataframe with a 'patched_cell_container' column

    Returns
    -------
    The number of non-null 'patched_cell_container' entries among the user's rows
    """
    belongs_to_user = df["patched_cell_container"].str.contains(P_number)
    user_rows = df[belongs_to_user]
    return user_rows["patched_cell_container"].count()

In [None]:
def gen_filter(df, col, cond):
    """Return the rows of df whose value in column col equals cond.

    Parameters
    ----------
    df : a pandas dataframe
    col : column label to compare
    cond : value the column must equal

    Returns
    -------
    A dataframe holding only the matching rows (original index labels kept)
    """
    matches = df[col] == cond
    return df[matches]

In [None]:
def per_user(P_number, df):
    """Return a summary of patcher metrics for a specified user.

    Parameters
    ----------
    P_number : A string. Corresponds to the user's P number in the form of P1, P2, P3, etc.
    df : a pandas dataframe
        Needs the 'patched_cell_container', 'rna_qc', 'go_no_go_63x' and
        'name' columns. Note that get_species adds a 'human?' column to it.

    Returns
    -------
    A tuple (total_cells, cells_passed_qc, qc_pass_percentage, mouse_cells,
    human_cells, imaged_cells). qc_pass_percentage is the fraction
    cells_passed_qc / total_cells (between 0 and 1), or 0.0 when the user has
    no cells at all.
    """
    total_cells = cell_count(P_number, df)
    cells_passed_qc = cell_count(P_number, qc_pass(df))
    if total_cells:
        qc_pass_percentage = float(cells_passed_qc) / float(total_cells)
    else:
        # A user with no cells would otherwise raise ZeroDivisionError.
        qc_pass_percentage = 0.0
    mouse, human = get_species(df)
    mouse_cells = cell_count(P_number, mouse)
    human_cells = cell_count(P_number, human)
    imaged_cells = cell_count(P_number, image_go(df))
    return (total_cells, cells_passed_qc, qc_pass_percentage, mouse_cells, human_cells, imaged_cells)

In [None]:
def get_df(month):
    """Query LIMS for patched cells whose container name matches a month string.

    Parameters
    ----------
    month : string
        Text spliced into a LIKE pattern applied to
        specimens.patched_cell_container. main() passes either a YYMM string
        or "_".

    Returns
    -------
    month_df : pandas DataFrame
        Whatever get_lims_dataframe returns for the query. The SELECT names
        the columns patched_cell_container, date, go_no_go_63x, rna_qc, name,
        link_20x and dendrite_type.
    """
    # This literal is not itself %-formatted, so both characters of each "%%"
    # pair end up in the query text around the month.
    # NOTE(review): whether limsquery collapses "%%" to "%" is not visible here - confirm.
    recording_month_query_str = "%%" + month + "%%"
    # NOTE(review): the pattern is inserted into the SQL text with the % operator,
    # so callers must only pass trusted/validated text for month.
    # The trailing backslashes on three lines inside the literal are line
    # continuations: those lines are joined without a newline in the final string.
    query = """
    WITH do_63x(cell_id, go) AS (SELECT DISTINCT cell.id, array_to_string(array_agg(DISTINCT tag.name), ' _AND_ ') 
    FROM specimens cell JOIN ephys_roi_results err ON err.id = cell.ephys_roi_result_id 
    JOIN specimen_tags_specimens sptagsp ON sptagsp.specimen_id = cell.id 
    JOIN specimen_tags tag ON tag.id = sptagsp.specimen_tag_id AND tag.id in (602120185,602122082) 
    GROUP BY cell.id ORDER BY 1), 
    dendrite_type AS (SELECT sts.specimen_id,array_to_string(array_agg(DISTINCT tag.name), ' ') AS dendrite_type \
    FROM specimen_tags_specimens sts JOIN specimen_tags tag on sts.specimen_tag_id = tag.id \
    WHERE tag.name in('dendrite type - spiny', 'dendrite type - NA', 'dendrite type - sparsely spiny', 'dendrite type - aspiny') \
    GROUP BY sts.specimen_id),
    imgs20 AS (WITH tag_list AS (SELECT DISTINCT ims.id AS ims_id, imst.name AS tag FROM image_series ims 
    JOIN image_series_image_series_tags ims2imst ON ims2imst.image_series_id = ims.id 
    JOIN image_series_tags imst ON imst.id = ims2imst.image_series_tag_id 
    ORDER BY 1,2) 
    SELECT DISTINCT ims.id AS image_series_id, ims.workflow_state AS workflow_state, ims.specimen_id, 
    ARRAY_TO_STRING(ARRAY_AGG(DISTINCT tag_list.tag), '_AND_') AS tags FROM image_series ims 
    LEFT JOIN tag_list ON tag_list.ims_id = ims.id 
    WHERE ims.type = 'FocalPlaneImageSeries' AND is_stack = false 
    GROUP BY ims.id, ims.workflow_state, ims.specimen_id)
    SELECT DISTINCT s.patched_cell_container, s.created_at AS date, do_63x.go AS go_no_go_63x, CASE WHEN ra.id IS NOT NULL THEN (CASE WHEN ra.failed = 't' THEN 'failed' ELSE 'passed' END) ELSE '' END AS rna_qc, s.name,
    '=HYPERLINK("http://lims2/focal_plane_image_series?id=' || imgs20.image_series_id || '")' as link_20x, dendrite_type
    FROM specimens s 
    LEFT JOIN projects proj ON s.project_id = proj.id 
    LEFT JOIN do_63x ON do_63x.cell_id = s.id 
    JOIN specimens slice ON s.parent_id = slice.id
    LEFT JOIN rna_amplification_inputs rai on rai.sample_id = s.id 
    LEFT JOIN imgs20 ON slice.id = imgs20.specimen_id 
    LEFT JOIN rna_amplifications ra on ra.id = rai.rna_amplification_id 
    LEFT JOIN dendrite_type on s.id = dendrite_type.specimen_id
    WHERE proj.code <> 'mMPATCH' AND 
    s.created_at > '2017-10-01' AND
    s.patched_cell_container IS NOT NULL AND
    s.patched_cell_container LIKE '%s'""" %recording_month_query_str


    # Commented-out SQL conditions on err.recording_date (not part of the query):
    #AND (err.recording_date > '2017-10-01' OR err.recording_date IS NULL)
    #AND err.recording_date > '2017-10-01'

    # Run the query and wrap the rows in a DataFrame (empty if nothing matched).
    month_df = get_lims_dataframe(query)
    return month_df

In [None]:
def qc_pass(df):
    """Return the rows of df whose 'rna_qc' value equals 'passed'."""
    return gen_filter(df, 'rna_qc', 'passed')

In [None]:
def image_go(df):
    """Return the rows of df whose 'go_no_go_63x' value equals '63x go'."""
    return gen_filter(df, 'go_no_go_63x', '63x go')

In [None]:
def get_species(df):
    """Split df into non-human and human rows based on the 'name' column.

    A row counts as human when its name starts with "H" followed by two
    digits; every row flagged False is returned as mouse. The boolean flag is
    stored in a new 'human?' column on the dataframe that was passed in, so
    the caller's frame is modified.

    Returns
    -------
    A tuple (mouse, human) of filtered dataframes
    """
    # str.match anchors the pattern at the start of each name.
    df['human?'] = df['name'].str.match(r"H\d\d")
    mouse_rows = gen_filter(df, 'human?', False)
    human_rows = gen_filter(df, 'human?', True)
    return (mouse_rows, human_rows)

In [None]:
def validated_input(prompt_text, invalid_response, valid_options=None):
    """Keep asking user for input until a valid input has been entered.

    Parameters
    ----------
    prompt_text : string
        Text shown to the user at each prompt.
    invalid_response : string
        A message printed when the entry is not one of valid_options.
    valid_options : None or list
        Accepted entries. When None (or empty), any entry is accepted.

    Returns
    -------
    result : string
        User's validated response to prompt text.
    """
    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        result = read_line(prompt_text)
        if valid_options and result not in valid_options:
            print(invalid_response)
            continue
        return result

In [None]:
def validated_date_input(prompt_text, invalid_response, valid_options=None):
    """Prompt user to enter date, and check whether date is valid input.
    Keep prompting until a valid input has been entered.

    An entry is valid when it is exactly four digits (YYMM) and parses as a
    two-digit year followed by a month number from 01 to 12.

    Parameters
    ----------
    prompt_text : string
    invalid_response : string
        A message to return to user if entry was invalid.
    valid_options: None or list
        Accepted for signature compatibility; it is not consulted.

    Returns
    -------
    result : string
        User's validated response to prompt text.
    """
    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        result = read_line(prompt_text).lower()
        # strptime alone would accept a 3-character entry such as "171"
        # (year 17, month 1), so enforce the four-digit shape first.
        is_valid = len(result) == 4 and result.isdigit()
        if is_valid:
            try:
                datetime.strptime(result, "%y%m")
            except ValueError:
                # e.g. month 00 or 13
                is_valid = False
        if is_valid:
            return result
        print(invalid_response)

In [None]:
def user_plot(user_stats, P_number, month):
    """Draw a bar chart of one user's cell counts and save it as 'User_Plot.png'.

    Parameters
    ----------
    user_stats : sequence of three numbers
        (total_cells, cells_passed_qc, imaged_cells), in that order.
    P_number : string
        Used as the x-axis tick label.
    month : string
        Shown in the title; "_" is displayed as "All Production".

    Returns
    -------
    None
    """
    total_cells, cells_passed_qc, imaged_cells = user_stats
    title_label = "All Production" if month == "_" else month

    # All three bars share x = 1, so they are drawn on top of one another.
    plt.bar([1, 1, 1], user_stats, width=0.5, color=sns.light_palette("green"))
    plt.gca().set_xlim(0.7, 1.5)
    plt.xticks([1], [P_number])
    sns.despine()

    plt.ylabel("Number of Cells", fontsize=12)
    plt.title("User Stats: %s" % title_label, loc="center", fontsize=14)

    # Each label sits to the right of the bars, midway through its own band.
    annotations = (
        ((total_cells - cells_passed_qc) / 2 + cells_passed_qc, "Total: %d" % total_cells),
        ((cells_passed_qc - imaged_cells) / 2 + imaged_cells, "Passed QC: %d" % cells_passed_qc),
        (imaged_cells / 2, "Imaged: %d" % imaged_cells),
    )
    for y_position, label in annotations:
        plt.text(1.3, y_position, label, va="center")
    plt.tight_layout()

    plt.savefig('User_Plot.png')

In [None]:
def main():
    """Prompts the user about the operator on which they'd like to report,
    then prints info about that operator's patching statistics.

    Besides printing, it saves a bar chart through user_plot and an Excel
    sheet of the operator's cells through user_df. When the query returns no
    rows, it prints a notice and returns without producing either file.

    Returns
    -------
    None
    """

    #Get user input
    str_prompt1 = "\nOn what operator would you like to report? (P#): "
    valid_vals1 = ["P1", "P2", "P8", "P9", "PA", "PB"]
    str_prompt2 = "\nWould you like to report on all samples (Y or N)?: "
    valid_vals2 = ["Y", "N"]
    str_prompt3 = "\nOn what month would you like to report? (YYMM): "

    response1 = "\nPlease try again...rig operator should be in the form P#"
    response2 = "\nPlease try again...Y or N?"
    response3 = "\nPlease try again...month should be in the form YYMM"

    P_number = validated_input(str_prompt1, response1, valid_vals1)
    range_state = validated_input(str_prompt2, response2, valid_vals2)
    if range_state == "Y":
        # "_" is the single-character wildcard in SQL LIKE, so the month
        # filter in get_df matches every container name.
        month = "_"
    else:
        month = validated_date_input(str_prompt3, response3, valid_options=None)

    #Get data based on user input
    month_df = get_df(month)
    if month_df.empty:
        # An empty result has no columns; the statistics helpers would fail
        # with KeyError on it.
        print("\nNo cells found for this selection; nothing to report.")
        return
    [total_cells, cells_passed_qc, qc_pass_percentage, mouse_cells, human_cells, imaged_cells] = per_user(P_number, month_df)
    data_to_plot = [total_cells, cells_passed_qc, imaged_cells]
    user_plot(data_to_plot, P_number, month)

    #Print user stats (parenthesised single-argument print works on Python 2 and 3)
    print("\nTotal cells patched: %d" % total_cells)
    print("Cells that passed RNA seq QC: %d" % cells_passed_qc)
    print("QC pass percentage: %f" % qc_pass_percentage)
    print("Mouse cells: %d" % mouse_cells)
    print("Human cells: %d" % human_cells)
    print("Cells imaged at 63x: %d" % imaged_cells)

    #Save data to Excel file
    user_df(P_number, month_df)

In [None]:
# Run the interactive report when this cell is executed.
main()