# Analysis Project: Post Patch vs Cell Depth

In [1]:
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from scipy import stats
import math
import seaborn as sns
%matplotlib inline

In [2]:
ps = pd.read_csv("patch_seq_log_mouse.csv")

In [3]:
select = ["Date", "Depth (um)", "Post patch?", "Nucleus sucked in?", "Post patch pipette R", "PCR cycles", "SM_QC_PF", "Bad dates"]
ps = ps[select]

#Changing Date Format
ps["Date"] = pd.to_datetime(ps["Date"], format = "%y%m%d")

#Categories
ps["Post patch?"].astype("category")
ps["Nucleus sucked in?"].astype("category")
ps["SM_QC_PF"] = ps["SM_QC_PF"].astype("category")

#Filtering Columns
ps = ps[ps["PCR cycles"] == 21]      #shows 21 pcr cycles
ps = ps[ps["SM_QC_PF"] != "cnt"]     #shows everything except cnt
ps = ps[ps["Bad dates"] != "x"]      #shows everthing except x

#ps.dropna(subset = ["Post patch?", "Nucleus sucked in?"], returns start date 10/02/17 and data after that date
ps.dropna(subset = ["Post patch?", "SM_QC_PF"]).head()

Unnamed: 0,Date,Depth (um),Post patch?,Nucleus sucked in?,Post patch pipette R,PCR cycles,SM_QC_PF,Bad dates
4968,2017-04-19,56.3,Outside-Out,,,21.0,fail,
4969,2017-04-19,28.5,Outside-Out,,,21.0,pass,
4970,2017-04-19,53.8,No-Seal,,,21.0,fail,
4971,2017-04-19,24.5,Nucleated,,,21.0,pass,
4972,2017-04-19,40.7,Outside-Out,,,21.0,pass,


## Definitions

In [4]:
#Clarify if cutoff should be 500 and 501 or 500 and 499
#Defines each post patch category 

def nucleated(x):
    nucleus = x[(x["Post patch?"] == "Nucleated") | (((x["Post patch?"] == "nucleus_visible") | (x["Post patch?"] == "nucleus_present")) & (x["Post patch pipette R"] >= 501))]
    return nucleus

def partial_nucleated(y):
    partial = y[(y["Post patch?"] == 'Partial-Nucleus') | (((y['Post patch?'] == 'nucleus_present') | (y['Post patch?'] == 'nucleus_visible')) & (y["Post patch pipette R"] <= 500))]
    return partial

def outside_out(z):
    outside = z[(z["Post patch?"] == 'Outside-Out') | (((z['Post patch?'] == 'nucleus_absent') | (z['Post patch?'] == 'no_nuclues_visible')) & (z['Post patch pipette R'] >= 501))]
    return outside

def no_seal(w): 
    no = w[(w["Post patch?"] == 'No-Seal') | (((w['Post patch?'] == 'nucleus_absent') | (w['Post patch?'] == 'no_nucleus_visible')) & (w['Post patch pipette R'] <= 500))]
    return no
    
def entire_cell(v):
    entire = v[(v["Post patch?"] == 'Entire-Cell') | (v['Post patch?'] == 'entire_cell')]
    return entire

## Creating a Dataframe by Post Patch Class

In [5]:
#Labelling each post patch category
#This creates a column returning the "___" for the defined categories above

nuc = nucleated(ps)
nuc["post patch class"] = "Nucleated"

part_nuc = partial_nucleated(ps)
part_nuc["post patch class"] = "Partial nucleated"

out_out = outside_out(ps)
out_out["post patch class"] = "Outside out"

noSeal = no_seal(ps)
noSeal["post patch class"] = "No seal"

entirecell = entire_cell(ps)
entirecell["post patch class"] = "Entire Cell"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-d

In [6]:
#This puts data in order the frames list 
#All nucleated data is shown then partial nuc then outside-out...as continues inside brackets

frames = [nuc, part_nuc, out_out, noSeal, entirecell]
ps = pd.concat(frames)
ps.head()

Unnamed: 0,Date,Depth (um),Post patch?,Nucleus sucked in?,Post patch pipette R,PCR cycles,SM_QC_PF,Bad dates,post patch class
4971,2017-04-19,24.5,Nucleated,,,21.0,pass,,Nucleated
4973,2017-04-19,17.6,Nucleated,,,21.0,pass,,Nucleated
4975,2017-04-19,40.2,Nucleated,,,21.0,pass,,Nucleated
4988,2017-04-20,56.2,Nucleated,,,21.0,pass,,Nucleated
4997,2017-04-20,55.4,Nucleated,,,21.0,pass,,Nucleated


## Experimenting not finalized code

In [7]:
#Not sure about this just experimenting, refer to Rusty's example of Post patch by cre-line
#Dropping specific columns that are labeled within the brackets
#You don't need to add labels or axis, that is how the method is labeled, press shift+tab to check

ps0 = ps.drop(labels = ["Post patch?", "PCR cycles", "SM_QC_PF", "Bad dates"], axis = 1)
ps0.head()

Unnamed: 0,Date,Depth (um),Nucleus sucked in?,Post patch pipette R,post patch class
4971,2017-04-19,24.5,,,Nucleated
4973,2017-04-19,17.6,,,Nucleated
4975,2017-04-19,40.2,,,Nucleated
4988,2017-04-20,56.2,,,Nucleated
4997,2017-04-20,55.4,,,Nucleated


In [8]:
ps0.to_csv("depth_post")

## Misc

ps["Post patch?"].value_counts()
ps["Nucleus sucked in?"].value_counts()
ps.dtypes
ps.info()

All categories for post patch...up till 10/02/2017 
All categories for nucleus sucked in?... starts at 10/02/2017
For post patch pipette R and nucleus sucked in?... starts at 10/02/2017

depth = numerical 
post patch? = categorical 
post patch pipette R = numerical 
nucleus sucked in? = categorical

## Graphing

Either use Box & Whisker Plot OR Histogram. 
But Whisker Plot would be easier and cleaner to display 5 categories.
You need to display 5 categories of patching and the range of the depth values. 

Displayed data
Title = Post Patch vs Cell Depth
x-axis = patching categories 
y-axis = Cell Depth