# Coding Project Analysis: Nucleated patches per user

In [None]:
import os
import csv
import json
import fnmatch
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

In [None]:
def get_jsons(dirname, expt):
    """Collect the paths of one experiment type's JSON files in a directory.

    Parameters
    ----------
    dirname : string
        Directory to scan (not recursive).
    expt : string
        Experiment tag expected right before the ".json" extension,
        e.g. "PS" selects names matching "*.PS.json".

    Returns
    -------
    list of string
        ``dirname`` joined with every matching file name, in the order
        ``os.listdir`` reported them.
    """
    pattern = '*.%s.json' % expt
    matching_names = fnmatch.filter(os.listdir(dirname), pattern)
    return [os.path.join(dirname, name) for name in matching_names]

In [None]:
# Build the list of paths for every "PS" JSON metadata file found in the
# shared metadata directory on 279.
metadata_dir = "//allen/programs/celltypes/workgroups/279/Patch-Seq/all-metadata-files/"
json_list = get_jsons(metadata_dir, "PS")
json_list

In [None]:
from pandas.io.json import json_normalize

def flatten_attempts(slice_info, form_version):
    """Return flattened slice metadata dataframe.

    Parameters
    ----------
    slice_info : dict
        A dictionary of slices with nested pipette attempts.

    form_version : string
        A string containing the JEM form version.
        (Pre-version 2 contains IVSCC, PatchSeq and Electroporation arrays)

    Returns
    -------
    attempts_slice_df : pandas dataframe or None
        A dataframe with one row per pipette attempt, carrying all slice
        metadata plus a 1-based ``attempt`` column. ``None`` is returned
        when the pipette array is empty.

    Raises
    ------
    KeyError
        If the pipette array expected for this form version, or the
        "limsSpecName" field, is missing from ``slice_info``.
    """
    # Compare the major version as a number: a plain string comparison would
    # rank e.g. "10.0.0" below "2".
    version_text = str(form_version)
    try:
        is_v2_or_later = int(version_text.split(".")[0]) >= 2
    except ValueError:
        # Major component is not numeric; fall back to the string comparison.
        is_v2_or_later = version_text >= "2"
    ps_array_name = "pipettes" if is_v2_or_later else "pipettesPatchSeqPilot"

    attempts = slice_info[ps_array_name]
    if not attempts:
        # No attempts to flatten. Normalizing an empty list raises IndexError
        # on some pandas versions, so this case is handled explicitly.
        return None

    df = json_normalize(slice_info)
    attempts_df = json_normalize(attempts)
    # Tag every attempt with the slice name so it can serve as the merge key.
    attempts_df["limsSpecName"] = df["limsSpecName"].values[0]
    attempts_df["attempt"] = list(range(1, len(attempts_df) + 1))
    attempts_slice_df = pd.merge(df, attempts_df, how="outer", on="limsSpecName")
    # The nested array is now expanded into rows; drop the raw column.
    return attempts_slice_df.drop(ps_array_name, axis=1)

def is_field(df, colname):
    """Report whether ``df[colname]`` can be looked up.

    Parameters
    ----------
    df : a Pandas dataframe (or any other subscriptable object, e.g. a dict)
    colname : string
        Column name / key to look up.

    Returns
    -------
    Boolean
        True when the lookup succeeds, False when it raises KeyError.
    """

    try:
        df[colname]
    except KeyError:
        return False
    else:
        return True

### Output Starts

In [None]:
# Flatten every JSON metadata file into one frame of pipette attempts.
flat_frames = []

for json_path in json_list:
    # Only the parsing needs the open file handle.
    with open(json_path) as data_file:
        slice_info = json.load(data_file)
    # Files without a "formVersion" key are treated as version 1.0.0.
    jem_version = slice_info.get("formVersion", "1.0.0")
    flat_df = flatten_attempts(slice_info, jem_version)
    # flatten_attempts may return None; skip those files.
    if flat_df is not None:
        flat_frames.append(flat_df)

# Concatenate once instead of growing the frame inside the loop, which would
# re-copy all accumulated rows for every file.
json_df = pd.concat(flat_frames, axis=0) if flat_frames else pd.DataFrame()

pd.options.display.max_columns = None  #Displays all the columns 
json_df.head()

### Reduce the date column to YYYY-MM-DD

In [None]:
# Print a summary of the combined frame (columns, dtypes, non-null counts).
json_df.info()

In [None]:
# Keep only the leading "YYYY-MM-DD" part of each date string, which is
# 10 characters long.
json_df['date'] = json_df['date'].str[:10]

In [None]:
# datetime.strptime parses a single string and has no format=/errors=
# keywords, so the whole column is converted with pandas instead.
# Values that cannot be parsed become NaT (errors='coerce').
# NOTE(review): the format is left to pandas to infer because the exact
# layout of the stored date strings is not visible here -- confirm, and pass
# an explicit format= once it is known.
json_df['date'] = pd.to_datetime(json_df['date'].str.strip(), errors='coerce')

In [None]:
#json_df['date'] = pd.to_datetime(json_df['date'], format = '%Y%m%d', errors = 'coerce')

### Sets the Date column as the index

In [None]:
# Use the 'date' column as the row index (the column itself is removed).
json_df = json_df.set_index('date')

### Sorts the rows by the Date index

In [None]:
# Order the rows by their index values, ascending.
json_df = json_df.sort_index()

In [None]:
# Keep the rows dated on or after the start date.
# A boolean mask is used instead of the label slice json_df['start':'end']
# because label slicing on a date index is fragile when the index is not
# monotonic (e.g. when it contains NaT from unparseable dates). Rows whose
# date is NaT compare False and are dropped.
# To add an end date: json_df[(json_df.index >= start) & (json_df.index <= end)]
# Problem: the old form versions display the date differently.
json_df1 = json_df[json_df.index >= '2017-10-02']

In [None]:
# Preview the first rows of the date-filtered frame.
json_df1.head()

In [None]:
# Preview the last rows of the date-filtered frame.
json_df1.tail()

In [None]:
# Mouse: keep only attempts whose status is exactly 'SUCCESS'.
# Old JEM forms also use SUCCESS(low confidence) and SUCCESS(high confidence);
# those values are not matched by this filter.
is_success = json_df1['status'] == 'SUCCESS'
# Optional: additionally restrict to Cre+ cells.
is_cre_positive = json_df1['approach.creCell'] == 'Cre+'
json_df1 = json_df1[is_success & is_cre_positive]

In [None]:
# Number of successful attempts per rig operator (rows remaining after the
# status / Cre filters), most frequent first.
json_df1['rigOperator'].value_counts()

In [None]:
# Count of each 'extraction.postPatch' value (nucleus present vs. absent)
# among the remaining attempts.
json_df1['extraction.postPatch'].value_counts()