##### Imports

In [1]:
import pandas as pd
import csv
import logging
from functions_py.mephys_funcs import read_file, merge_dataframes, filter_date, drop_cols, drop_nans, \
create_cond_df, create_container_df

import seaborn as sns

##### File Paths

In [2]:
# Output folders for generated files (absolute, machine-specific paths).
# Both end in "/" so a file name can be appended with plain string concatenation.
csv_path = "C:/Users/ramr/Documents/Github/analysis_projects/csv/"
excel_path = "C:/Users/ramr/Documents/Github/analysis_projects/excel/"

##### Logging Set-up

In [3]:
# Configure the root logger so INFO-level (and more severe) records are emitted.
logging.basicConfig(level=logging.INFO)
# Notebook-wide logger, named after the running module (__name__).
LOGGER = logging.getLogger(__name__)

##### Miscellaneous Settings

In [4]:
# Plot styling: seaborn "notebook" context, tick-style axes, Verdana font and
# text scaled up by 1.35.
sns.set(context="notebook", style="ticks", font="verdana", font_scale=1.35)

# DataFrame display: show up to 150 characters per cell and never hide columns.
pd.options.display.max_colwidth = 150
pd.options.display.max_columns = None

##### Logging Comments about this project

In [5]:
# Record what this notebook is for in the log output.
LOGGER.info("Purpose: Personal Tracker for 2020")

INFO:__main__:Purpose: Personal Tracker for 2020


In [6]:
# Columns requested from the JEM metadata file (passed to read_file together
# with path_jem).
fields_jem = ["date", "organism_name", "name", "container", "rigOperator",
              "status", "roi_major", "roi_minor",
              "extraction.postPatch", "extraction.endPipetteR"]
# Columns of the ephys feature file. The names include a literal b'...' wrapper.
# NOTE(review): presumably the CSV header was written from bytes objects —
# confirm against the file before renaming anything.
fields_ephys = ["b'name'", "b'patched_cell_container'", "b'vrest'", "b'sag'", 
                "b'tau'", "b'upstroke_downstroke_ratio_long_square'", 
                "b'latency'", "b'f_i_curve_slope'"]
# Columns of the shiny mapping file.
# NOTE(review): no cell visible here reads this list — confirm where it is used.
fields_shiny = ["cell_name", "sample_id", "cluster_label", "broad_class_label",
                "subclass_label", "cell_specimen_project"]


# Input file locations. These are absolute drive / network-share paths, so they
# only resolve on a machine with the same drives and mounts.
path_jem = "Z:/Patch-Seq/compiled-jem-data/jem_metadata.csv"
path_ephys = "C:/Users/ramr/Documents/Github/analysis_projects/csv/mephys_features.csv"
path_shiny = "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/patch_seq/star/mouse_patchseq_VISp_current/mapping.df.with.bp.40.lastmap.csv"


# Column-name lists handed to the helper functions: drop_nans_list is passed to
# drop_nans; drop_cols_list is presumably meant for drop_cols — TODO confirm,
# no such call is visible here.
drop_cols_list = ["b'name'", "b'patched_cell_container'", "cell_name", "sample_id"]
drop_nans_list = ["date", "name", "rigOperator"]

# rigOperator values used to build the conditional dataframe via create_cond_df.
r_users = ["kristenh", "lindsayn", "ramr", "katherineb", "jessicat"]

##### Main

In [7]:
def csv_first_line(path="counts.csv"):
    """Create (or overwrite) a counts CSV that contains only the header row.

    Parameters
    ----------
    path : str or path-like, optional
        File to write. Defaults to "counts.csv" in the current working
        directory, which is the file this function wrote before the
        parameter was added.

    Notes
    -----
    The file is opened in "w" mode, so any existing content is discarded.
    """
    # NOTE(review): the last region column is spelled "VISP" here while the
    # rest of the notebook uses "VISp"; left as-is so the header is unchanged.
    header = ["User", "Start Date", "End Date", "Overall Total",
              "Mouse Total", "RSPd", "RSPv", "SSp",
              "ORB", "CTXsp", "MOp", "MOs", "HY", "HIP", "VISP",
              "Human Total", "Human Acute", "Human Culture"]

    # newline="" lets the csv module control line endings itself; the `with`
    # block closes the file when it exits.
    with open(path, mode="w", newline="") as csv_file:
        csv.writer(csv_file).writerow(header)

In [8]:
def csv_counts():
    """Write the header row followed by one row of counts to "counts.csv".

    All values are read from notebook-level variables rather than arguments:
    ``start_date`` / ``end_date``, the ``*_2020`` dataframes and the per-region
    dataframes (``rspd`` ... ``visp``). Each count is ``Series.count()`` of the
    ``organism_name`` column, except the overall total, which counts
    ``container``.

    NOTE(review): ``jem_2020``, ``mjem_2020``, ``hjem_2020``, ``hajem_2020``
    and ``hcjem_2020`` are not assigned in any cell visible in this notebook
    (the visible cells define ``jem``, ``mjem``, ``hjem``, ``hajem`` and
    ``hcjem``) — confirm where they come from before calling this function.

    The file is opened in "w" mode, so existing content is replaced.
    """
    header = ["User", "Start Date", "End Date", "Overall Total",
              "Mouse Total", "RSPd", "RSPv", "SSp",
              "ORB", "CTXsp", "MOp", "MOs", "HY", "HIP", "VISP",
              "Human Total", "Human Acute", "Human Culture"]

    # Collect every value before touching the file, so a missing variable
    # raises without truncating an existing counts.csv.
    user = "User name"  # placeholder label written in the "User" column
    overall = jem_2020.container.count()
    m_total = mjem_2020.organism_name.count()
    region_counts = [frame.organism_name.count()
                     for frame in (rspd, rspv, ssp, orb, ctxsp,
                                   mop, mos, hy, hip, visp)]
    h_total = hjem_2020.organism_name.count()
    h_acute = hajem_2020.organism_name.count()
    h_culture = hcjem_2020.organism_name.count()

    # Value order must mirror `header`: acute comes before culture.
    row = [user, start_date, end_date, overall, m_total,
           *region_counts,
           h_total, h_acute, h_culture]

    # Open the file once. Re-opening it in "w" mode while the first handle was
    # still open let the buffered header overwrite the start of the data row.
    with open("counts.csv", mode="w", newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        writer.writerow(row)

In [25]:
# Count of entries in the "name" column for every subset, keyed by the label
# used when the counts are printed (dict order is the order listed here).
count_dict = {
    label: frame.name.count()
    for label, frame in [
        ("Overall total", jem),
        ("Mouse total", mjem),
        ("RSPd", rspd),
        ("RSPv", rspv),
        ("SSp", ssp),
        ("ORB", orb),
        ("CTXsp", ctxsp),
        ("MOp", mop),
        ("MOs", mos),
        ("HY", hy),
        ("HIP", hip),
        ("VISp", visp),
        ("Human total", hjem),
        ("Human acute", hajem),
        ("Human culture", hcjem),
    ]
}

In [36]:
def display_dict(dictionary):
    """Print every entry of ``dictionary`` on its own line as ``key: value``."""
    for label, count in dictionary.items():
        print(f"{label}: {count}")

In [47]:
# Print the covered date range, then one "label: count" line per entry.
# Relies on start_date, end_date, count_dict and display_dict already being
# defined by other cells of this notebook.
print(f"Date range: {start_date} to {end_date}")
display_dict(count_dict)

Date range: 2020-01-03 to 2020-02-25
Overall total: 656
Mouse total: 504
RSPd: 121
RSPv: 113
SSp: 26
ORB: 119
CTXsp: 40
MOp: 12
MOs: 15
HY: 3
HIP: 21
VISp: 34
Human total: 149
Human acute: 72
Human culture: 77


In [10]:
# Load the JEM metadata and pass it through the helper functions in order:
# date filter -> NaN removal -> rigOperator subset -> container label column.
jem = (
    read_file(path_jem, fields_jem)
    .pipe(filter_date, "date")
    .pipe(drop_nans, drop_nans_list)
    .pipe(create_cond_df, "rigOperator", r_users)
    .pipe(create_container_df, "container")
)

INFO:functions_py.mephys_funcs:Read file in as a pandas dataframe
INFO:functions_py.mephys_funcs:Sorted date to only display 2020 data
INFO:functions_py.mephys_funcs:Dropped NaNs from these columns: ['date', 'name', 'rigOperator']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(subset=drop_na_col, inplace=True)
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a container_label column to show(ex.'PA')


In [12]:
# "date" value of the first and of the last row, cut to the first 10
# characters (the YYYY-MM-DD part, as the printed output shows).
start_date, end_date = (jem["date"].iloc[row][:10] for row in (0, -1))
print(start_date, end_date, sep="\n")

2020-01-03
2020-02-25


In [13]:
# Split the tracker by organism.
mjem, hjem = (
    create_cond_df(jem, "organism_name", [organism])
    for organism in ("Mouse", "Human")
)

# Human rows: those whose container_label is "PC" (hcjem) versus all the
# others (hajem).
hajem = hjem[hjem["container_label"] != "PC"]
hcjem = create_cond_df(hjem, "container_label", ["PC"])

# One mouse dataframe per roi_major value, unpacked in the order listed.
rspd, rspv, ssp, orb, ctxsp, mop, mos, hy, hip, visp = (
    create_cond_df(mjem, "roi_major", [region])
    for region in ("RSPd", "RSPv", "SSp", "ORB", "CTXsp",
                   "MOp", "MOs", "HY", "HIP", "VISp")
)

INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mephys_funcs:Created a conditional dataframe based on a list of values
INFO:functions_py.mep

##### Writing to a CSV file

In [16]:
# Save the shiny dataframe as "shiny.csv" inside the csv output folder.
# NOTE(review): `shiny` is not assigned in any cell visible here — confirm the
# cell that creates it has been run first.
shiny.to_csv(csv_path + "shiny.csv")

##### Writing to an Excel file

In [17]:
# Write the mouse and human tables to separate sheets of one workbook, each
# with the header row frozen. The context manager saves and closes the file
# when the block exits; it replaces the explicit writer.save() call, which
# pandas 2.x no longer provides. sheet_name is passed by keyword because newer
# pandas releases discourage passing it positionally.
with pd.ExcelWriter(excel_path + "personal_tracker.xlsx") as writer:
    mjem.to_excel(writer, sheet_name="mouse_2020", freeze_panes=(1, 0))
    hjem.to_excel(writer, sheet_name="human_2020", freeze_panes=(1, 0))

##### Additional Fields

In [None]:
# Wider selection of JEM columns than fields_jem (the first ten names are the
# same). NOTE(review): presumably meant to be passed to read_file in place of
# fields_jem when the extra columns are needed — confirm.
all_jem_fields=["date", "organism_name", "name", "container", "rigOperator",
                "status", "roi_major", "roi_minor",
                "extraction.postPatch", "extraction.endPipetteR",
                "extraction.pressureApplied", "extraction.retractionPressureApplied",
                "recording.timeStart", "recording.timeWholeCellStart",
                "extraction.timeExtractionStart", "extraction.timeExtractionEnd", "extraction.timeRetractionEnd",
                "cell_depth",
                "approach.creCell",
                "approach.sliceHealth"]

# Scratch note kept for reference (commented out, never executed):
#jem.tail(1490) # first recording 2020-01-03 10:40:30 -0800 and last recording of 2019: 2019-12-23 15:51:07