##### Imports

In [1]:
import pandas as pd
import csv
import logging

import seaborn as sns

##### Logging Set-up

In [2]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

##### Miscellaneous Settings

In [3]:
sns.set(context = "notebook", style = "ticks", font="verdana", font_scale = 1.35)
pd.set_option("display.max_colwidth",150) #Expands the number of characters shown in the columns
pd.set_option('display.max_columns', None)

##### Logging Comments about this project

In [4]:
logger.info("""Purpose: Discovering similarities/differences in ephys features among new mouse regions
            
            Checklist:
            1) Track RSPd, RSPv, SSp for coronal
            2) Track ORB, CLA, MO for sagittal
            3) Obtains ephys features for all the mouse regions
            4) Plot similarities/differences""")

INFO:__main__:Purpose: Discovering similarities/differences in ephys features among new mouse regions
            
            Checklist:
            1) Track RSPd, RSPv, SSp for coronal
            2) Track ORB, CLA, MO for sagittal
            3) Obtains ephys features for all the mouse regions
            4) Plot similarities/differences


##### Load in JEM form automatically from file path

In [5]:
def read_jem_csv(fields=None):
    """
    Finds and read jem metadata csv.
    
    Args: 
        directory_name: file directory path of jem matadata file
        jem_file: jem metadata file name

    Returns:
        jem: a pandas dataframe with jem metadata
    """
    global jem  # Define as global to return outside of function
    
    csv_path = "Z:/Patch-Seq/compiled-jem-data/"
    csv_file = "jem_metadata.csv"
    jem = pd.read_csv(csv_path + csv_file, usecols=fields, index_col=["date"])
    logger.info("Read jem metadata csv (date: 2/04/2020)")

In [6]:
#If you want to filter on the columns immediately
jem_fields=["date", "organism_name", "name", "container", "rigOperator",
            "status", "roi_major", "roi_minor",
            "extraction.postPatch", "extraction.endPipetteR",
            "extraction.pressureApplied", "extraction.retractionPressureApplied",
            "recording.timeStart", "recording.timeWholeCellStart",
            "extraction.timeExtractionStart", "extraction.timeExtractionEnd", "extraction.timeRetractionEnd",
            "cell_depth",
            "approach.creCell",
            "approach.sliceHealth"]
read_jem_csv(jem_fields)
#jem.tail(1490) # first recording 2020-01-03 10:40:30 -0800 and last recording of 2019: 2019-12-23 15:51:07 -0800

INFO:__main__:Read jem metadata csv (date: 2/04/2020)


##### Choosing Date Range for 2020 (1/03/2020 - Present 2020)

In [7]:
jem_2020 = jem.loc["2020-01-03 10:40:30 -0800":,:]
logger.info("Date Range: 2020-01-03 to Present 2020")

#jem_2020["p_container"] = jem_2020.container.str[0:4]
#logger.info("Creating column to check p_container")
#jem_2020.p_container.unique()

r_users = ["kristenh", "lindsayn", "ramr", "katherineb", "jessicat"] 
m_users = ["P1", "P8", "PA", "PE", "PF"]
#c_users = ["PC"]

jem_2020.dropna(subset=["rigOperator", "container"], inplace=True)
jem_2020 = jem_2020[jem_2020["status"] == "SUCCESS"]
jem_2020 = jem_2020[jem_2020.rigOperator.str.contains("|".join(r_users))]
jem_2020 = jem_2020[jem_2020.container.str.contains("|".join(m_users))]
logger.info("Created dataframe with selected users in container column")

jem_2020 = jem_2020[jem_2020.index.notnull()]
logger.info("Dropped NaNs from index date column")

INFO:__main__:Date Range: 2020-01-03 to Present 2020
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
INFO:__main__:Created dataframe with selected users in container column
INFO:__main__:Dropped NaNs from index date column


##### Creating new_region: coronal_region, sagittal_region, other_region and v1_region

In [8]:
jem_2020["new_region"] = "default_value"
logger.info("Created a new column(new_region) with default_value")

c_region = ["RSPd", "RSPv", "SSp"] 
s_region = ["MOs", "MOp", "ORB", "CTXsp"]
o_region = ["TCx", "HY"]
v_region = ["VISp"]

jem_2020["new_region"][jem_2020.roi_major.str.contains("|".join(c_region))] = "coronal_region"
jem_2020["new_region"][jem_2020.roi_major.str.contains("|".join(s_region))] = "sagittal_region"
jem_2020["new_region"][jem_2020.roi_major.str.contains("|".join(o_region))] = "other_region"
jem_2020["new_region"][jem_2020.roi_major.str.contains("|".join(v_region))] = "v1_region"
logger.info("Filled in new_region with corresponding region")

cor = jem_2020[jem_2020["new_region"] == "coronal_region"] 
sag = jem_2020[jem_2020["new_region"] == "sagittal_region"]
other = jem_2020[jem_2020["new_region"] == "other_region"]
v1 = jem_2020[jem_2020["new_region"] == "v1_region"]
logger.info("Created region pandas dataframes")

logger.info("Coronal Region Count: %s", cor.name.count())
logger.info("Sagittal Region Count: %s", sag.name.count())
logger.info("Other Region Count: %s", other.name.count())
logger.info("V1 Region Count: %s", v1.name.count())

INFO:__main__:Created a new column(new_region) with default_value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#retu

In [9]:
jem_2020

Unnamed: 0_level_0,organism_name,name,cell_depth,approach.creCell,approach.sliceHealth,extraction.endPipetteR,extraction.postPatch,extraction.pressureApplied,extraction.retractionPressureApplied,extraction.timeExtractionEnd,extraction.timeExtractionStart,extraction.timeRetractionEnd,recording.timeStart,recording.timeWholeCellStart,rigOperator,status,roi_major,roi_minor,container,new_region
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-01-03 10:40:30 -0800,Mouse,Gad2-IRES-Cre;Ai14-504539.10.01.01,54.1,Cre-,2.0,1000.0,nucleus_present,-27.0,-33.0,11:13:49 -0800,11:10:01 -0800,11:19:49 -0800,11:01:36 -0800,11:03:04 -0800,jessicat,SUCCESS,RSPd,layer 2/3,PFS4_200103_701_A01,coronal_region
2020-01-03 13:00:07 -0800,Mouse,Pvalb-IRES-Cre;Ai14-504851.11.01.01,40.4,Cre-,2.0,1000.0,nucleus_present,-50.0,-50.0,13:26:03 -0800,13:23:58 -0800,13:28:22 -0800,13:14:59 -0800,13:15:56 -0800,jessicat,SUCCESS,RSPd,layer 5,PFS4_200103_702_A01,coronal_region
2020-01-03 13:00:07 -0800,Mouse,Pvalb-IRES-Cre;Ai14-504851.11.01.02,54.1,Cre-,2.0,2000.0,nucleus_present,-45.0,-45.0,13:50:01 -0800,13:47:58 -0800,13:51:33 -0800,13:40:26 -0800,13:42:07 -0800,jessicat,SUCCESS,RSPv,layer 2/3,PFS4_200103_703_A01,coronal_region
2020-01-06 13:12:44 -0800,Mouse,Vip-IRES-Cre;Ai14-504179.11.01.02,47.1,Cre-,1.0,3000.0,nucleus_present,-40.0,-40.0,14:20:51 -0800,14:18:02 -0800,14:22:22 -0800,14:12:48 -0800,14:14:20 -0800,jessicat,SUCCESS,SSp,layer 5,PFS4_200106_702_A01,coronal_region
2020-01-06 13:12:44 -0800,Mouse,Vip-IRES-Cre;Ai14-504179.11.01.01,79.3,Cre-,1.0,7.0,nucleus_present,-47.0,-47.0,13:45:37 -0800,13:43:51 -0800,13:49:54 -0800,13:37:16 -0800,13:39:57 -0800,jessicat,SUCCESS,RSPd,layer 5,PFS4_200106_701_A01,coronal_region
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.04,72.7,,2.0,1900.0,nucleus_present,-56.0,-56.0,16:38:16 -0800,16:34:40 -0800,16:43:43 -0800,16:24:15 -0800,16:26:22 -0800,ramr,SUCCESS,TCx,layer 1,PAS4_200204_457_A01,other_region
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.02,45.9,,2.0,2000.0,nucleus_present,-55.0,-55.0,15:42:20 -0800,15:40:49 -0800,15:44:43 -0800,15:32:51 -0800,15:35:01 -0800,ramr,SUCCESS,TCx,layer 1,PAS4_200204_455_A01,other_region
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.07,52.7,,1.0,3.2,nucleus_present,-55.0,-55.0,18:26:23 -0800,18:23:36 -0800,18:29:34 -0800,18:17:10 -0800,18:20:00 -0800,ramr,SUCCESS,TCx,layer 4,PAS4_200204_460_A01,other_region
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.01,54.3,,2.0,1400.0,nucleus_present,-55.0,-55.0,15:07:30 -0800,15:04:18 -0800,15:10:56 -0800,14:52:55 -0800,14:55:08 -0800,ramr,SUCCESS,TCx,layer 1,PAS4_200204_454_A01,other_region


##### Creating Time columns for exp_time, rec_time, ext_time, ret_time 

In [10]:
#jem["exp_time"] = pd.to_datetime(jem["extraction.timeRetractionEnd"]) - pd.to_datetime(jem["recording.timeStart"])
#jem["rec_time"] = pd.to_datetime(jem["extraction.timeRetractionEnd"]) - pd.to_datetime(jem["recording.timeWholeCellStart"])
#jem["ext_time"] = pd.to_datetime(jem["extraction.timeExtractionEnd"]) - pd.to_datetime(jem["extraction.timeExtractionStart"])
#jem["ret_time"] = pd.to_datetime(jem["extraction.timeRetractionEnd"]) - pd.to_datetime(jem["extraction.timeExtractionEnd"])

##### Creating post_patch_outcomes based on end seal and post Patch

In [11]:
jem_2020["post_patch_outcome"] = "default_value"

In [12]:
jem_2020

Unnamed: 0_level_0,organism_name,name,cell_depth,approach.creCell,approach.sliceHealth,extraction.endPipetteR,extraction.postPatch,extraction.pressureApplied,extraction.retractionPressureApplied,extraction.timeExtractionEnd,extraction.timeExtractionStart,extraction.timeRetractionEnd,recording.timeStart,recording.timeWholeCellStart,rigOperator,status,roi_major,roi_minor,container,new_region,post_patch_outcome
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-03 10:40:30 -0800,Mouse,Gad2-IRES-Cre;Ai14-504539.10.01.01,54.1,Cre-,2.0,1000.0,nucleus_present,-27.0,-33.0,11:13:49 -0800,11:10:01 -0800,11:19:49 -0800,11:01:36 -0800,11:03:04 -0800,jessicat,SUCCESS,RSPd,layer 2/3,PFS4_200103_701_A01,coronal_region,default_value
2020-01-03 13:00:07 -0800,Mouse,Pvalb-IRES-Cre;Ai14-504851.11.01.01,40.4,Cre-,2.0,1000.0,nucleus_present,-50.0,-50.0,13:26:03 -0800,13:23:58 -0800,13:28:22 -0800,13:14:59 -0800,13:15:56 -0800,jessicat,SUCCESS,RSPd,layer 5,PFS4_200103_702_A01,coronal_region,default_value
2020-01-03 13:00:07 -0800,Mouse,Pvalb-IRES-Cre;Ai14-504851.11.01.02,54.1,Cre-,2.0,2000.0,nucleus_present,-45.0,-45.0,13:50:01 -0800,13:47:58 -0800,13:51:33 -0800,13:40:26 -0800,13:42:07 -0800,jessicat,SUCCESS,RSPv,layer 2/3,PFS4_200103_703_A01,coronal_region,default_value
2020-01-06 13:12:44 -0800,Mouse,Vip-IRES-Cre;Ai14-504179.11.01.02,47.1,Cre-,1.0,3000.0,nucleus_present,-40.0,-40.0,14:20:51 -0800,14:18:02 -0800,14:22:22 -0800,14:12:48 -0800,14:14:20 -0800,jessicat,SUCCESS,SSp,layer 5,PFS4_200106_702_A01,coronal_region,default_value
2020-01-06 13:12:44 -0800,Mouse,Vip-IRES-Cre;Ai14-504179.11.01.01,79.3,Cre-,1.0,7.0,nucleus_present,-47.0,-47.0,13:45:37 -0800,13:43:51 -0800,13:49:54 -0800,13:37:16 -0800,13:39:57 -0800,jessicat,SUCCESS,RSPd,layer 5,PFS4_200106_701_A01,coronal_region,default_value
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.04,72.7,,2.0,1900.0,nucleus_present,-56.0,-56.0,16:38:16 -0800,16:34:40 -0800,16:43:43 -0800,16:24:15 -0800,16:26:22 -0800,ramr,SUCCESS,TCx,layer 1,PAS4_200204_457_A01,other_region,default_value
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.02,45.9,,2.0,2000.0,nucleus_present,-55.0,-55.0,15:42:20 -0800,15:40:49 -0800,15:44:43 -0800,15:32:51 -0800,15:35:01 -0800,ramr,SUCCESS,TCx,layer 1,PAS4_200204_455_A01,other_region,default_value
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.07,52.7,,1.0,3.2,nucleus_present,-55.0,-55.0,18:26:23 -0800,18:23:36 -0800,18:29:34 -0800,18:17:10 -0800,18:20:00 -0800,ramr,SUCCESS,TCx,layer 4,PAS4_200204_460_A01,other_region,default_value
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.01,54.3,,2.0,1400.0,nucleus_present,-55.0,-55.0,15:07:30 -0800,15:04:18 -0800,15:10:56 -0800,14:52:55 -0800,14:55:08 -0800,ramr,SUCCESS,TCx,layer 1,PAS4_200204_454_A01,other_region,default_value


##### File Paths

In [13]:
csv_path = "C:/Users/ramr/Documents/Github/analysis_projects/csv/"
excel_path = "C:/Users/ramr/Documents/Github/analysis_projects/excel/"

##### Writing to csv_file

In [14]:
jem.to_csv(csv_path + "jem.csv")
jem_2020.to_csv(csv_path + "jem_2020.csv")

##### Writing to excel_file

In [15]:
writer = pd.ExcelWriter(excel_path + "new_region.xlsx")
jem.to_excel(writer, "jem_metadata", freeze_panes=(1,0))
jem_2020.to_excel(writer, "jem_2020", freeze_panes=(1,0))
cor.to_excel(writer, "coronal_region", freeze_panes=(1,0))
sag.to_excel(writer, "sagittal_region", freeze_panes=(1,0))
other.to_excel(writer, "other_region", freeze_panes=(1,0))
v1.to_excel(writer, "v1_region", freeze_panes=(1,0))
writer.save()

##### Figure out how to make this into useable function

In [16]:
def create_new_column(new_col, cond_col, region, region_name):
    """
    Creating new column
    
    Args:
        as
    Returns:
        None
    """
    jem_2020[new_col] = "default_value"
    
    c_region = ["RSPd", "RSPv", "SSp"] 
    s_region = ["MOs", "MOp", "ORB", "CTXsp"]
    o_region = ["TCx", "HY"]
    v_region = ["VISp"]
    
    jem_2020[new_col][jem_2020[cond_col].str.contains("|".join(region))] = region_name
    
#create_new_column("new_region", "roi_major", c_region, "coronal_region")
#create_new_column("new_region", "roi_major", s_region, "sagittal_region")
#create_new_column("new_region", "roi_major", o_region, "other_region")
#create_new_column("new_region", "roi_major", v_region, "v1_region")