##### Imports

In [1]:
import pandas as pd
import csv
import logging
from read_jem import read_jem_csv, sort_df

import seaborn as sns

##### Logging Set-up

In [2]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

##### Miscellaneous Settings

In [3]:
sns.set(context = "notebook", style = "ticks", font="verdana", font_scale = 1.35)
pd.set_option("display.max_colwidth",150) #Expands the number of characters shown in the columns
pd.set_option('display.max_columns', None)

##### Logging Comments about this project

In [4]:
logger.info("""Purpose: Personal Tracker for 2020
            
            Checklist:
            1) Counts of RSPd, RSPv, SSp for coronal
            2) Track ORB, CLA, MO for sagittal
            3) Obtains ephys features for all the mouse regions
            4) Plot similarities/differences""")

INFO:__main__:Purpose: Personal Tracker for 2020
            
            Checklist:
            1) Counts of RSPd, RSPv, SSp for coronal
            2) Track ORB, CLA, MO for sagittal
            3) Obtains ephys features for all the mouse regions
            4) Plot similarities/differences


##### Load in JEM form automatically from file path

In [5]:
def read_shiny_csv(fields=None):
    """
    Finds and read jem metadata csv.
    
    Args: 
        directory_name: file directory path of jem matadata file
        jem_file: jem metadata file name

    Returns:
        jem: a pandas dataframe with jem metadata
    """
    global shiny  # Define as global to return outside of function
    
    csv_path = "//allen/programs/celltypes/workgroups/rnaseqanalysis/shiny/patch_seq/star/mouse_patchseq_VISp_current/mapping.df.with.bp.40.lastmap.csv"
    shiny = pd.read_csv(csv_path, usecols=fields)
    logger.info("Read jem metadata csv (date: 2/07/2020)")

In [6]:
read_shiny_csv()

  if (await self.run_code(code, result,  async_=asy)):
INFO:__main__:Read jem metadata csv (date: 2/07/2020)


##### Using read_jem to import jem_metadata

In [8]:
#If you want to filter on the columns immediately
all_jem_fields=["date", "organism_name", "name", "container", "rigOperator",
                "status", "roi_major", "roi_minor",
                "extraction.postPatch", "extraction.endPipetteR",
                "extraction.pressureApplied", "extraction.retractionPressureApplied",
                "recording.timeStart", "recording.timeWholeCellStart",
                "extraction.timeExtractionStart", "extraction.timeExtractionEnd", "extraction.timeRetractionEnd",
                "cell_depth",
                "approach.creCell",
                "approach.sliceHealth"]

In [10]:
#If you want to filter on the columns immediately
#jem.tail(1490) # first recording 2020-01-03 10:40:30 -0800 and last recording of 2019: 2019-12-23 15:51:07

jem = read_jem_csv()
logger.info("Imported jem_metadata.csv as a dataframe")

INFO:read_jem:Read jem metadata csv (date: 2/04/2020)
INFO:__main__:Imported jem_metadata.csv as a dataframe


In [None]:
#jem_2020["p_container"] = jem_2020.container.str[0:4]
#logger.info("Creating column to check p_container")
#jem_2020.p_container.unique()

##### Using sort_df to sort jem dataframe

In [12]:
r_user = ["ramr"] 
m_user = ["PA", "PC"]

jem_2020 = sort_df(jem, r_user, m_user)
logger.info("Sorted jem by date range: 1/03/2020 - present")

INFO:read_jem:Date Range: 2020-01-03 to Present 2020
INFO:read_jem:Dropped NaNs from index date column
INFO:read_jem:Dropped NaNs from rigOperator and container column
INFO:read_jem:Created dataframe with selected users in container column
INFO:__main__:Sorted jem by date range: 1/03/2020 - present


In [14]:
jem_2020

Unnamed: 0_level_0,organism_name,name,extraction.endPipetteR,extraction.postPatch,rigOperator,status,roi_major,roi_minor,container
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-03 10:40:30 -0800,Mouse,Gad2-IRES-Cre;Ai14-504539.10.01.01,1000.0,nucleus_present,jessicat,SUCCESS,RSPd,layer 2/3,PFS4_200103_701_A01
2020-01-03 13:00:07 -0800,Mouse,Pvalb-IRES-Cre;Ai14-504851.11.01.01,1000.0,nucleus_present,jessicat,SUCCESS,RSPd,layer 5,PFS4_200103_702_A01
2020-01-03 13:00:07 -0800,Mouse,Pvalb-IRES-Cre;Ai14-504851.11.01.02,2000.0,nucleus_present,jessicat,SUCCESS,RSPv,layer 2/3,PFS4_200103_703_A01
2020-01-06 13:12:44 -0800,Mouse,Vip-IRES-Cre;Ai14-504179.11.01.01,7.0,nucleus_present,jessicat,SUCCESS,RSPd,layer 5,PFS4_200106_701_A01
2020-01-06 13:12:44 -0800,Mouse,Vip-IRES-Cre;Ai14-504179.11.01.02,3000.0,nucleus_present,jessicat,SUCCESS,SSp,layer 5,PFS4_200106_702_A01
...,...,...,...,...,...,...,...,...,...
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.04,1900.0,nucleus_present,ramr,SUCCESS,TCx,layer 1,PAS4_200204_457_A01
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.06,110.0,nucleus_present,ramr,SUCCESS,TCx,layer 4,PAS4_200204_459_A01
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.02,2000.0,nucleus_present,ramr,SUCCESS,TCx,layer 1,PAS4_200204_455_A01
2020-02-04 14:13:07 -0800,Human,H20.03.303.11.08.01.01,1400.0,nucleus_present,ramr,SUCCESS,TCx,layer 1,PAS4_200204_454_A01


In [10]:
mjem_2020 = jem_2020[jem_2020.organism_name == "Mouse"]
logger.info("Mouse count: %s", mjem_2020.organism_name.count())
hjem_2020 = jem_2020[jem_2020.organism_name == "Human"]
logger.info("Human count: %s", hjem_2020.organism_name.count())

INFO:__main__:Mouse count: 55
INFO:__main__:Human count: 30


In [11]:
jem_2020

Unnamed: 0_level_0,organism_name,extraction.endPipetteR,extraction.postPatch,rigOperator,status,roi_major,roi_minor,container
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-07 10:11:39 -0800,Mouse,2000.0,nucleus_present,ramr,SUCCESS,MOp,layer 2/3,PAS4_200107_451_A01
2020-01-07 15:19:20 -0800,Mouse,2000.0,nucleus_present,ramr,SUCCESS,RSPd,layer 5,PAS4_200107_452_A01
2020-01-10 13:15:48 -0800,Mouse,500.0,nucleus_present,ramr,SUCCESS,MOp,layer 5,PAS4_200110_451_A01
2020-01-10 13:15:48 -0800,Mouse,2.5,nucleus_present,ramr,SUCCESS,SSp,layer 5,PAS4_200110_454_A01
2020-01-10 13:15:48 -0800,Mouse,6.0,nucleus_present,ramr,SUCCESS,RSPd,layer 5,PAS4_200110_452_A01
...,...,...,...,...,...,...,...,...
2020-02-04 14:13:07 -0800,Human,1900.0,nucleus_present,ramr,SUCCESS,TCx,layer 1,PAS4_200204_457_A01
2020-02-04 14:13:07 -0800,Human,110.0,nucleus_present,ramr,SUCCESS,TCx,layer 4,PAS4_200204_459_A01
2020-02-04 14:13:07 -0800,Human,2000.0,nucleus_present,ramr,SUCCESS,TCx,layer 1,PAS4_200204_455_A01
2020-02-04 14:13:07 -0800,Human,1400.0,nucleus_present,ramr,SUCCESS,TCx,layer 1,PAS4_200204_454_A01


In [12]:
mjem_2020["new_region"] = "default_value"
logger.info("Created a new column(new_region) with default_value")

c_region = ["RSPd", "RSPv", "SSp"] 
s_region = ["MOs", "MOp", "ORB", "CTXsp"]
o_region = ["TCx", "HY"]
v_region = ["VISp"]

mjem_2020["new_region"][mjem_2020.roi_major.str.contains("|".join(c_region))] = "coronal_region"
mjem_2020["new_region"][mjem_2020.roi_major.str.contains("|".join(s_region))] = "sagittal_region"
mjem_2020["new_region"][mjem_2020.roi_major.str.contains("|".join(o_region))] = "other_region"
mjem_2020["new_region"][mjem_2020.roi_major.str.contains("|".join(v_region))] = "v1_region"
logger.info("Filled in new_region with corresponding region")

cor = mjem_2020[mjem_2020["new_region"] == "coronal_region"] 
sag = mjem_2020[mjem_2020["new_region"] == "sagittal_region"]
other = mjem_2020[mjem_2020["new_region"] == "other_region"]
v1 = mjem_2020[mjem_2020["new_region"] == "v1_region"]
logger.info("Created region pandas dataframes")

logger.info("Coronal Region count: %s", cor.organism_name.count())
logger.info("Sagittal Region count: %s", sag.organism_name.count())
logger.info("Other Region count: %s", other.organism_name.count())
logger.info("V1 Region count: %s", v1.organism_name.count())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
INFO:__main__:Created a new column(new_region) with default_value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/u

In [13]:
mjem_2020

Unnamed: 0_level_0,organism_name,extraction.endPipetteR,extraction.postPatch,rigOperator,status,roi_major,roi_minor,container,new_region
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-07 10:11:39 -0800,Mouse,2000.0,nucleus_present,ramr,SUCCESS,MOp,layer 2/3,PAS4_200107_451_A01,sagittal_region
2020-01-07 15:19:20 -0800,Mouse,2000.0,nucleus_present,ramr,SUCCESS,RSPd,layer 5,PAS4_200107_452_A01,coronal_region
2020-01-10 13:15:48 -0800,Mouse,500.0,nucleus_present,ramr,SUCCESS,MOp,layer 5,PAS4_200110_451_A01,sagittal_region
2020-01-10 13:15:48 -0800,Mouse,2.5,nucleus_present,ramr,SUCCESS,SSp,layer 5,PAS4_200110_454_A01,coronal_region
2020-01-10 13:15:48 -0800,Mouse,6.0,nucleus_present,ramr,SUCCESS,RSPd,layer 5,PAS4_200110_452_A01,coronal_region
2020-01-10 13:15:48 -0800,Mouse,2000.0,nucleus_present,ramr,SUCCESS,SSp,layer 2/3,PAS4_200110_453_A01,coronal_region
2020-01-10 15:29:02 -0800,Mouse,2900.0,nucleus_present,ramr,SUCCESS,RSPv,layer 2/3,PAS4_200110_455_A01,coronal_region
2020-01-10 15:29:02 -0800,Mouse,2300.0,nucleus_present,ramr,SUCCESS,RSPv,layer 2/3,PAS4_200110_456_A01,coronal_region
2020-01-13 11:14:15 -0800,Mouse,6.1,nucleus_present,ramr,SUCCESS,ORB,layer 5,PAS4_200113_451_A01,sagittal_region
2020-01-13 11:14:15 -0800,Mouse,8.7,nucleus_present,ramr,SUCCESS,ORB,layer 5,PAS4_200113_452_A01,sagittal_region


##### File Paths

In [14]:
csv_path = "C:/Users/ramr/Documents/Github/analysis_projects/csv/"
excel_path = "C:/Users/ramr/Documents/Github/analysis_projects/excel/"

##### Writing to csv_file

In [15]:
shiny.to_csv(csv_path + "shiny.csv")

##### Writing to excel_file

In [16]:
writer = pd.ExcelWriter(excel_path + "personal_tracker.xlsx")
mjem_2020.to_excel(writer, "mouse_2020", freeze_panes=(1,0))
hjem_2020.to_excel(writer, "human_2020", freeze_panes=(1,0))
cor.to_excel(writer, "coronal_region", freeze_panes=(1,0))
sag.to_excel(writer, "sagittal_region", freeze_panes=(1,0))
v1.to_excel(writer, "v1_region", freeze_panes=(1,0))
writer.save()