In [1]:
# import packages
import pandas as pd

# read in data
df = pd.read_csv(
    "/Users/robbiei2/Library/CloudStorage/Box-Box/Robbie's Shared Folder/Projects/Seizure Pattern/Data Tables/animal_eeg_main_record.csv"
)
df.head()

Unnamed: 0,eeg_id,cage_number,ear_tag,sex,year,start_date,end_date,computer,channel,injection_date,injection,cycle_length,cycle_group,diestrus_dates,estrus_dates,proestrus_dates,unlabeled_dates
0,221.0,1079.21,N,female,2019,1/30/19,2/17/19,1,6,12/6/18,KA_left,11.0,long,"2/11, 2/7, 2/2, 2/15","2/4, 2/14","2/14, 2/3","2/5, 2/6, 2/8"
1,225.0,1079.12,2L,female,2019,1/30/19,2/17/19,1,7,12/5/18,KA_left,9.0,long,"2/1, 2/2, 2/16, 2/13","2/6, 2/7","2/4, 2/14, 2/5",2/3
2,226.0,1080.21,N,female,2019,1/30/19,2/17/19,1,5,12/6/18,KA_right,16.0,long,"2/6, 2/7, 2/14, 2/1","2/5, 2/6",2/21,"2/2, 2/3, 2/4"
3,227.0,1080.11,L,female,2019,1/30/19,2/25/19,2,1,12/5/18,KA_right,10.0,long,"2/15, 2/10, 2/6, 2/2",2/4,"2/8, 2/11","2/5, 2/7, 2/9"
4,230.0,1080.12,2L,female,2019,1/30/19,2/17/19,1,8,12/6/18,KA_left,6.0,regular,"2/11, 2/7","2/6, 2/15, 2/1","2/14, 2/20","2/2, 2/3, 2/4, 2/5"


In [8]:
# melt data
df_melt = df[
    [
        "cage_number",
        "ear_tag",
        "sex",
        "year",
        "computer",
        "channel",
        "injection",
        "cycle_group",
        "diestrus_dates",
        "estrus_dates",
        "proestrus_dates",
        "unlabeled_dates",
    ]
]
df_melt = pd.melt(
    df_melt,
    id_vars=[
        "cage_number",
        "ear_tag",
        "sex",
        "year",
        "computer",
        "channel",
        "injection",
        "cycle_group",
    ],
    value_vars=["diestrus_dates", "estrus_dates", "proestrus_dates", "unlabeled_dates"],
    var_name="cycle_stage",
    value_name="dates",
)
df_melt.head()

Unnamed: 0,cage_number,ear_tag,sex,year,computer,channel,injection,cycle_group,cycle_stage,dates
0,1079.21,N,female,2019,1,6,KA_left,long,diestrus_dates,"2/11, 2/7, 2/2, 2/15"
1,1079.12,2L,female,2019,1,7,KA_left,long,diestrus_dates,"2/1, 2/2, 2/16, 2/13"
2,1080.21,N,female,2019,1,5,KA_right,long,diestrus_dates,"2/6, 2/7, 2/14, 2/1"
3,1080.11,L,female,2019,2,1,KA_right,long,diestrus_dates,"2/15, 2/10, 2/6, 2/2"
4,1080.12,2L,female,2019,1,8,KA_left,regular,diestrus_dates,"2/11, 2/7"


In [13]:
# reformat data to get dates into their own rows in MMDDYYYY format
df_melt["dates_split"] = df_melt["dates"].str.split(",")  # split dates into list
df_melt = df_melt.explode("dates_split")  # make list of dates into its own column
df_melt["dates_split"] = df_melt["dates_split"].str.strip()  # remove whitespace
df_melt[["month", "day"]] = df_melt["dates_split"].str.split("/", expand=True)[[0, 1]]

# split the dates_split vector into two separate vectors for M and D
df_melt["month"] = df_melt["month"].str.zfill(2)  # pad M with a zero
df_melt["day"] = df_melt["day"].str.zfill(2)  # pad D with a zero
df_melt["date_MMDDYYYY"] = (
    df_melt["month"].astype(str)
    + df_melt["day"].astype(str)
    + df_melt["year"].astype(str)
)

# drop duplicate rows and clarify meaning behind estrous cycle data
df_cleaned = df_melt.drop_duplicates()
df_cleaned = df_cleaned.drop(columns=["dates", "dates_split"])

df_cleaned.loc[
    df_cleaned["cycle_stage"].str.contains("diestrus"), "cycle_stage"
] = "diestrus"
df_cleaned.loc[
    df_cleaned["cycle_stage"].str.contains("estrus"), "cycle_stage"
] = "estrus"
df_cleaned.loc[
    df_cleaned["cycle_stage"].str.contains("proestrus"), "cycle_stage"
] = "proestrus"
df_cleaned.loc[
    df_cleaned["cycle_stage"].str.contains("unlabeled"), "cycle_stage"
] = "unlabeled"

df_cleaned.head()

Unnamed: 0,cage_number,ear_tag,sex,year,computer,channel,injection,cycle_group,cycle_stage,month,day,date_MMDDYYYY
0,1079.21,N,female,2019,1,6,KA_left,long,estrus,2,11,2112019
0,1079.21,N,female,2019,1,6,KA_left,long,estrus,2,7,2072019
0,1079.21,N,female,2019,1,6,KA_left,long,estrus,2,2,2022019
0,1079.21,N,female,2019,1,6,KA_left,long,estrus,2,15,2152019
1,1079.12,2L,female,2019,1,7,KA_left,long,estrus,2,1,2012019


In [None]:
""" 
next steps:
1. reorder columns 
2. combine cage_number and ear_tag into mouse_id and remove original columns
3. generate filepath
4. automate retrieval of files
"""