In [5]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Summarise, for every mouse location file directly under `directory`, the
# minutes logged per cage for each hour label 1-24, and write one CSV per mouse.

# Directory containing the mouse location files
directory = '../data/processed/'

# Directory receiving the per-mouse summaries; created up front so the to_csv
# call at the end of the loop always has a directory to write into
output_directory = '../data/intermediate/'
os.makedirs(output_directory, exist_ok=True)

# Cage labels to summarise (loop-invariant, so defined once)
cage_labels = ['red_cage', 'orange_cage', 'yellow_cage', 'green_cage', 'blue_cage']

# Get a list of all the mouse location files
mouse_files = [
    file for file in os.listdir(directory)
    if file.startswith('mouse_') and file.endswith('_locations.csv')
]

# Iterate over each mouse file
for mouse_file in mouse_files:
    # Import the data
    df = pd.read_csv(os.path.join(directory, mouse_file))

    # The mouse identifier is the second '_'-separated token of the filename
    mouse_id = mouse_file.split('_')[1]

    # Drop rows containing 0 in the unit number column. The explicit copy makes
    # the filtered frame independent, so the column assignments below do not
    # trigger SettingWithCopyWarning.
    df = df[df['Unit number'] != 0].copy()

    # Convert 'Time' column to datetime if not already
    df['Time'] = pd.to_datetime(df['Time'])

    # diff() is positional, so make sure rows are chronological first; the
    # stable sort leaves already-ordered data (including ties) untouched
    df = df.sort_values('Time', kind='mergesort')

    # Gap between each row and the row before it (NaT for the first row).
    # NOTE(review): diff() credits each gap to the row that ENDS it, i.e. to the
    # location/hour of the later detection — confirm that attribution is intended.
    df['Time_Diff'] = df['Time'].diff()

    # Hour label of each timestamp, shifted from 0-23 to 1-24
    df['Hour'] = df['Time'].dt.hour + 1

    # One output row per hour label, one column per cage
    rows = []
    for hour in range(1, 25):
        row = {'Hour': hour}
        in_hour = df['Hour'] == hour

        for cage in cage_labels:
            # Gaps recorded for this cage within this hour label
            gaps = df.loc[in_hour & (df['Location'] == cage), 'Time_Diff']

            # Total in minutes (NaT gaps are skipped by sum; no rows gives 0.0)
            row[cage] = gaps.dt.total_seconds().sum() / 60

        rows.append(row)

    # Create the result DataFrame from the list of rows
    result_df = pd.DataFrame(rows)

    # Export the result DataFrame to a CSV file
    output_file = f'mouse_{mouse_id}_cage_time.csv'
    result_df.to_csv(os.path.join(output_directory, output_file), index=False)

In [8]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# For every subdirectory of `directory`, summarise each mouse location file
# into minutes logged per cage for each hour label 1-24, and write one CSV per
# mouse into a matching subdirectory of `output_directory`.

# Directory containing the mouse location files
directory = '../data/processed/'

# Create the output directory if it doesn't exist
output_directory = '../data/intermediate/'
os.makedirs(output_directory, exist_ok=True)

# Cage labels to summarise (loop-invariant, so defined once)
cage_labels = ['red_cage', 'orange_cage', 'yellow_cage', 'green_cage', 'blue_cage']

# Get a list of all the subdirectories in the processed directory
subdirectories = [d for d in os.listdir(directory) if os.path.isdir(os.path.join(directory, d))]

# Iterate over each subdirectory
for subdirectory in subdirectories:
    input_subdirectory = os.path.join(directory, subdirectory)

    # Get a list of all the mouse location files in the subdirectory
    mouse_files = [
        file for file in os.listdir(input_subdirectory)
        if file.startswith('mouse_') and file.endswith('_locations.csv')
    ]

    # Iterate over each mouse file
    for mouse_file in mouse_files:
        # Import the data
        df = pd.read_csv(os.path.join(directory, subdirectory, mouse_file))

        # The mouse identifier is the second '_'-separated token of the filename
        mouse_id = mouse_file.split('_')[1]

        # Drop rows containing 0 in the unit number column. The explicit copy
        # makes the filtered frame independent, so the column assignments below
        # do not trigger SettingWithCopyWarning.
        df = df[df['Unit number'] != 0].copy()

        # Convert 'Time' column to datetime if not already
        df['Time'] = pd.to_datetime(df['Time'])

        # diff() is positional, so make sure rows are chronological first; the
        # stable sort leaves already-ordered data (including ties) untouched
        df = df.sort_values('Time', kind='mergesort')

        # Gap between each row and the row before it (NaT for the first row).
        # NOTE(review): diff() credits each gap to the row that ENDS it, i.e. to
        # the location/hour of the later detection — confirm that is intended.
        df['Time_Diff'] = df['Time'].diff()

        # Hour label of each timestamp, shifted from 0-23 to 1-24
        df['Hour'] = df['Time'].dt.hour + 1

        # One output row per hour label, one column per cage
        rows = []
        for hour in range(1, 25):
            row = {'Hour': hour}
            in_hour = df['Hour'] == hour

            for cage in cage_labels:
                # Gaps recorded for this cage within this hour label
                gaps = df.loc[in_hour & (df['Location'] == cage), 'Time_Diff']

                # Total in minutes (NaT gaps are skipped by sum; no rows gives 0.0)
                row[cage] = gaps.dt.total_seconds().sum() / 60

            rows.append(row)

        # Create the result DataFrame from the list of rows
        result_df = pd.DataFrame(rows)

        # Export the result DataFrame to a CSV file
        output_subdirectory = os.path.join(output_directory, subdirectory)
        os.makedirs(output_subdirectory, exist_ok=True)  # Create the subdirectory if it doesn't exist
        output_file = f'mouse_{mouse_id}_cage_time.csv'
        result_df.to_csv(os.path.join(output_subdirectory, output_file), index=False)

# Time differences between consecutive detections

In [20]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Single-file preparation: load one mouse location file and derive the
# per-row time gap and hour-of-day columns.

# Directory containing the mouse location files
directory = '../data/processed/'

# The one recording inspected in this cell, built from `directory` so the path
# is defined in a single place
mouse_path = os.path.join(directory, '20240301', 'mouse_0007A01F42_locations.csv')

# Import the data
df = pd.read_csv(mouse_path)

# Extract the mouse identifier from the name of the file actually read here,
# rather than from a `mouse_file` variable left behind by another cell's loop
# (undefined on a fresh kernel, and otherwise naming a different file)
mouse_id = os.path.basename(mouse_path).split('_')[1]

# Drop rows containing 0 in the unit number column. The explicit copy makes the
# filtered frame independent, so the column assignments below do not trigger
# SettingWithCopyWarning.
df = df[df['Unit number'] != 0].copy()

# Convert 'Time' column to datetime if not already
df['Time'] = pd.to_datetime(df['Time'])

# diff() is positional, so make sure rows are chronological first; the stable
# sort leaves already-ordered data (including ties) untouched
df = df.sort_values('Time', kind='mergesort')

# Create a new column 'Time_Diff' holding the gap to the previous row
# (NaT for the first row)
df['Time_Diff'] = df['Time'].diff()

# Create a new column 'Hour' to store the hour of each timestamp (0-23)
df['Hour'] = df['Time'].dt.hour

# Define the cage labels
cage_labels = ['red_cage', 'orange_cage', 'yellow_cage', 'green_cage', 'blue_cage']

# Initialize an empty list to store the rows
rows = []


In [4]:
import os
import pandas as pd

# For every subdirectory of `directory`, walk each mouse's detections hour by
# hour, measure how long the mouse stayed in each cage between cage changes,
# and write one CSV per mouse (rows: hours 0-23, columns: cages, values: minutes).

# Directory containing the mouse location files
directory = '../data/processed/'

# Create the output directory if it doesn't exist
output_directory = '../data/intermediate/'
os.makedirs(output_directory, exist_ok=True)

# Get a list of all the subdirectories in the processed directory
subdirectories = [d for d in os.listdir(directory) if os.path.isdir(os.path.join(directory, d))]

# Define the cage labels
cage_labels = ['red_cage', 'orange_cage', 'yellow_cage', 'green_cage', 'blue_cage']

# Iterate over each subdirectory
for subdirectory in subdirectories:
    # Get a list of all the mouse location files in the subdirectory
    mouse_files = [
        file for file in os.listdir(os.path.join(directory, subdirectory))
        if file.startswith('mouse_') and file.endswith('_locations.csv')
    ]

    # Iterate over each mouse file
    for mouse_file in mouse_files:
        # Import the data
        df = pd.read_csv(os.path.join(directory, subdirectory, mouse_file))

        # The mouse identifier is the second '_'-separated token of the filename
        mouse_id = mouse_file.split('_')[1]

        # Keep only detections with a non-zero unit number in one of the tracked
        # cages (this also removes "UNKNOWN" locations, which are not in
        # cage_labels). The explicit copy makes the filtered frame independent,
        # so the column assignments below do not trigger SettingWithCopyWarning.
        df = df[(df['Unit number'] != 0) & df['Location'].isin(cage_labels)].copy()

        # Convert 'Time' column to datetime if not already
        df['Time'] = pd.to_datetime(df['Time'])

        # Sort chronologically; the stable sort keeps the file order of
        # detections that share a timestamp, so the walk below is deterministic
        df = df.sort_values('Time', kind='mergesort')

        # Create a new column 'Hour' to store the hour of each timestamp (0-23)
        df['Hour'] = df['Time'].dt.hour

        # One output row per hour of the day
        rows = []
        for hour in range(24):
            # Pre-fill every cage with 0 so the CSV columns always appear in
            # cage_labels order, regardless of which cage is seen first
            row = {'Hour': hour}
            row.update({cage: 0 for cage in cage_labels})

            # Detections whose timestamp falls in this hour of the day
            hour_df = df[df['Hour'] == hour]

            if not hour_df.empty:
                # The walk starts at the first detection of the hour and stops
                # at the end of that clock hour, so the last visit cannot spill
                # into the next hour.
                # NOTE(review): assumes each file covers a single calendar day;
                # with several days the same hour-of-day would be merged here —
                # confirm against the data.
                # NOTE(review): time between the top of the hour and the first
                # detection is not attributed to any cage.
                start_time = hour_df['Time'].min()
                end_time = start_time.normalize() + pd.Timedelta(hours=hour + 1)

                # Track the cage currently occupied and when the visit began
                cage_start_time = start_time
                current_cage = None

                for cage, timestamp in zip(hour_df['Location'], hour_df['Time']):
                    # Only a change of cage closes the running visit
                    if cage != current_cage:
                        if current_cage is not None:
                            # Accumulate (not overwrite) so repeat visits to the
                            # same cage within the hour are all counted
                            row[current_cage] += (timestamp - cage_start_time).total_seconds() / 60

                        # Update the current cage and start time
                        current_cage = cage
                        cage_start_time = timestamp

                # Close the final visit at the end of the clock hour
                if current_cage is not None:
                    row[current_cage] += (end_time - cage_start_time).total_seconds() / 60

            # Append the row to the list of rows
            rows.append(row)

        # Create the result DataFrame from the list of rows
        result_df = pd.DataFrame(rows)

        # Export the result DataFrame to a CSV file
        output_subdirectory = os.path.join(output_directory, subdirectory)
        os.makedirs(output_subdirectory, exist_ok=True)  # Create the subdirectory if it doesn't exist
        output_file = f'mouse_{mouse_id}_cage_time.csv'
        result_df.to_csv(os.path.join(output_subdirectory, output_file), index=False)