In [1]:
import os
import glob
import pandas as pd
from pathlib import Path
import argparse

class VideoManager:
    def __init__(self, base_dir="/nas/project_data/B1_Behavior/rush/mb_inhouse/mb_imi_zolti_/mb"):
        self.base_dir = Path(base_dir)
        print(f"Base directory: {base_dir}")

    def list_camera_views(self):
        """List all camera view directories (cam_side_l, cam_side_r, cam_top)"""
        return [d for d in self.base_dir.glob("cam_*") if d.is_dir()]
    
    def list_folders(self, camera_dir):
        """List all folders (calibration, imi, zolti) for a given camera view"""
        return [d for d in camera_dir.glob("*") if d.is_dir()]
    
    def list_persons(self, camera_dir):
        """List only person folders (imi, zolti) for a given camera view, excluding calibration"""
        return [d for d in camera_dir.glob("*") if d.is_dir() and d.name != "calibration"]
        
    def list_sessions(self, person_dir):
        """List all session directories for a given person"""
        return [d for d in person_dir.glob("session*") if d.is_dir()]
    
    def list_videos(self, session_dir):
        """List all video files in a session directory"""
        video_extensions = ["*.mp4", "*.avi", "*.mov"]
        videos = []
        for ext in video_extensions:
            videos.extend(session_dir.glob(ext))
        
        return sorted(videos) #
    
    def create_inventory(self):
        """Create a comprehensive inventory of all videos"""
        records = []
        
        for camera_dir in self.list_camera_views():
            camera_view = camera_dir.name
            
            for person_dir in self.list_persons(camera_dir):
                person = person_dir.name
                
                for session_dir in self.list_sessions(person_dir):
                    session = session_dir.name
                    
                    # Sort videos within this session directory
                    videos = sorted(self.list_videos(session_dir))
                    
                    # Assign index-based sync identifiers based on sort order
                    for index, video_file in enumerate(videos):
                        file_size_mb = video_file.stat().st_size / (1024 * 1024)
                        
                        records.append({
                            "camera_view": camera_view,
                            "person": person,
                            "session": session,
                            "filename": video_file.name,
                            "path": str(video_file),
                            "size_mb": round(file_size_mb, 2),
                            "position_index": index
                        })
            # Handle calibration folder separately if it exists
            calibration_dir = camera_dir / "calibration"
            if calibration_dir.exists() and calibration_dir.is_dir():
                # Sort calibration videos
                videos = sorted(self.list_videos(calibration_dir))
                
                for index, video_file in enumerate(videos):
                    file_size_mb = video_file.stat().st_size / (1024 * 1024)
                    
                    # Use a combination of 'calibration' and index as the sync group
                    sync_group = f"calibration_{index}"
                    
                    records.append({
                        "camera_view": camera_view,
                        "person": "calibration",
                        "session": "",  # No session for calibration
                        "filename": video_file.name,
                        "path": str(video_file),
                        "size_mb": round(file_size_mb, 2),
                        "position_index": index
                    })
        
        return pd.DataFrame(records)
    
   

In [2]:

base_dir= "/nas/project_data/B1_Behavior/rush/mb_inhouse/mb_imi_zolti_/mb"
inventory= True 
output = "video_inventory.csv",      
manager = VideoManager(base_dir)


Base directory: /nas/project_data/B1_Behavior/rush/mb_inhouse/mb_imi_zolti_/mb


In [3]:
df = manager.create_inventory()

In [4]:
df.columns

Index(['camera_view', 'person', 'session', 'filename', 'path', 'size_mb',
       'position_index'],
      dtype='object')

In [5]:
result = df.pivot(
    index=['person', 'session', 'position_index'],
    columns='camera_view',
    values=['filename', 'path', 'size_mb']
)


In [6]:
# Flatten the multi-level column names
result.columns = ['_'.join(col).strip() for col in result.columns.values]
result = result.reset_index()

In [7]:
result = pd.pivot_table(
    df,
    index=['person', 'session', 'position_index'],
    columns='camera_view',
    values=['filename', 'path', 'size_mb'],
    aggfunc={'path': 'first', 'size_mb': 'mean'}
)

In [8]:
result.to_csv('test.csv')

In [8]:
result.columns = ['_'.join(col).strip() for col in result.columns.values]

In [9]:
result = result.reset_index()


In [10]:
result.columns

Index(['person', 'session', 'position_index', 'path_cam_side_l',
       'path_cam_side_r', 'path_cam_top', 'size_mb_cam_side_l',
       'size_mb_cam_side_r', 'size_mb_cam_top'],
      dtype='object')

In [14]:
import pandas as pd
import os
import shutil

def organize_videos_by_session(df, base_output_dir):
    """
    Organizes video files into folders based on person_session identifiers
    
    Parameters:
    df (DataFrame): DataFrame containing 'person_session' and path columns for different camera views
    base_output_dir (str): Base directory where folders will be created
    """
    # Make sure base output directory exists
    if not os.path.exists(base_output_dir):
        os.makedirs(base_output_dir)
    
    # Get all path columns (those containing 'path_')
    path_columns = [col for col in df.columns if 'path_' in col]
    
    # Create mapping for camera view names from column names
    # Example: 'path_cam_top' -> 'cam_top'
    camera_view_names = {col: col.replace('path_', '') for col in path_columns}
    
    # Process each row in the DataFrame
    for index, row in df.iterrows():
        # Get person_session value for this row
        session_id = row['person_session']
        
        # Create a directory for this session
        session_dir = os.path.join(base_output_dir, session_id)
        if not os.path.exists(session_dir):
            os.makedirs(session_dir)

        # Process each camera view for this session
        for path_col in path_columns:
            # Get the source path
            src_path = row[path_col]
            
            # Skip if path is missing
            if pd.isna(src_path) or not src_path:
                continue
                
            # Get camera view name for filename
            camera_view = camera_view_names[path_col]
            
            # Extract the original filename from the path
            original_filename = os.path.basename(src_path)
            
            # Create a destination path with camera view prefix
            dest_filename = f"{camera_view}_{original_filename}"
            dest_path = os.path.join(session_dir, dest_filename)
            
            # Copy the file (use shutil.move if you want to move instead of copy)
            try:
                print(f"Copying {src_path} to {dest_path}")
                shutil.copy2(src_path, dest_path)
            except Exception as e:
                print(f"Error processing {src_path}: {str(e)}")
    
    print(f"Video organization complete. Files organized in {base_output_dir}")

In [15]:
result

Unnamed: 0,person,session,position_index,path_cam_side_l,path_cam_side_r,path_cam_top,size_mb_cam_side_l,size_mb_cam_side_r,size_mb_cam_top
0,calibration,,0,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,118.61,118.64,118.63
1,calibration,,1,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,128.83,128.91,128.9
2,imi,session1,0,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,130.1,130.08,110.65
3,imi,session1,1,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,75.09,75.09,63.83
4,imi,session1,2,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,130.19,130.23,110.7
5,imi,session1,3,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,132.2,132.22,112.43
6,imi,session1,4,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,134.06,134.09,114.07
7,imi,session1,5,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,132.99,133.03,113.21
8,imi,session1,6,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,131.8,131.82,112.09
9,imi,session1,7,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,/nas/project_data/B1_Behavior/rush/mb_inhouse/...,136.05,136.05,115.67


In [20]:
# Get all path columns (those containing 'path_')
path_columns = [col for col in result.columns if 'path_' in col]

In [21]:
camera_view_names = {col: col.replace('path_', '') for col in path_columns}


In [22]:
path_columns

['path_cam_side_l', 'path_cam_side_r', 'path_cam_top']

In [23]:
result.columns

Index(['person', 'session', 'position_index', 'path_cam_side_l',
       'path_cam_side_r', 'path_cam_top', 'size_mb_cam_side_l',
       'size_mb_cam_side_r', 'size_mb_cam_top'],
      dtype='object')

In [32]:
import shutil
output_path = '/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data'
# Process each row in the DataFrame
index = 0 
for index, row in result.iterrows():
    if index < 19: continue
    # Get person_session value for this row
    session_id = row['person'] + '_' + row['session'] + '_' + str(index)
    # Create a directory for this session
    session_dir = os.path.join(output_path, session_id)
    if not os.path.exists(session_dir):
        os.makedirs(session_dir)
    
    # Process each camera view for this session
    for path_col in path_columns:
        src_path = row[path_col]

         # Get camera view name for filename
        camera_view = camera_view_names[path_col]
        
        # Extract the original filename from the path
        original_filename = os.path.basename(src_path)
        
        # Create a destination path with camera view prefix
        dest_filename = f"{camera_view}_{original_filename}"

        dest_path = os.path.join(session_dir, dest_filename)

        # Copy the file (use shutil.move if you want to move instead of copy)
        try:
            print(f"Copying {src_path} to {dest_path}")
            shutil.copy2(src_path, dest_path)
        except Exception as e:
            print(f"Error processing {src_path}: {str(e)}")

Copying /nas/project_data/B1_Behavior/rush/mb_inhouse/mb_imi_zolti_/mb/cam_side_l/zolti/session1/Basler_a2A1920-160ucBAS__40399468__20250219_174949435.mp4 to /nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session1_19/cam_side_l_Basler_a2A1920-160ucBAS__40399468__20250219_174949435.mp4
Copying /nas/project_data/B1_Behavior/rush/mb_inhouse/mb_imi_zolti_/mb/cam_side_r/zolti/session1/Basler_a2A1920-160ucBAS__40399460__20250219_174948489.mp4 to /nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session1_19/cam_side_r_Basler_a2A1920-160ucBAS__40399460__20250219_174948489.mp4
Copying /nas/project_data/B1_Behavior/rush/mb_inhouse/mb_imi_zolti_/mb/cam_top/zolti/session1/Basler_a2A1920-160ucBAS__40401031__20250219_174950347.mp4 to /nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session1_19/cam_top_Basler_a2A1920-160ucBAS__40401031__20250219_174950347.mp4
Copying /nas/project_data/B1_Behavior/rush/mb_inhouse/m

In [50]:
import os
import subprocess

def extract_frames_subprocess(video_path, output_dir, frame_rate=None, quality=1):
    """
    Extract frames from a video using subprocess to run ffmpeg command.
    
    Args:
        video_path (str): Path to the video file
        output_dir (str): Directory to save extracted frames
        frame_rate (float, optional): Extract frames at this rate (fps)
                                     If None, extract all frames
        quality (int, optional): JPEG quality (1-31, lower is better quality)
                                Default is 1 for highest quality
    
    Returns:
        bool: True if extraction was successful, False otherwise
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Use the specific ffmpeg path
    ffmpeg_path = '/home/kaan/bin/ffmpeg'
    
    # Build the command as a list (safer than shell=True)
    output_pattern = os.path.join(output_dir, "frame_%04d.jpg")
    cmd = [ffmpeg_path, '-i', video_path]
    
    if frame_rate is not None:
        cmd.extend(['-r', str(frame_rate)])
    
    cmd.extend(['-q:v', str(quality), '-qmin', str(quality), '-qmax', str(quality)])
    cmd.append(output_pattern)
    cmd.append('-y')
    print(cmd)
    
    try:
        # Execute the command using subprocess with args as list
        result = subprocess.run(
            cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )

        # Display standard output
        print("Standard Output:")
        print(result.stdout)
        
        # Display standard error
        print("Standard Error:")
        print(result.stderr)
        print(f"Command executed successfully")
        return True
    except subprocess.CalledProcessError as e:
        print(f"Error executing command")
        print(f"Error details: {e}")
        print(f"STDERR: {e.stderr}")
        return False

In [47]:
!source ~/.bashrc

In [19]:
import shutil
output_path = '/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data'
# Process each row in the DataFrame
index = 0 
for index, row in result.iterrows():
    # Get person_session value for this row
    if row['person'] == 'calibration': continue
    session_id = row['person'] + '_' + row['session'] + '_' +str(index)
    # Create a directory for this session
    session_dir = os.path.join(output_path, session_id)
    if not os.path.exists(session_dir):
        os.makedirs(session_dir)
    
    # Process each camera view for this session
    for path_col in path_columns:
        src_path = row[path_col]
        
         # Get camera view name for filename
        camera_view = camera_view_names[path_col]
        # Extract the original filename from the path
        original_filename = os.path.basename(src_path)
        # Create a destination path with camera view prefix
        dest_filename = f"{camera_view}_{original_filename}"

        video_path = os.path.join(session_dir, dest_filename)
        output_path = os.path.join(session_dir, camera_view)
        
        if not os.path.exists(output_path):
            os.makedirs(output_path)

        #print(os.path.exists(video_path))
        print(video_path)
        #print(extract_frames_subprocess(video_path, output_path))


/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/imi_session1_2/cam_side_l_Basler_a2A1920-160ucBAS__40399468__20250219_170320088.mp4
/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/imi_session1_2/cam_side_r_Basler_a2A1920-160ucBAS__40399460__20250219_170316318.mp4
/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/imi_session1_2/cam_top_Basler_a2A1920-160ucBAS__40401031__20250219_170326473.mp4
/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/imi_session1_2/cam_top/imi_session1_3/cam_side_l_Basler_a2A1920-160ucBAS__40399468__20250219_170702025.mp4
/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/imi_session1_2/cam_top/imi_session1_3/cam_side_r_Basler_a2A1920-160ucBAS__40399460__20250219_170658686.mp4
/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/imi_session1_2/cam_top/imi_session1_3/cam_top_Basler_a2A1920-160ucBAS__40401031__20250219_170709473.mp4
/

In [31]:
base_output_path = '/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data'
# Process each row in the DataFrame
index = 0 
for index, row in result.iterrows():
    if index < 19: continue
    # Get person_session value for this row
    if row['person'] == 'calibration': continue
    session_id = row['person'] + '_' + row['session'] + '_' +str(index)
    # Create a directory for this session
    session_dir = os.path.join(base_output_path, session_id)
    if not os.path.exists(session_dir):
        os.makedirs(session_dir)
    
    # Process each camera view for this session
    for path_col in path_columns:
        src_path = row[path_col]
        
        # Get camera view name for filename
        camera_view = camera_view_names[path_col]
        # Extract the original filename from the path
        original_filename = os.path.basename(src_path)
        # Create a destination path with camera view prefix
        dest_filename = f"{camera_view}_{original_filename}"
        video_path = os.path.join(session_dir, dest_filename)
        frames_output_path = os.path.join(session_dir, camera_view)
        
        if not os.path.exists(frames_output_path):
            os.makedirs(frames_output_path)
        
        # Check if video file exists at the expected path
        if os.path.exists(video_path):
            print(f"File exists: {video_path}")
            print(extract_frames_subprocess(video_path, frames_output_path))
        else:
            print(f"File missing: {video_path}")
            print(f"Original source path: {src_path}")

        

File exists: /nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session1_19/cam_side_l_Basler_a2A1920-160ucBAS__40399468__20250219_174949435.mp4
Command executed successfully
True
File exists: /nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session1_19/cam_side_r_Basler_a2A1920-160ucBAS__40399460__20250219_174948489.mp4
Command executed successfully
True
File exists: /nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session1_19/cam_top_Basler_a2A1920-160ucBAS__40401031__20250219_174950347.mp4
Command executed successfully
True
File missing: /nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session1_20/cam_side_l_Basler_a2A1920-160ucBAS__40399468__20250219_175132281.mp4
Original source path: /nas/project_data/B1_Behavior/rush/mb_inhouse/mb_imi_zolti_/mb/cam_side_l/zolti/session1/Basler_a2A1920-160ucBAS__40399468__20250219_175132281.mp4
File missing: /nas/project_data/B1_Behav

In [52]:
base_output_path = '/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data'
# Process each row in the DataFrame
index = 0 
for index, row in result.iterrows():
    # Get person_session value for this row
    if row['person'] == 'calibration': continue
    session_id = row['person'] + '_' + row['session'] + '_' +str(index)
    # Create a directory for this session
    session_dir = os.path.join(base_output_path, session_id)
    if not os.path.exists(session_dir):
        os.makedirs(session_dir)
    
    # Process each camera view for this session
    for path_col in path_columns:
        src_path = row[path_col]
        
        # Get camera view name for filename
        camera_view = camera_view_names[path_col]
        # Extract the original filename from the path
        original_filename = os.path.basename(src_path)
        # Create a destination path with camera view prefix
        dest_filename = f"{camera_view}_{original_filename}"
        video_path = os.path.join(session_dir, dest_filename)
        frames_output_path = os.path.join(session_dir, camera_view)
        
        if not os.path.exists(frames_output_path):
            os.makedirs(frames_output_path)
        print(session_id,len(os.listdir(frames_output_path)))
        

imi_session1_2 1893
imi_session1_2 1893
imi_session1_2 1893
imi_session1_3 1092
imi_session1_3 1092
imi_session1_3 1092
imi_session1_4 1895
imi_session1_4 1895
imi_session1_4 1895
imi_session1_5 1923
imi_session1_5 1923
imi_session1_5 1923
imi_session1_6 1951
imi_session1_6 1951
imi_session1_6 1951
imi_session1_7 1934
imi_session1_7 1934
imi_session1_7 1934
imi_session1_8 1918
imi_session1_8 1918
imi_session1_8 1918
imi_session1_9 1980
imi_session1_9 1980
imi_session1_9 1980
imi_session2_10 1883
imi_session2_10 1883
imi_session2_10 1883
imi_session2_11 1926
imi_session2_11 1926
imi_session2_11 1926
imi_session2_12 1930
imi_session2_12 1930
imi_session2_12 1930
imi_session2_13 23
imi_session2_13 23
imi_session2_13 23
imi_session2_14 1926
imi_session2_14 1926
imi_session2_14 1926
imi_session2_15 1945
imi_session2_15 1945
imi_session2_15 1945
zolti_session1_16 1902
zolti_session1_16 1902
zolti_session1_16 1902
zolti_session1_17 1906
zolti_session1_17 1906
zolti_session1_17 1906
zolti_sess

In [33]:
## FIXING SOME PROBLEMATIC CASE 

In [34]:
base_output_path = '/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data'

In [51]:
for session_name in os.listdir(base_output_path):
    index  = int(session_name.split('_')[-1])
    if index < 20: continue
    session_dir = os.path.join(base_output_path, session_name)
    for video_name in os.listdir(session_dir):
        if not video_name.endswith('.mp4'): continue
        video_path = os.path.join(session_dir, video_name)
        if 'top' in video_name:
            camera_view = '_'.join(video_name.split('_', 3)[0:2])
        else:
            camera_view = '_'.join(video_name.split('_', 3)[0:3])

        frame_output_path = os.path.join(session_dir,camera_view)

        if not os.path.exists(frame_output_path):
            os.makedirs(frame_output_path)
        extract_frames_subprocess(video_path, frames_output_path)
    

['/home/kaan/bin/ffmpeg', '-i', '/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session1_19/cam_side_l_Basler_a2A1920-160ucBAS__40399468__20250219_174949435.mp4', '-q:v', '1', '-qmin', '1', '-qmax', '1', '/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session2_25/cam_top/frame_%04d.jpg', '-y']
Command executed successfully
['/home/kaan/bin/ffmpeg', '-i', '/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session1_19/cam_side_r_Basler_a2A1920-160ucBAS__40399460__20250219_174948489.mp4', '-q:v', '1', '-qmin', '1', '-qmax', '1', '/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session2_25/cam_top/frame_%04d.jpg', '-y']
Command executed successfully
['/home/kaan/bin/ffmpeg', '-i', '/nas/project_data/B1_Behavior/rush/mb_inhouse/postprocess/nipg_hoi_data/zolti_session1_19/cam_top_Basler_a2A1920-160ucBAS__40401031__20250219_174950347.mp4', '-q:v', '1', '-qmin', '1', '-qmax',