In [None]:
import sys
sys.path.append('..')

import shutil
from pathlib import Path
import re

In [None]:
def parse_filename(filename):
    """
    Parse PIRATE calibration filename and extract relevant information.

    Two underscore-separated layouts are recognised:

    * flat: ``PIRATE_NUMBER_flats_FILTER_ID_YYYY_MM_DD_HH_MM_SS.fits``
    * bias: ``PIRATE_NUMBER_Bias22_ID_YYYY_MM_DD_HH_MM_SS.fits``

    The frame type is detected case-insensitively from the presence of
    ``flats`` or ``bias`` in the name (``flats`` is checked first).

    Parameters
    ----------
    filename : str
        Original PIRATE filename

    Returns
    -------
    dict or None
        Dictionary with keys ``type`` ('flat' or 'bias'), ``filter``
        (``None`` for bias frames), ``date`` ('YYYY_MM_DD'), ``id`` and
        ``original``; or None if the name is of an unknown type or does not
        contain an ID followed by three numeric date fields. A message is
        printed whenever None is returned.
    """
    extension = '.fits'
    # Strip only a trailing extension; a blanket str.replace would also
    # remove '.fits' if it happened to occur in the middle of the name.
    if filename.endswith(extension):
        name = filename[:-len(extension)]
    else:
        name = filename

    parts = name.split('_')
    lowered = name.lower()

    # Position of the frame ID; the three date fields follow it directly.
    if 'flats' in lowered:
        frame_type = 'flat'
        id_index = 4
    elif 'bias' in lowered:
        frame_type = 'bias'
        id_index = 3
    else:
        print(f"Unknown file type: {filename}")
        return None

    date_parts = parts[id_index + 1:id_index + 4]  # YYYY, MM, DD

    # Slicing never raises IndexError, so a truncated name has to be
    # detected explicitly instead of relying on an exception.
    has_id = len(parts) > id_index
    has_date = len(date_parts) == 3 and all(p.isdigit() for p in date_parts)
    if not (has_id and has_date):
        print(f"Could not parse {frame_type} file: {filename}")
        return None

    return {
        'type': frame_type,
        # Only flats carry a filter band (B, V, R, ...); bias frames do not.
        'filter': parts[3] if frame_type == 'flat' else None,
        'date': '_'.join(date_parts),
        'id': parts[id_index],
        'original': filename
    }


def create_new_filename(info):
    """
    Build the simplified filename for a parsed calibration frame.

    Parameters
    ----------
    info : dict
        Parsed file information with at least the keys ``type``, ``date``
        and ``id`` (plus ``filter`` when ``type`` is 'flat').

    Returns
    -------
    str
        ``YYYY_MM_DD_FILTER_flat_ID.fits`` for flats, otherwise
        ``YYYY_MM_DD_bias_ID.fits``. The ID is zero-padded to two characters.
    """
    # Anything that is not a flat is named as a bias frame.
    if info['type'] != 'flat':
        return f"{info['date']}_bias_{info['id'].zfill(2)}.fits"

    return f"{info['date']}_{info['filter']}_flat_{info['id'].zfill(2)}.fits"

## Test Parsing on Sample Files

In [None]:
# Exercise the parser on representative raw names: two flats and two bias frames
test_files = [
    'PIRATE_158371_flats_R_01_2025_09_01_19_54_25.fits',
    'PIRATE_161217_Bias22_0_2025_09_20_18_36_49.fits',
    'PIRATE_162424_flats_B_04_2025_09_25_19_17_25.fits',
    'PIRATE_163073_Bias22_1_2025_09_29_18_25_45.fits'
]

print("Testing filename parsing:")
print("=" * 70)
for filename in test_files:
    info = parse_filename(filename)
    if not info:
        # parse_filename has already printed why this name was rejected
        continue
    new_name = create_new_filename(info)
    print(
        f"Original: {filename}",
        f"New:      {new_name}",
        f"Type:     {info['type']}, Filter: {info['filter']}, Date: {info['date']}",
        sep="\n",
    )
    print()

## Preview Reorganization (Dry Run)

In [None]:
# Data locations, given relative to the current working directory
data_dir = Path('../data')
calibration_dir = data_dir / 'calibration'

print(f"Calibration directory: {calibration_dir.absolute()}")
print()

# Per-week subfolders (week1, week2, ...) in name order
week_dirs = sorted(d for d in calibration_dir.glob('week*') if d.is_dir())

if week_dirs:
    print(f"Found {len(week_dirs)} week directories:")
    for week_dir in week_dirs:
        fits_count = sum(1 for _ in week_dir.glob('*.fits'))
        print(f"  {week_dir.name}: {fits_count} FITS files")
    print()

    # Show how the first two files of every week would be renamed
    print("Sample files (first 2 from each week):")
    print("=" * 70)
    for week_dir in week_dirs:
        print(f"\n{week_dir.name}:")
        fits_files = sorted(week_dir.glob('*.fits'))[:2]
        for fits_file in fits_files:
            info = parse_filename(fits_file.name)
            if not info:
                # Unparseable names are reported by parse_filename itself
                continue
            new_name = create_new_filename(info)
            print(f"  {fits_file.name}")
            print(f"  -> {info['type']}/{new_name}")
else:
    print(f"No week directories found in {calibration_dir}")

In [None]:
def reorganize_calibration_files(calibration_dir, dry_run=True):
    """
    Reorganize calibration files from week folders into flats/ and bias/ folders.

    Every ``*.fits`` file directly inside a ``week*`` subdirectory is parsed
    with ``parse_filename``, renamed with ``create_new_filename`` and copied
    (metadata preserved via ``shutil.copy2``) into ``flats/`` or ``bias/``
    under ``calibration_dir``. The week directories themselves are left
    untouched.

    If two source files map to the same new name, the later one gets the
    week directory name appended; if that is still taken, a numeric suffix
    is added as well, so no file copied earlier in the run is overwritten.

    Parameters
    ----------
    calibration_dir : Path
        Path to calibration directory containing week1, week2, etc.
    dry_run : bool
        If True, only print what would be done without making changes

    Returns
    -------
    dict or None
        Counters ``flats_moved``, ``bias_moved``, ``errors`` and
        ``duplicates``. In dry-run mode the two ``*_moved`` counters hold the
        number of files that *would* be copied. None is returned when no
        week directories are found.
    """
    calibration_dir = Path(calibration_dir)

    # Create new directory structure
    flats_dir = calibration_dir / 'flats'
    bias_dir = calibration_dir / 'bias'

    if not dry_run:
        flats_dir.mkdir(exist_ok=True)
        bias_dir.mkdir(exist_ok=True)
        print("Created directories:")
    else:
        print("[DRY RUN] Would create directories:")
    print(f"   {flats_dir}")
    print(f"   {bias_dir}")

    print()

    # Find all week directories
    week_dirs = sorted(d for d in calibration_dir.glob('week*') if d.is_dir())

    if not week_dirs:
        print(f"No week directories found in {calibration_dir}")
        return

    print(f"Found {len(week_dirs)} week directories: {[d.name for d in week_dirs]}\n")

    # Statistics
    stats = {
        'flats_moved': 0,
        'bias_moved': 0,
        'errors': 0,
        'duplicates': 0
    }

    # New filenames already assigned in this run, per frame type
    new_filenames = {'flat': set(), 'bias': set()}

    # Process each week directory
    for week_dir in week_dirs:
        print(f"Processing {week_dir.name}...")

        # Find all FITS files
        fits_files = sorted(week_dir.glob('*.fits'))

        if not fits_files:
            print(f"  No FITS files found in {week_dir.name}")
            continue

        print(f"  Found {len(fits_files)} FITS files")

        # enumerate gives the list position directly (no per-file list search)
        for position, fits_file in enumerate(fits_files):
            # Parse filename
            info = parse_filename(fits_file.name)

            if info is None:
                stats['errors'] += 1
                continue

            # Create new filename
            new_filename = create_new_filename(info)

            # Determine destination directory and the counter to update
            if info['type'] == 'flat':
                dest_dir = flats_dir
                file_type = 'flat'
                moved_key = 'flats_moved'
            else:
                dest_dir = bias_dir
                file_type = 'bias'
                moved_key = 'bias_moved'

            taken = new_filenames[file_type]

            # Check for duplicates
            if new_filename in taken:
                print(f" DUPLICATE: {new_filename}")
                # Add week name to filename to make it unique
                stem, extension = new_filename.rsplit('.', 1)
                candidate = f"{stem}_{week_dir.name}.{extension}"
                # The week-suffixed name can itself collide (e.g. two
                # duplicates inside the same week); keep numbering until free.
                counter = 2
                while candidate in taken:
                    candidate = f"{stem}_{week_dir.name}_{counter}.{extension}"
                    counter += 1
                new_filename = candidate
                stats['duplicates'] += 1

            taken.add(new_filename)
            dest_path = dest_dir / new_filename

            if dry_run:
                if position < 2:  # Only show first 2 per week
                    print(f"  [DRY RUN] {fits_file.name} -> {file_type}/{new_filename}")
                # Count planned copies so the dry-run summary is informative
                stats[moved_key] += 1
                continue

            # Copy (not move): the original stays in its week directory
            try:
                shutil.copy2(fits_file, dest_path)
            except OSError as e:
                print(f" Error copying {fits_file.name}: {e}")
                stats['errors'] += 1
            else:
                stats[moved_key] += 1

        print()

    # Print summary
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)
    if dry_run:
        print("[DRY RUN - No files were actually moved]")
    print(f"Flat fields: {stats['flats_moved']}")
    print(f"Bias frames: {stats['bias_moved']}")
    print(f"Duplicates handled: {stats['duplicates']}")
    print(f"Errors: {stats['errors']}")
    print()

    if not dry_run:
        print("✅ Reorganization complete!")
        print()
        print("Original week directories are preserved.")
        print("You can delete them manually if reorganization looks correct.")

    return stats

In [None]:
# Preview first: with dry_run=True the function only prints its plan
print("DRY RUN - Preview of reorganization", "=" * 70, sep="\n")
stats = reorganize_calibration_files(calibration_dir, dry_run=True)

In [None]:
# ACTUAL RUN: this cell is live as written. Executing it copies the files
# into flats/ and bias/; check the dry-run output above before running it.
print("ACTUAL RUN - Reorganizing files", "=" * 70, sep="\n")
stats = reorganize_calibration_files(calibration_dir, dry_run=False)

## Verify New Structure

In [None]:
# Inspect the output folders: file count plus up to five sample names each
flats_dir = calibration_dir / 'flats'
bias_dir = calibration_dir / 'bias'

if not flats_dir.exists():
    print("Flats directory not created yet")
else:
    flats = sorted(flats_dir.glob('*.fits'))
    print(f"Flats directory: {len(flats)} files")
    print("Sample files:")
    for f in flats[:5]:
        print(f"  {f.name}")
    if flats[5:]:
        # Summarise whatever was not listed above
        print(f"  ... and {len(flats[5:])} more")

print()

if not bias_dir.exists():
    print("Bias directory not created yet")
else:
    bias = sorted(bias_dir.glob('*.fits'))
    print(f"Bias directory: {len(bias)} files")
    print("Sample files:")
    for f in bias[:5]:
        print(f"  {f.name}")
    if bias[5:]:
        # Summarise whatever was not listed above
        print(f"  ... and {len(bias[5:])} more")

In [1]:
import os
from pathlib import Path

# Directory containing the standard-star frames.
# NOTE(review): this path is relative to the current working directory and
# differs from the '../data' root used earlier in the notebook - confirm.
standards_dir = Path("data/raw/standard_stars")

# Correct the "OLS" -> "OSL" typo in the names of the star2 frames.
# The matches are collected up front (sorted list) so that renaming files
# does not interfere with the directory scan still in progress.
for file in sorted(standards_dir.glob("*star2*.fits")):
    new_name = file.name.replace("OLS", "OSL")
    if new_name == file.name:
        # Nothing to fix (e.g. the cell was already run); do not report a
        # rename that did not happen.
        continue
    new_path = file.parent / new_name
    if new_path.exists():
        # os.rename may silently replace an existing file; never clobber data.
        print(f"Skipped (target already exists): {file.name} -> {new_name}")
        continue
    os.rename(file, new_path)
    print(f"Renamed: {file.name} -> {new_name}")

Renamed: PIRATE_164643_OLS_ROE_EXO1_SA110_506_star2_00_SA110_506_00_Filter_B_02_2025_10_06_20_14_11.fits -> PIRATE_164643_OSL_ROE_EXO1_SA110_506_star2_00_SA110_506_00_Filter_B_02_2025_10_06_20_14_11.fits
Renamed: PIRATE_164645_OLS_ROE_EXO1_SA110_506_star2_00_SA110_506_00_Filter_I_02_2025_10_06_20_16_17.fits -> PIRATE_164645_OSL_ROE_EXO1_SA110_506_star2_00_SA110_506_00_Filter_I_02_2025_10_06_20_16_17.fits
Renamed: PIRATE_164635_OLS_ROE_EXO1_SA110_506_star2_00_SA110_506_00_Filter_R_00_2025_10_06_20_07_50.fits -> PIRATE_164635_OSL_ROE_EXO1_SA110_506_star2_00_SA110_506_00_Filter_R_00_2025_10_06_20_07_50.fits
Renamed: PIRATE_164632_OLS_ROE_EXO1_SA110_506_star2_00_SA110_506_00_Filter_I_00_2025_10_06_20_06_45.fits -> PIRATE_164632_OSL_ROE_EXO1_SA110_506_star2_00_SA110_506_00_Filter_I_00_2025_10_06_20_06_45.fits
Renamed: PIRATE_164637_OLS_ROE_EXO1_SA110_506_star2_00_SA110_506_00_Filter_V_00_2025_10_06_20_08_37.fits -> PIRATE_164637_OSL_ROE_EXO1_SA110_506_star2_00_SA110_506_00_Filter_V_00_2025_