In [1]:
import pandas as pd
import os
import datetime

# Load the camera/date table from the CSV file, then discard rows that have no camera name
df = pd.read_csv('snow_cam_details.csv')
df = df.dropna(subset=['Camera'])

# Root directory under which the per-product, per-camera folders are looked up
data_store_path = '../data_store/data/'

def date_to_julian(date_str):
    """Split a ``dd/mm/yyyy`` date string into its year and day-of-year.

    "Julian day" here means the ordinal day within the year (1 for 1 January),
    not the astronomical Julian Day Number.

    Args:
        date_str: Date text in ``dd/mm/yyyy`` form, e.g. ``'06/01/2022'``.

    Returns:
        A ``(year, day_of_year)`` tuple of ints.

    Raises:
        ValueError: If ``date_str`` does not match the ``%d/%m/%Y`` format.
    """
    parsed = datetime.datetime.strptime(date_str, '%d/%m/%Y')
    # %j renders the zero-padded day of the year; int() drops the padding.
    return parsed.year, int(parsed.strftime('%j'))

# Map each camera name to the set of file-name prefixes that should be kept
camera_files = {}

# Walk the table row by row, accumulating the prefixes for every listed date
for index, row in df.iterrows():
    camera_name, date_str = row['Camera'], row['Date']

    # Year and day-of-year together form the date stamp used in the prefixes
    year, julian_day = date_to_julian(date_str)
    date_stamp = f'{year}{julian_day:03d}'

    # One prefix per product, in the form <product>_<camera>.<yyyyddd>
    expected_prefixes = [
        f'{product}_{camera_name}.{date_stamp}'
        for product in ('MOD10A1', 'MYD10A1', 'MOD10A1F', 'MYD10A1F')
    ]

    # Create the camera's set on first sight, then merge in this row's prefixes
    camera_files.setdefault(camera_name, set()).update(expected_prefixes)

# Prune every product folder of each camera down to the expected .hdf files.
# WARNING: this permanently deletes files from disk; there is no undo.
for camera_name, prefixes in camera_files.items():
    # str.startswith accepts a tuple of candidates, so build it once per camera
    # instead of re-scanning the prefix set for every file.
    wanted_prefixes = tuple(prefixes)

    # The four product folders that are checked for this camera
    folder_patterns = [
        f'MOD10A1_{camera_name}',
        f'MYD10A1_{camera_name}',
        f'MOD10A1F_{camera_name}',
        f'MYD10A1F_{camera_name}'
    ]

    for folder_name in folder_patterns:
        dir_path = os.path.join(data_store_path, folder_name)

        # Skip folders that are missing, and also paths that exist but are not
        # directories (os.listdir would raise on those).
        if not os.path.isdir(dir_path):
            continue

        for file_name in os.listdir(dir_path):
            entry_path = os.path.join(dir_path, file_name)

            # os.listdir also returns sub-directories, and os.remove raises on a
            # directory, which would abort the run part-way through the deletions.
            # Leave real directories alone; symlinks are still treated like files.
            if os.path.isdir(entry_path) and not os.path.islink(entry_path):
                print(f"Skipping directory: {entry_path}")
                continue

            # Keep only files that match an expected prefix AND end with '.hdf'
            is_expected = file_name.startswith(wanted_prefixes) and file_name.endswith('.hdf')
            if is_expected:
                print(f"Keeping file: {entry_path}")
            else:
                print(f"Deleting file: {entry_path}")
                os.remove(entry_path)

print("Completed processing.")

Completed processing.


In [2]:
# Inspect the camera -> expected-prefix-set mapping built in the previous cell
camera_files.items()

dict_items([('sodankyla_full', {'MOD10A1_sodankyla_full.2022006', 'MYD10A1_sodankyla_full.2023128', 'MOD10A1F_sodankyla_full.2020250', 'MOD10A1_sodankyla_full.2020117', 'MOD10A1_sodankyla_full.2019136', 'MYD10A1_sodankyla_full.2020250', 'MOD10A1F_sodankyla_full.2022228', 'MOD10A1F_sodankyla_full.2023227', 'MOD10A1F_sodankyla_full.2023107', 'MOD10A1F_sodankyla_full.2023128', 'MOD10A1F_sodankyla_full.2022348', 'MYD10A1_sodankyla_full.2020113', 'MOD10A1_sodankyla_full.2022261', 'MOD10A1F_sodankyla_full.2020287', 'MYD10A1_sodankyla_full.2022064', 'MOD10A1F_sodankyla_full.2021066', 'MYD10A1_sodankyla_full.2019165', 'MOD10A1_sodankyla_full.2019296', 'MYD10A1_sodankyla_full.2020192', 'MOD10A1_sodankyla_full.2019234', 'MYD10A1F_sodankyla_full.2021280', 'MYD10A1F_sodankyla_full.2023136', 'MYD10A1F_sodankyla_full.2023107', 'MOD10A1F_sodankyla_full.2019252', 'MOD10A1F_sodankyla_full.2023190', 'MYD10A1_sodankyla_full.2022308', 'MOD10A1_sodankyla_full.2023074', 'MYD10A1F_sodankyla_full.2023219', 'M