In [None]:
import pandas as pd
import os, sys 
import glob

In [None]:
df_colnames = ['timestamp', 'icao24', 'lat', 'lon', 'crs', 'callsign', 'alt']

In [None]:
!pip install tqdm

In [9]:
import multiprocessing as mp
from tqdm import tqdm

# Worker must be top-level for multiprocessing pickling
def _process_one_date_dir(args):
    path, date_dir, colnames = args
    date_files = glob.glob(os.path.join(path, date_dir, '*.csv'))
    # Filter out files starting with ._
    date_files = [f for f in date_files if not os.path.basename(f).startswith('._')]
    date_files.sort()

    catalogue_dfs = []
    for file in date_files:
        file_df = pd.read_csv(file, header=None, names=colnames)
        file_df['id'] = file_df['icao24'].str.upper() + file_df['callsign'].str.strip().str.upper()
        unique_ids = file_df['id'].unique()
        file_catalogue = pd.DataFrame({
            'id': unique_ids,
            'file_name': os.path.splitext(os.path.basename(file))[0],
            'folder_name': date_dir
        })
        catalogue_dfs.append(file_catalogue)

    if catalogue_dfs:
        catalogue_df = pd.concat(catalogue_dfs, ignore_index=True)
    else:
        catalogue_df = pd.DataFrame(columns=['id', 'file_name', 'folder_name'])

    catalogue_df.to_csv(os.path.join(path, date_dir, f'cat_{date_dir}.cat'), index=False)
    return date_dir


def write_id_catalogue_for_each_date(path: str):
    date_dirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    date_dirs.sort()

    n_workers = max(1, mp.cpu_count() - 1)
    args_list = [(path, d, df_colnames) for d in date_dirs]

    # Use fork in notebooks to avoid spawn pickling issues on macOS
    ctx = mp.get_context('fork')
    with ctx.Pool(processes=n_workers) as pool:
        list(tqdm(
            pool.imap(_process_one_date_dir, args_list),
            total=len(date_dirs),
            desc='Date dirs'
        ))


write_id_catalogue_for_each_date('../summer24/raw')


Date dirs: 100%|██████████| 184/184 [25:47<00:00,  8.41s/it]


In [11]:
# Recursively remove all .ipynb_checkpoints
!find ../summer24/raw/ -type d -name ".ipynb_checkpoints" -exec rm -rf {} +


In [12]:
def process_catalogue_folder_level(path: str):
    # Get a list of all directories in the specified path
    date_dirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    # Sort the directories by name
    date_dirs.sort()
    # For each directory, get a list of all files in the directory
    for date_dir in date_dirs:
        # Get the cat file: 
        cat_file = os.path.join(path, date_dir, f'cat_{date_dir}.cat')
        # Read the cat file
        cat_df = pd.read_csv(cat_file)
        print(cat_file)
        # Change file_name to folder_name + '/' + file_name
        cat_df['file_name'] = cat_df.apply(lambda x: os.path.join(str(x['folder_name']), str(x['file_name'])), axis=1)
        # Convert the file_name column to str
        cat_df['file_name'] = cat_df['file_name'].astype(str)
        # columns: id, file_name
        # Group by ID and aggregate filenames with comma separator
        merged_df = cat_df.groupby('id')['file_name'].agg(','.join).reset_index()
        # Save the merged df to a csv file
        merged_df.to_csv(os.path.join(path, date_dir, f'cat2_{date_dir}.cat2'), index=False)

process_catalogue_folder_level('../summer24/raw')



../summer24/raw/2024-04-01/cat_2024-04-01.cat
../summer24/raw/2024-04-02/cat_2024-04-02.cat
../summer24/raw/2024-04-03/cat_2024-04-03.cat
../summer24/raw/2024-04-04/cat_2024-04-04.cat
../summer24/raw/2024-04-05/cat_2024-04-05.cat
../summer24/raw/2024-04-06/cat_2024-04-06.cat
../summer24/raw/2024-04-07/cat_2024-04-07.cat
../summer24/raw/2024-04-08/cat_2024-04-08.cat
../summer24/raw/2024-04-09/cat_2024-04-09.cat
../summer24/raw/2024-04-10/cat_2024-04-10.cat
../summer24/raw/2024-04-11/cat_2024-04-11.cat
../summer24/raw/2024-04-12/cat_2024-04-12.cat
../summer24/raw/2024-04-13/cat_2024-04-13.cat
../summer24/raw/2024-04-14/cat_2024-04-14.cat
../summer24/raw/2024-04-15/cat_2024-04-15.cat
../summer24/raw/2024-04-16/cat_2024-04-16.cat
../summer24/raw/2024-04-17/cat_2024-04-17.cat
../summer24/raw/2024-04-18/cat_2024-04-18.cat
../summer24/raw/2024-04-19/cat_2024-04-19.cat
../summer24/raw/2024-04-20/cat_2024-04-20.cat
../summer24/raw/2024-04-21/cat_2024-04-21.cat
../summer24/raw/2024-04-22/cat_202

In [13]:
def cross_folders_category_merging(path: str):
    # Get a list of all directories in the specified path
    date_dirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    # Sort the directories by name
    date_dirs.sort()
    
    for date_dir in date_dirs:
        # Folder 1
        # Get the last CSV filename
        date_files = glob.glob(os.path.join(path, date_dir, '*.csv'))
        date_files = [f for f in date_files if not f.startswith('._')]
        date_files.sort()
        last_csv_file = os.path.basename(date_files[-1])
        last_csv_file_base = last_csv_file.split('.')[0]
        print(f'last_csv_file: {last_csv_file}')
        cat1_filename = f'cat2_{date_dir}.cat2'

        # Folder 2
        # Get the first CSV filename in the next folder
        if date_dirs.index(date_dir) + 1 == len(date_dirs):
            continue
        next_date_dir = date_dirs[date_dirs.index(date_dir) + 1]
        next_date_files = glob.glob(os.path.join(path, next_date_dir, '*.csv'))
        next_date_files = [f for f in next_date_files if not f.startswith('._')]
        next_date_files.sort()
        first_csv_file = os.path.basename(next_date_files[0])
        first_csv_file_base = first_csv_file.split('.')[0]
        print(f'first_csv_file: {first_csv_file}')
        cat2_filename = f'cat2_{next_date_dir}.cat2'

        import csv
        # Read CAT1 into a dictionary: key = id, value = file_name string
        cat1_data = {}
        with open(os.path.join(path, date_dir, cat1_filename), newline='') as f:
            reader = csv.DictReader(f)
            for row in reader:
                cat1_data[row['id']] = row['file_name']

        # Read CAT2 into a dictionary: key = id, value = file_name string
        cat2_data = {}
        with open(os.path.join(path, next_date_dir, cat2_filename), newline='') as f:
            reader = csv.DictReader(f)
            for row in reader:
                cat2_data[row['id']] = row['file_name']

        # Process each ID in CAT1 that might span both files
        for id_, cat1_files in list(cat1_data.items()):
            # Check if CAT1 file_name contains last_csv_file
            if last_csv_file_base in cat1_files:
                # Check if the same id exists in CAT2 and its file_name contains first_csv_file
                if id_ in cat2_data and first_csv_file_base in cat2_data[id_]:
                    # Concatenate CAT2 file_name to CAT1 file_name (you may choose a separator, here we use a comma)
                    cat1_data[id_] = cat1_files + "," + cat2_data[id_]
                    # Remove the entry from CAT2 as it has now been merged
                    del cat2_data[id_]

        # Optionally, write the updated CAT1 data back to a CSV file
        with open(os.path.join(path, date_dir, f'cat3_{date_dir}.cat3'), 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['id', 'file_name'])
            for id_, file_names in cat1_data.items():
                writer.writerow([id_, file_names])

        # And write the updated CAT2 data (with spanning IDs removed) back to a CSV file
        with open(str(os.path.join(path, next_date_dir, f'cat3_{next_date_dir}.cat3')), 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['id', 'file_name'])
            for id_, file_names in cat2_data.items():
                writer.writerow([id_, file_names])

cross_folders_category_merging('../summer24/raw')



last_csv_file: 1712005200.csv
first_csv_file: 1712008800.csv
last_csv_file: 1712091600.csv
first_csv_file: 1712095200.csv
last_csv_file: 1712178000.csv
first_csv_file: 1712181600.csv
last_csv_file: 1712264400.csv
first_csv_file: 1712268000.csv
last_csv_file: 1712350800.csv
first_csv_file: 1712354400.csv
last_csv_file: 1712437200.csv
first_csv_file: 1712440800.csv
last_csv_file: 1712523600.csv
first_csv_file: 1712527200.csv
last_csv_file: 1712610000.csv
first_csv_file: 1712613600.csv
last_csv_file: 1712696400.csv
first_csv_file: 1712700000.csv
last_csv_file: 1712782800.csv
first_csv_file: 1712786400.csv
last_csv_file: 1712869200.csv
first_csv_file: 1712872800.csv
last_csv_file: 1712955600.csv
first_csv_file: 1712959200.csv
last_csv_file: 1713042000.csv
first_csv_file: 1713045600.csv
last_csv_file: 1713128400.csv
first_csv_file: 1713132000.csv
last_csv_file: 1713214800.csv
first_csv_file: 1713218400.csv
last_csv_file: 1713301200.csv
first_csv_file: 1713304800.csv
last_csv_file: 171338760

In [19]:
def filter_for_consecutive_timestamps(path: str):
    # Get a list of all directories in the specified path
    date_dirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    # Sort the directories by name
    date_dirs.sort()

    for date_dir in date_dirs:
        # Get the cat file: 
        cat_file = os.path.join(path, date_dir, f'cat3_{date_dir}.cat3')
        # Read the cat file
        cat_df = pd.read_csv(cat_file)
        # columns: id, file_name
        ids_marked_to_delete = []

        # Iterate through each row in the dataframe
        for idx, row in tqdm(cat_df.iterrows(), total=len(cat_df), desc=f'Processing {date_dir}'):
            id_ = row['id']
            file_name = row['file_name']
            
            # Split the timestamps and extract numbers after backslash
            timestamps = []
            for ts in file_name.split(','):
                try:
                    timestamps.append(int(ts.split('/')[1]))
                except:
                    # Skip if timestamp can't be parsed
                    continue
                    
            # Sort timestamps to check if they're consecutive
            timestamps.sort()
            
            # Check if timestamps are 3600 seconds apart
            for i in range(len(timestamps)-1):
                if timestamps[i+1] - timestamps[i] != 3600:
                    ids_marked_to_delete.append(id_)
                    break


        # Filter out the ids marked to delete
        print(f'{len(ids_marked_to_delete)} / {len(cat_df)} ids marked to delete')
        cat_df = cat_df[~cat_df['id'].isin(ids_marked_to_delete)]

        # Save the filtered cat_df to a csv file
        cat_df.to_csv(os.path.join(path, date_dir, f'cat4_{date_dir}.cat4'), index=False)

filter_for_consecutive_timestamps('../summer24/raw')


Processing 2024-04-01: 100%|██████████| 29283/29283 [00:02<00:00, 10701.11it/s]


960 / 29283 ids marked to delete


Processing 2024-04-02: 100%|██████████| 30517/30517 [00:02<00:00, 10970.10it/s]


1539 / 30517 ids marked to delete


Processing 2024-04-03: 100%|██████████| 31005/31005 [00:02<00:00, 10655.48it/s]


1621 / 31005 ids marked to delete


Processing 2024-04-04: 100%|██████████| 31903/31903 [00:03<00:00, 10339.50it/s]


1710 / 31903 ids marked to delete


Processing 2024-04-05: 100%|██████████| 32913/32913 [00:02<00:00, 11024.11it/s]


1775 / 32913 ids marked to delete


Processing 2024-04-06: 100%|██████████| 30235/30235 [00:02<00:00, 11192.29it/s]


1805 / 30235 ids marked to delete


Processing 2024-04-07: 100%|██████████| 32435/32435 [00:03<00:00, 10708.09it/s]


1602 / 32435 ids marked to delete


Processing 2024-04-08: 100%|██████████| 32395/32395 [00:02<00:00, 11146.74it/s]


1709 / 32395 ids marked to delete


Processing 2024-04-09: 100%|██████████| 31661/31661 [00:02<00:00, 11357.18it/s]


1532 / 31661 ids marked to delete


Processing 2024-04-10: 100%|██████████| 32541/32541 [00:02<00:00, 11036.63it/s]


1804 / 32541 ids marked to delete


Processing 2024-04-11: 100%|██████████| 34101/34101 [00:03<00:00, 11342.78it/s]


2020 / 34101 ids marked to delete


Processing 2024-04-12: 100%|██████████| 34455/34455 [00:03<00:00, 11092.39it/s]


2143 / 34455 ids marked to delete


Processing 2024-04-13: 100%|██████████| 31881/31881 [00:02<00:00, 11291.36it/s]


2067 / 31881 ids marked to delete


Processing 2024-04-14: 100%|██████████| 35876/35876 [00:03<00:00, 11350.62it/s]


1922 / 35876 ids marked to delete


Processing 2024-04-15: 100%|██████████| 33497/33497 [00:03<00:00, 11075.90it/s]


1580 / 33497 ids marked to delete


Processing 2024-04-16: 100%|██████████| 32336/32336 [00:02<00:00, 11254.29it/s]


1658 / 32336 ids marked to delete


Processing 2024-04-17: 100%|██████████| 30799/30799 [00:02<00:00, 11226.22it/s]


1819 / 30799 ids marked to delete


Processing 2024-04-18: 100%|██████████| 34123/34123 [00:03<00:00, 10729.40it/s]


1986 / 34123 ids marked to delete


Processing 2024-04-19: 100%|██████████| 34935/34935 [00:03<00:00, 10735.53it/s]


2627 / 34935 ids marked to delete


Processing 2024-04-20: 100%|██████████| 32274/32274 [00:03<00:00, 10231.83it/s]


1666 / 32274 ids marked to delete


Processing 2024-04-21: 100%|██████████| 32897/32897 [00:02<00:00, 11202.61it/s]


1493 / 32897 ids marked to delete


Processing 2024-04-22: 100%|██████████| 32989/32989 [00:02<00:00, 11119.62it/s]


1646 / 32989 ids marked to delete


Processing 2024-04-23: 100%|██████████| 32833/32833 [00:02<00:00, 10953.09it/s]


1881 / 32833 ids marked to delete


Processing 2024-04-24: 100%|██████████| 32829/32829 [00:02<00:00, 11270.61it/s]


1921 / 32829 ids marked to delete


Processing 2024-04-25: 100%|██████████| 31405/31405 [00:02<00:00, 11250.55it/s]


1844 / 31405 ids marked to delete


Processing 2024-04-26: 100%|██████████| 34971/34971 [00:03<00:00, 11207.69it/s]


1833 / 34971 ids marked to delete


Processing 2024-04-27: 100%|██████████| 31742/31742 [00:02<00:00, 11039.63it/s]


1693 / 31742 ids marked to delete


Processing 2024-04-28: 100%|██████████| 32734/32734 [00:03<00:00, 10821.16it/s]


1463 / 32734 ids marked to delete


Processing 2024-04-29: 100%|██████████| 33133/33133 [00:03<00:00, 10857.64it/s]


1886 / 33133 ids marked to delete


Processing 2024-04-30: 100%|██████████| 33563/33563 [00:03<00:00, 10773.31it/s]


1954 / 33563 ids marked to delete


Processing 2024-05-01: 100%|██████████| 33841/33841 [00:03<00:00, 10918.87it/s]


1886 / 33841 ids marked to delete


Processing 2024-05-02: 100%|██████████| 33520/33520 [00:03<00:00, 10942.32it/s]


1636 / 33520 ids marked to delete


Processing 2024-05-03: 100%|██████████| 34807/34807 [00:03<00:00, 11251.88it/s]


1563 / 34807 ids marked to delete


Processing 2024-05-04: 100%|██████████| 32993/32993 [00:02<00:00, 11150.11it/s]


1879 / 32993 ids marked to delete


Processing 2024-05-05: 100%|██████████| 34575/34575 [00:03<00:00, 11243.73it/s]


1579 / 34575 ids marked to delete


Processing 2024-05-06: 100%|██████████| 34344/34344 [00:03<00:00, 10983.76it/s]


1491 / 34344 ids marked to delete


Processing 2024-05-07: 100%|██████████| 34597/34597 [00:03<00:00, 11259.20it/s]


1823 / 34597 ids marked to delete


Processing 2024-05-08: 100%|██████████| 35582/35582 [00:03<00:00, 11313.47it/s]


2097 / 35582 ids marked to delete


Processing 2024-05-09: 100%|██████████| 35904/35904 [00:03<00:00, 11361.27it/s]


2448 / 35904 ids marked to delete


Processing 2024-05-10: 100%|██████████| 32548/32548 [00:03<00:00, 10563.41it/s]


1976 / 32548 ids marked to delete


Processing 2024-05-11: 100%|██████████| 34224/34224 [00:03<00:00, 11370.06it/s]


2408 / 34224 ids marked to delete


Processing 2024-05-12: 100%|██████████| 36565/36565 [00:03<00:00, 11396.71it/s]


2111 / 36565 ids marked to delete


Processing 2024-05-13: 100%|██████████| 35581/35581 [00:03<00:00, 11124.19it/s]


1778 / 35581 ids marked to delete


Processing 2024-05-14: 100%|██████████| 35059/35059 [00:03<00:00, 11329.02it/s]


1903 / 35059 ids marked to delete


Processing 2024-05-15: 100%|██████████| 35208/35208 [00:03<00:00, 11361.38it/s]


1963 / 35208 ids marked to delete


Processing 2024-05-16: 100%|██████████| 35453/35453 [00:03<00:00, 11156.12it/s]


1820 / 35453 ids marked to delete


Processing 2024-05-17: 100%|██████████| 35345/35345 [00:03<00:00, 11338.84it/s]


2027 / 35345 ids marked to delete


Processing 2024-05-18: 100%|██████████| 33451/33451 [00:02<00:00, 11367.48it/s]


1781 / 33451 ids marked to delete


Processing 2024-05-19: 100%|██████████| 35116/35116 [00:03<00:00, 10606.81it/s]


1734 / 35116 ids marked to delete


Processing 2024-05-20: 100%|██████████| 11333/11333 [00:01<00:00, 10924.36it/s]


146 / 11333 ids marked to delete


Processing 2024-05-21: 100%|██████████| 31342/31342 [00:02<00:00, 11207.73it/s]


1473 / 31342 ids marked to delete


Processing 2024-05-22: 100%|██████████| 34323/34323 [00:03<00:00, 10780.48it/s]


1727 / 34323 ids marked to delete


Processing 2024-05-23: 100%|██████████| 36178/36178 [00:03<00:00, 11372.21it/s]


2012 / 36178 ids marked to delete


Processing 2024-05-24: 100%|██████████| 37017/37017 [00:03<00:00, 11352.99it/s]


2028 / 37017 ids marked to delete


Processing 2024-05-25: 100%|██████████| 34683/34683 [00:03<00:00, 11349.42it/s]


2127 / 34683 ids marked to delete


Processing 2024-05-26: 100%|██████████| 35580/35580 [00:03<00:00, 11378.05it/s]


1831 / 35580 ids marked to delete


Processing 2024-05-27: 100%|██████████| 36300/36300 [00:03<00:00, 11330.30it/s]


1986 / 36300 ids marked to delete


Processing 2024-05-28: 100%|██████████| 35170/35170 [00:03<00:00, 11042.09it/s]


1921 / 35170 ids marked to delete


Processing 2024-05-29: 100%|██████████| 36217/36217 [00:03<00:00, 11028.65it/s]


2010 / 36217 ids marked to delete


Processing 2024-05-30: 100%|██████████| 35912/35912 [00:03<00:00, 10882.46it/s]


1821 / 35912 ids marked to delete


Processing 2024-05-31: 100%|██████████| 36564/36564 [00:03<00:00, 11092.52it/s]


1800 / 36564 ids marked to delete


Processing 2024-06-01: 100%|██████████| 34302/34302 [00:03<00:00, 10150.52it/s]


1848 / 34302 ids marked to delete


Processing 2024-06-02: 100%|██████████| 37164/37164 [00:03<00:00, 10573.27it/s]


1900 / 37164 ids marked to delete


Processing 2024-06-03: 100%|██████████| 36690/36690 [00:03<00:00, 11002.09it/s]


1855 / 36690 ids marked to delete


Processing 2024-06-04: 100%|██████████| 34351/34351 [00:03<00:00, 10327.32it/s]


2021 / 34351 ids marked to delete


Processing 2024-06-05: 100%|██████████| 35502/35502 [00:03<00:00, 10881.07it/s]


2384 / 35502 ids marked to delete


Processing 2024-06-06: 100%|██████████| 35964/35964 [00:03<00:00, 10587.90it/s]


2470 / 35964 ids marked to delete


Processing 2024-06-07: 100%|██████████| 36618/36618 [00:03<00:00, 11115.04it/s]


2379 / 36618 ids marked to delete


Processing 2024-06-08: 100%|██████████| 33671/33671 [00:02<00:00, 11348.16it/s]


2068 / 33671 ids marked to delete


Processing 2024-06-09: 100%|██████████| 34835/34835 [00:03<00:00, 11345.93it/s]


1805 / 34835 ids marked to delete


Processing 2024-06-10: 100%|██████████| 35365/35365 [00:03<00:00, 10698.70it/s]


1763 / 35365 ids marked to delete


Processing 2024-06-11: 100%|██████████| 34731/34731 [00:03<00:00, 11142.34it/s]


2115 / 34731 ids marked to delete


Processing 2024-06-12: 100%|██████████| 36147/36147 [00:03<00:00, 11330.62it/s]


2386 / 36147 ids marked to delete


Processing 2024-06-13: 100%|██████████| 36646/36646 [00:03<00:00, 11078.78it/s]


2367 / 36646 ids marked to delete


Processing 2024-06-14: 100%|██████████| 36378/36378 [00:03<00:00, 11252.05it/s]


3558 / 36378 ids marked to delete


Processing 2024-06-15: 100%|██████████| 33399/33399 [00:03<00:00, 11039.74it/s]


1695 / 33399 ids marked to delete


Processing 2024-06-16: 100%|██████████| 35561/35561 [00:03<00:00, 10894.79it/s]


1905 / 35561 ids marked to delete


Processing 2024-06-17: 100%|██████████| 36226/36226 [00:03<00:00, 11327.95it/s]


2110 / 36226 ids marked to delete


Processing 2024-06-18: 100%|██████████| 35796/35796 [00:03<00:00, 11301.26it/s]


2300 / 35796 ids marked to delete


Processing 2024-06-19: 100%|██████████| 36688/36688 [00:03<00:00, 11173.26it/s]


2459 / 36688 ids marked to delete


Processing 2024-06-20: 100%|██████████| 36790/36790 [00:03<00:00, 11338.51it/s]


4593 / 36790 ids marked to delete


Processing 2024-06-21: 100%|██████████| 36535/36535 [00:03<00:00, 11304.83it/s]


2103 / 36535 ids marked to delete


Processing 2024-06-22: 100%|██████████| 34529/34529 [00:03<00:00, 11348.38it/s]


2044 / 34529 ids marked to delete


Processing 2024-06-23: 100%|██████████| 36574/36574 [00:03<00:00, 11364.53it/s]


2255 / 36574 ids marked to delete


Processing 2024-06-24: 100%|██████████| 37189/37189 [00:03<00:00, 11393.93it/s]


2312 / 37189 ids marked to delete


Processing 2024-06-25: 100%|██████████| 36522/36522 [00:03<00:00, 10870.28it/s]


2501 / 36522 ids marked to delete


Processing 2024-06-26: 100%|██████████| 35796/35796 [00:03<00:00, 11009.09it/s]


10291 / 35796 ids marked to delete


Processing 2024-06-27: 100%|██████████| 42097/42097 [00:03<00:00, 11389.94it/s]


3019 / 42097 ids marked to delete


Processing 2024-06-28: 100%|██████████| 37683/37683 [00:03<00:00, 11325.35it/s]


2339 / 37683 ids marked to delete


Processing 2024-06-29: 100%|██████████| 35154/35154 [00:03<00:00, 11100.08it/s]


3992 / 35154 ids marked to delete


Processing 2024-06-30: 100%|██████████| 35491/35491 [00:03<00:00, 10353.56it/s]


1830 / 35491 ids marked to delete


Processing 2024-07-01: 100%|██████████| 36233/36233 [00:03<00:00, 10852.91it/s]


1990 / 36233 ids marked to delete


Processing 2024-07-02: 100%|██████████| 36049/36049 [00:03<00:00, 11059.26it/s]


2138 / 36049 ids marked to delete


Processing 2024-07-03: 100%|██████████| 36291/36291 [00:03<00:00, 10761.04it/s]


2001 / 36291 ids marked to delete


Processing 2024-07-04: 100%|██████████| 36795/36795 [00:03<00:00, 11006.23it/s]


2234 / 36795 ids marked to delete


Processing 2024-07-05: 100%|██████████| 37804/37804 [00:03<00:00, 11253.02it/s]


2301 / 37804 ids marked to delete


Processing 2024-07-06: 100%|██████████| 33623/33623 [00:03<00:00, 10819.91it/s]


2139 / 33623 ids marked to delete


Processing 2024-07-07: 100%|██████████| 35959/35959 [00:03<00:00, 10517.64it/s]


1909 / 35959 ids marked to delete


Processing 2024-07-08: 100%|██████████| 37280/37280 [00:03<00:00, 10683.64it/s]


2139 / 37280 ids marked to delete


Processing 2024-07-09: 100%|██████████| 36205/36205 [00:03<00:00, 10503.63it/s]


2348 / 36205 ids marked to delete


Processing 2024-07-10: 100%|██████████| 36437/36437 [00:03<00:00, 11203.54it/s]


2249 / 36437 ids marked to delete


Processing 2024-07-11: 100%|██████████| 37582/37582 [00:03<00:00, 11011.74it/s]


2504 / 37582 ids marked to delete


Processing 2024-07-12: 100%|██████████| 36784/36784 [00:03<00:00, 11278.63it/s]


2027 / 36784 ids marked to delete


Processing 2024-07-13: 100%|██████████| 35745/35745 [00:03<00:00, 11259.86it/s]


2063 / 35745 ids marked to delete


Processing 2024-07-14: 100%|██████████| 37337/37337 [00:03<00:00, 11014.65it/s]


2446 / 37337 ids marked to delete


Processing 2024-07-15: 100%|██████████| 37118/37118 [00:03<00:00, 11291.59it/s]


2173 / 37118 ids marked to delete


Processing 2024-07-16: 100%|██████████| 36161/36161 [00:03<00:00, 11277.45it/s]


2110 / 36161 ids marked to delete


Processing 2024-07-17: 100%|██████████| 37469/37469 [00:03<00:00, 11058.25it/s]


2667 / 37469 ids marked to delete


Processing 2024-07-18: 100%|██████████| 38217/38217 [00:03<00:00, 10797.32it/s]


2728 / 38217 ids marked to delete


Processing 2024-07-19: 100%|██████████| 36540/36540 [00:03<00:00, 11030.23it/s]


2686 / 36540 ids marked to delete


Processing 2024-07-20: 100%|██████████| 35786/35786 [00:03<00:00, 11269.04it/s]


2426 / 35786 ids marked to delete


Processing 2024-07-21: 100%|██████████| 35930/35930 [00:03<00:00, 11277.18it/s]


2025 / 35930 ids marked to delete


Processing 2024-07-22: 100%|██████████| 36631/36631 [00:03<00:00, 11094.06it/s]


2231 / 36631 ids marked to delete


Processing 2024-07-23: 100%|██████████| 36524/36524 [00:03<00:00, 11276.97it/s]


2221 / 36524 ids marked to delete


Processing 2024-07-24: 100%|██████████| 37231/37231 [00:03<00:00, 11283.62it/s]


2561 / 37231 ids marked to delete


Processing 2024-07-25: 100%|██████████| 37286/37286 [00:03<00:00, 11090.13it/s]


2430 / 37286 ids marked to delete


Processing 2024-07-26: 100%|██████████| 37364/37364 [00:03<00:00, 11293.35it/s]


2357 / 37364 ids marked to delete


Processing 2024-07-27: 100%|██████████| 35886/35886 [00:03<00:00, 10864.99it/s]


2232 / 35886 ids marked to delete


Processing 2024-07-28: 100%|██████████| 37466/37466 [00:03<00:00, 11065.80it/s]


2451 / 37466 ids marked to delete


Processing 2024-07-29: 100%|██████████| 37458/37458 [00:03<00:00, 11279.80it/s]


2307 / 37458 ids marked to delete


Processing 2024-07-30: 100%|██████████| 37202/37202 [00:03<00:00, 11063.36it/s]


2603 / 37202 ids marked to delete


Processing 2024-07-31: 100%|██████████| 37404/37404 [00:03<00:00, 11268.31it/s]


2518 / 37404 ids marked to delete


Processing 2024-08-01: 100%|██████████| 36480/36480 [00:03<00:00, 11293.24it/s]


2237 / 36480 ids marked to delete


Processing 2024-08-02: 100%|██████████| 37687/37687 [00:03<00:00, 11078.17it/s]


2202 / 37687 ids marked to delete


Processing 2024-08-03: 100%|██████████| 35823/35823 [00:03<00:00, 11264.28it/s]


2148 / 35823 ids marked to delete


Processing 2024-08-04: 100%|██████████| 36525/36525 [00:03<00:00, 11280.13it/s]


2040 / 36525 ids marked to delete


Processing 2024-08-05: 100%|██████████| 37105/37105 [00:03<00:00, 11074.89it/s]


2108 / 37105 ids marked to delete


Processing 2024-08-06: 100%|██████████| 36725/36725 [00:03<00:00, 10914.26it/s]


2361 / 36725 ids marked to delete


Processing 2024-08-07: 100%|██████████| 36067/36067 [00:03<00:00, 10970.57it/s]


2211 / 36067 ids marked to delete


Processing 2024-08-08: 100%|██████████| 36767/36767 [00:03<00:00, 11199.87it/s]


2195 / 36767 ids marked to delete


Processing 2024-08-09: 100%|██████████| 37649/37649 [00:04<00:00, 9331.13it/s] 


2236 / 37649 ids marked to delete


Processing 2024-08-10: 100%|██████████| 35921/35921 [00:03<00:00, 10036.04it/s]


2245 / 35921 ids marked to delete


Processing 2024-08-11: 100%|██████████| 37094/37094 [00:03<00:00, 10972.38it/s]


2298 / 37094 ids marked to delete


Processing 2024-08-12: 100%|██████████| 36800/36800 [00:03<00:00, 10865.47it/s]


2121 / 36800 ids marked to delete


Processing 2024-08-13: 100%|██████████| 35895/35895 [00:03<00:00, 10563.18it/s]


2174 / 35895 ids marked to delete


Processing 2024-08-14: 100%|██████████| 35862/35862 [00:03<00:00, 10711.03it/s]


2102 / 35862 ids marked to delete


Processing 2024-08-15: 100%|██████████| 36036/36036 [00:03<00:00, 10931.96it/s]


2148 / 36036 ids marked to delete


Processing 2024-08-16: 100%|██████████| 37481/37481 [00:03<00:00, 10770.04it/s]


2323 / 37481 ids marked to delete


Processing 2024-08-17: 100%|██████████| 35591/35591 [00:03<00:00, 11153.15it/s]


2238 / 35591 ids marked to delete


Processing 2024-08-18: 100%|██████████| 35640/35640 [00:03<00:00, 10729.27it/s]


3594 / 35640 ids marked to delete


Processing 2024-08-19: 100%|██████████| 37053/37053 [00:03<00:00, 10758.39it/s]


1926 / 37053 ids marked to delete


Processing 2024-08-20: 100%|██████████| 36327/36327 [00:03<00:00, 11211.18it/s]


2072 / 36327 ids marked to delete


Processing 2024-08-21: 100%|██████████| 36640/36640 [00:03<00:00, 11010.56it/s]


2180 / 36640 ids marked to delete


Processing 2024-08-22: 100%|██████████| 36918/36918 [00:03<00:00, 11222.45it/s]


2220 / 36918 ids marked to delete


Processing 2024-08-23: 100%|██████████| 37467/37467 [00:03<00:00, 11007.05it/s]


2023 / 37467 ids marked to delete


Processing 2024-08-24: 100%|██████████| 35032/35032 [00:03<00:00, 11229.56it/s]


2759 / 35032 ids marked to delete


Processing 2024-08-25: 100%|██████████| 36330/36330 [00:03<00:00, 11195.64it/s]


1922 / 36330 ids marked to delete


Processing 2024-08-26: 100%|██████████| 37069/37069 [00:03<00:00, 10922.52it/s]


2088 / 37069 ids marked to delete


Processing 2024-08-27: 100%|██████████| 36538/36538 [00:03<00:00, 10141.62it/s]


2261 / 36538 ids marked to delete


Processing 2024-08-28: 100%|██████████| 38225/38225 [00:03<00:00, 11249.30it/s]


2637 / 38225 ids marked to delete


Processing 2024-08-29: 100%|██████████| 38110/38110 [00:03<00:00, 11226.29it/s]


2560 / 38110 ids marked to delete


Processing 2024-08-30: 100%|██████████| 38662/38662 [00:03<00:00, 11009.26it/s]


2507 / 38662 ids marked to delete


Processing 2024-08-31: 100%|██████████| 35945/35945 [00:03<00:00, 11196.71it/s]


2411 / 35945 ids marked to delete


Processing 2024-09-01: 100%|██████████| 36743/36743 [00:03<00:00, 11083.05it/s]


2116 / 36743 ids marked to delete


Processing 2024-09-02: 100%|██████████| 36359/36359 [00:03<00:00, 11241.47it/s]


1808 / 36359 ids marked to delete


Processing 2024-09-03: 100%|██████████| 35970/35970 [00:03<00:00, 10626.27it/s]


2034 / 35970 ids marked to delete


Processing 2024-09-04: 100%|██████████| 36234/36234 [00:03<00:00, 11221.35it/s]


2109 / 36234 ids marked to delete


Processing 2024-09-05: 100%|██████████| 36084/36084 [00:03<00:00, 11200.03it/s]


1855 / 36084 ids marked to delete


Processing 2024-09-06: 100%|██████████| 37467/37467 [00:03<00:00, 10989.71it/s]


2051 / 37467 ids marked to delete


Processing 2024-09-07: 100%|██████████| 34929/34929 [00:03<00:00, 10748.22it/s]


2222 / 34929 ids marked to delete


Processing 2024-09-08: 100%|██████████| 35096/35096 [00:03<00:00, 11159.44it/s]


1655 / 35096 ids marked to delete


Processing 2024-09-09: 100%|██████████| 35690/35690 [00:03<00:00, 10724.83it/s]


1639 / 35690 ids marked to delete


Processing 2024-09-10: 100%|██████████| 35100/35100 [00:03<00:00, 10638.65it/s]


1859 / 35100 ids marked to delete


Processing 2024-09-11: 100%|██████████| 34958/34958 [00:03<00:00, 11127.56it/s]


1845 / 34958 ids marked to delete


Processing 2024-09-12: 100%|██████████| 35958/35958 [00:03<00:00, 10717.31it/s]


1981 / 35958 ids marked to delete


Processing 2024-09-13: 100%|██████████| 36698/36698 [00:03<00:00, 11175.80it/s]


2045 / 36698 ids marked to delete


Processing 2024-09-14: 100%|██████████| 34051/34051 [00:03<00:00, 10985.29it/s]


2035 / 34051 ids marked to delete


Processing 2024-09-15: 100%|██████████| 35765/35765 [00:03<00:00, 11223.17it/s]


1956 / 35765 ids marked to delete


Processing 2024-09-16: 100%|██████████| 35825/35825 [00:03<00:00, 11211.30it/s]


1921 / 35825 ids marked to delete


Processing 2024-09-17: 100%|██████████| 35594/35594 [00:03<00:00, 11001.69it/s]


2171 / 35594 ids marked to delete


Processing 2024-09-18: 100%|██████████| 36130/36130 [00:03<00:00, 11220.96it/s]


2290 / 36130 ids marked to delete


Processing 2024-09-19: 100%|██████████| 36627/36627 [00:03<00:00, 11237.90it/s]


2218 / 36627 ids marked to delete


Processing 2024-09-20: 100%|██████████| 37312/37312 [00:03<00:00, 11160.50it/s]


2203 / 37312 ids marked to delete


Processing 2024-09-21: 100%|██████████| 34571/34571 [00:03<00:00, 11185.63it/s]


2193 / 34571 ids marked to delete


Processing 2024-09-22: 100%|██████████| 35420/35420 [00:03<00:00, 10969.35it/s]


1916 / 35420 ids marked to delete


Processing 2024-09-23: 100%|██████████| 35061/35061 [00:03<00:00, 11198.79it/s]


1738 / 35061 ids marked to delete


Processing 2024-09-24: 100%|██████████| 34436/34436 [00:03<00:00, 11216.66it/s]


1959 / 34436 ids marked to delete


Processing 2024-09-25: 100%|██████████| 35060/35060 [00:03<00:00, 10973.82it/s]


1982 / 35060 ids marked to delete


Processing 2024-09-26: 100%|██████████| 35085/35085 [00:03<00:00, 11251.40it/s]


1840 / 35085 ids marked to delete


Processing 2024-09-27: 100%|██████████| 35824/35824 [00:03<00:00, 11229.33it/s]


1686 / 35824 ids marked to delete


Processing 2024-09-28: 100%|██████████| 33860/33860 [00:03<00:00, 10406.27it/s]


1909 / 33860 ids marked to delete


Processing 2024-09-29: 100%|██████████| 35800/35800 [00:03<00:00, 11242.16it/s]


2060 / 35800 ids marked to delete


Processing 2024-09-30: 100%|██████████| 35050/35050 [00:03<00:00, 10966.13it/s]


1651 / 35050 ids marked to delete


Processing 2024-10-01: 100%|██████████| 646/646 [00:00<00:00, 9826.11it/s]


0 / 646 ids marked to delete


# Attention: the first cell is the single-thread version for debugging, the second cell is production

In [None]:
from tqdm import tqdm

def build_full_trajectory_for_callsigns(path: str):
    # Get a list of all directories in the specified path
    date_dirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    # Sort the directories by name
    date_dirs.sort()
    

    # Create a new directory called cs
    os.makedirs(os.path.join(path, 'cs'), exist_ok=True)

    df_plain = pd.DataFrame(columns=df_colnames)
    
    for date_dir in date_dirs:
        # Create a new directory inside cs
        os.makedirs(os.path.join(path, 'cs', date_dir), exist_ok=True)
        # Get the cat file: 
        cat_file = os.path.join(path, date_dir, f'cat4_{date_dir}.cat4')
        # Read the cat file
        instruction_df = pd.read_csv(cat_file)
        # columns: id, file_name
        
        # All CSV files to read from instruction_df 
        # Split the file_name strings and get unique CSV files
        all_csv_files_to_read = []
        for files in instruction_df['file_name']:
            # Split on comma to get individual files
            file_list = files.split(',')
            all_csv_files_to_read.extend(file_list)
            
        # Remove duplicates while preserving order
        all_csv_files_to_read = list(dict.fromkeys(all_csv_files_to_read))

        print(f'{len(all_csv_files_to_read)} csv files to read')

        for csv_file in tqdm(all_csv_files_to_read, desc=f'Processing {date_dir}', total=len(all_csv_files_to_read)):
            ids_to_read = []
            # Find ids whose file_name contains the current csv_file
            # Only keep what is after the last backslash
            csv_file_base = csv_file.split('/')[-1] # file name 123456.csv
            csv_file_datedir = csv_file.split('/')[-2] # folder 2024-02-20
            ids_to_read = instruction_df[instruction_df['file_name'].str.contains(csv_file_base)]['id'].tolist()
            df = pd.read_csv(os.path.join(path, csv_file_datedir, f'{csv_file_base}.csv'), header=None, names=df_colnames)
            # Add the id column to the df by concatenating icao24 and callsign
            df['id'] = df['icao24'].str.upper() + df['callsign'].str.strip().str.upper()
            # Filter the df to only include rows with ids in ids_to_read
            df = df[df['id'].isin(ids_to_read)]
            # Add the df to df_plain
            df_plain = pd.concat([df_plain, df], ignore_index=True)

        # Sort df_plain by timestamp
        df_plain = df_plain.sort_values(by=['id', 'timestamp'])
        # Save the df_plain to a csv file
        df_plain.to_csv(os.path.join(path, 'cs', date_dir, f'cs_{date_dir}.csv'), index=False)


        
        


build_full_trajectory_for_callsigns('summer23/raw')
            
            
        


In [20]:
from mpire import WorkerPool
import pandas as pd
import os

def process_single_date_dir(date_dir, path, df_colnames):
    # Create a new DataFrame for this date_dir
    df_plain = pd.DataFrame(columns=df_colnames)
    
    # Create directory for output
    os.makedirs(os.path.join(path, 'cs', date_dir), exist_ok=True)
    
    # Get the cat file
    cat_file = os.path.join(path, date_dir, f'cat4_{date_dir}.cat4')
    instruction_df = pd.read_csv(cat_file)
    
    # Get unique CSV files to read
    all_csv_files_to_read = []
    for files in instruction_df['file_name']:
        file_list = files.split(',')
        all_csv_files_to_read.extend(file_list)
    all_csv_files_to_read = list(dict.fromkeys(all_csv_files_to_read))
    
    print(f'{date_dir}: {len(all_csv_files_to_read)} csv files to read')
    
    for csv_file in all_csv_files_to_read:
        csv_file_base = csv_file.split('/')[-1]
        csv_file_datedir = csv_file.split('/')[-2]
        ids_to_read = instruction_df[instruction_df['file_name'].str.contains(csv_file_base)]['id'].tolist()
        
        df = pd.read_csv(os.path.join(path, csv_file_datedir, f'{csv_file_base}.csv'), 
                        header=None, names=df_colnames)
        df['id'] = df['icao24'].str.upper() + df['callsign'].str.strip().str.upper()
        df = df[df['id'].isin(ids_to_read)]
        df_plain = pd.concat([df_plain, df], ignore_index=True)
    
    # Sort and save
    df_plain = df_plain.sort_values(by=['id', 'timestamp'])
    output_path = os.path.join(path, 'cs', date_dir, f'cs_{date_dir}.csv')
    df_plain.to_csv(output_path, index=False)
    return f"Completed processing {date_dir}"

def build_full_trajectory_for_callsigns(path: str):
    # Get and sort directories
    date_dirs = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))]
    date_dirs.sort()
    
    # Create output directory
    os.makedirs(os.path.join(path, 'cs'), exist_ok=True)
    
    # Set up parallel processing
    n_workers = os.cpu_count() - 1  # Leave one CPU free
    with WorkerPool(n_workers) as pool:
        results = pool.map(process_single_date_dir, 
                         [(d, path, df_colnames) for d in date_dirs],
                         progress_bar=True)
    
    for result in results:
        print(result)

# Call the function
build_full_trajectory_for_callsigns('../summer24/raw')

 19%|█▉        | 35/184 [13:57<1:06:28, 26.77s/it]

2024-05-12: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)


2024-05-13: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 20%|██        | 37/184 [14:17<46:27, 18.96s/it]

2024-05-15: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 21%|██        | 38/184 [14:49<56:01, 23.02s/it]

2024-05-14: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 21%|██        | 39/184 [15:21<1:01:41, 25.53s/it]

2024-05-17: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 22%|██▏       | 40/184 [15:29<48:59, 20.41s/it]  

2024-05-16: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 22%|██▏       | 41/184 [16:05<59:52, 25.12s/it]

2024-05-18: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 23%|██▎       | 42/184 [16:15<48:31, 20.50s/it]

2024-05-20: 12 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)


2024-05-19: 35 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 24%|██▍       | 45/184 [17:02<38:51, 16.77s/it]

2024-05-22: 48 csv files to read
2024-05-21: 41 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 25%|██▌       | 46/184 [17:10<32:41, 14.22s/it]

2024-05-26: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 26%|██▌       | 47/184 [17:43<45:14, 19.81s/it]

2024-05-24: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 26%|██▌       | 48/184 [18:07<47:39, 21.02s/it]

2024-05-27: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 27%|██▋       | 49/184 [18:26<46:20, 20.60s/it]

2024-05-25: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 27%|██▋       | 50/184 [18:43<43:42, 19.57s/it]

2024-05-23: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 28%|██▊       | 51/184 [19:17<52:51, 23.84s/it]

2024-05-28: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 28%|██▊       | 52/184 [19:32<46:31, 21.15s/it]

2024-05-30: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 29%|██▉       | 53/184 [19:59<50:14, 23.01s/it]

2024-05-31: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 29%|██▉       | 54/184 [20:24<50:41, 23.39s/it]

2024-06-01: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 30%|███       | 56/184 [20:41<32:39, 15.31s/it]

2024-06-02: 48 csv files to read
2024-05-29: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 31%|███       | 57/184 [21:34<55:56, 26.43s/it]

2024-06-04: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 32%|███▏      | 58/184 [22:08<1:00:28, 28.79s/it]

2024-06-03: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 32%|███▏      | 59/184 [22:21<50:04, 24.04s/it]  

2024-06-06: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 33%|███▎      | 60/184 [22:55<55:50, 27.02s/it]

2024-06-07: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 34%|███▎      | 62/184 [23:11<34:16, 16.86s/it]

2024-06-09: 48 csv files to read
2024-06-08: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 34%|███▍      | 63/184 [23:56<50:58, 25.28s/it]

2024-06-10: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 35%|███▍      | 64/184 [24:09<42:59, 21.50s/it]

2024-06-05: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 35%|███▌      | 65/184 [24:37<46:43, 23.56s/it]

2024-06-11: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 36%|███▌      | 66/184 [25:14<54:12, 27.56s/it]

2024-06-12: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 36%|███▋      | 67/184 [25:40<52:25, 26.88s/it]

2024-06-13: 42 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 37%|███▋      | 68/184 [25:46<39:49, 20.60s/it]

2024-06-15: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 38%|███▊      | 69/184 [26:19<46:55, 24.48s/it]

2024-06-14: 31 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 38%|███▊      | 70/184 [26:40<44:27, 23.40s/it]

2024-06-17: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 39%|███▊      | 71/184 [26:52<37:21, 19.83s/it]

2024-06-18: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 39%|███▉      | 72/184 [27:00<30:42, 16.45s/it]

2024-06-16: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 40%|████      | 74/184 [27:38<29:24, 16.04s/it]

2024-06-20: 30 csv files to read
2024-06-19: 31 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 41%|████      | 75/184 [28:09<37:18, 20.54s/it]

2024-06-22: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)


2024-06-21: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 42%|████▏     | 77/184 [29:22<56:38, 31.76s/it]

2024-06-27: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 42%|████▏     | 78/184 [29:28<42:28, 24.04s/it]

2024-06-26: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 43%|████▎     | 79/184 [29:39<35:08, 20.08s/it]

2024-06-23: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 43%|████▎     | 80/184 [29:56<33:10, 19.14s/it]

2024-06-24: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 44%|████▍     | 81/184 [30:04<27:13, 15.86s/it]

2024-06-25: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 45%|████▍     | 82/184 [30:49<41:43, 24.55s/it]

2024-06-28: 30 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 45%|████▌     | 83/184 [31:27<48:10, 28.62s/it]

2024-06-29: 32 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 46%|████▌     | 84/184 [32:09<54:31, 32.71s/it]

2024-06-30: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 46%|████▌     | 85/184 [32:30<48:10, 29.20s/it]

2024-07-02: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 47%|████▋     | 86/184 [32:40<38:17, 23.44s/it]

2024-07-01: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 47%|████▋     | 87/184 [32:51<31:53, 19.73s/it]

2024-07-03: 48 csv files to read


 48%|████▊     | 88/184 [32:54<23:08, 14.47s/it]

2024-07-05: 40 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 48%|████▊     | 89/184 [33:20<28:30, 18.00s/it]

2024-07-06: 47 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 49%|████▉     | 90/184 [34:32<53:35, 34.21s/it]

2024-07-04: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 49%|████▉     | 91/184 [34:40<40:50, 26.35s/it]

2024-07-07: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 50%|█████     | 92/184 [35:18<45:35, 29.73s/it]

2024-07-09: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 51%|█████     | 94/184 [35:35<27:31, 18.35s/it]

2024-07-11: 48 csv files to read
2024-07-10: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 52%|█████▏    | 95/184 [35:40<21:10, 14.28s/it]

2024-07-12: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 52%|█████▏    | 96/184 [36:33<37:50, 25.80s/it]

2024-07-08: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 53%|█████▎    | 97/184 [37:16<45:04, 31.08s/it]

2024-07-13: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 53%|█████▎    | 98/184 [37:39<41:05, 28.67s/it]

2024-07-14: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 54%|█████▍    | 99/184 [38:00<37:11, 26.25s/it]

2024-07-15: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 54%|█████▍    | 100/184 [38:17<33:09, 23.68s/it]

2024-07-17: 48 csv files to read


 55%|█████▍    | 101/184 [38:20<24:07, 17.44s/it]

2024-07-18: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 55%|█████▌    | 102/184 [39:21<41:43, 30.54s/it]

2024-07-16: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 56%|█████▌    | 103/184 [39:51<40:38, 30.10s/it]

2024-07-20: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 57%|█████▋    | 104/184 [40:10<35:49, 26.87s/it]

2024-07-19: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 57%|█████▋    | 105/184 [40:13<25:54, 19.68s/it]

2024-07-21: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 58%|█████▊    | 106/184 [41:08<39:16, 30.21s/it]

2024-07-23: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)


2024-07-24: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 59%|█████▊    | 108/184 [41:53<35:55, 28.37s/it]

2024-07-25: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)


2024-07-22: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 60%|█████▉    | 110/184 [42:47<36:48, 29.84s/it]

2024-07-26: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 60%|██████    | 111/184 [42:53<27:26, 22.55s/it]

2024-07-27: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 61%|██████    | 112/184 [43:30<32:27, 27.04s/it]

2024-07-28: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 61%|██████▏   | 113/184 [43:44<27:16, 23.04s/it]

2024-07-30: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)


2024-07-29: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 62%|██████▎   | 115/184 [44:27<27:37, 24.02s/it]

2024-07-31: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 63%|██████▎   | 116/184 [45:19<37:02, 32.69s/it]

2024-08-01: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 64%|██████▎   | 117/184 [45:31<29:24, 26.34s/it]

2024-08-03: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 64%|██████▍   | 118/184 [45:58<29:18, 26.65s/it]

2024-08-04: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 65%|██████▍   | 119/184 [46:13<24:50, 22.93s/it]

2024-08-02: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 65%|██████▌   | 120/184 [46:32<23:15, 21.80s/it]

2024-08-06: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 66%|██████▌   | 121/184 [47:00<24:56, 23.76s/it]

2024-08-07: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 66%|██████▋   | 122/184 [47:46<31:31, 30.51s/it]

2024-08-05: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 67%|██████▋   | 123/184 [48:03<26:51, 26.43s/it]

2024-08-09: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 67%|██████▋   | 124/184 [48:27<25:36, 25.61s/it]

2024-08-10: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 68%|██████▊   | 125/184 [48:50<24:17, 24.71s/it]

2024-08-11: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 68%|██████▊   | 126/184 [49:07<21:38, 22.38s/it]

2024-08-12: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)


2024-08-08: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 70%|██████▉   | 128/184 [50:25<31:54, 34.19s/it]

2024-08-13: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 70%|███████   | 129/184 [50:36<24:53, 27.16s/it]

2024-08-14: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 71%|███████   | 130/184 [50:53<21:45, 24.17s/it]

2024-08-15: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 71%|███████   | 131/184 [51:33<25:31, 28.89s/it]

2024-08-17: 30 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 72%|███████▏  | 132/184 [51:43<20:08, 23.23s/it]

2024-08-16: 48 csv files to read


 72%|███████▏  | 133/184 [51:44<14:11, 16.70s/it]

2024-08-18: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 73%|███████▎  | 134/184 [52:37<23:03, 27.66s/it]

2024-08-19: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 73%|███████▎  | 135/184 [52:53<19:37, 24.03s/it]

2024-08-20: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 74%|███████▍  | 137/184 [53:28<15:01, 19.17s/it]

2024-08-23: 48 csv files to read
2024-08-22: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 75%|███████▌  | 138/184 [54:08<19:23, 25.28s/it]

2024-08-25: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 76%|███████▌  | 139/184 [54:13<14:34, 19.43s/it]

2024-08-21: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 76%|███████▌  | 140/184 [54:27<13:03, 17.80s/it]

2024-08-24: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 77%|███████▋  | 141/184 [55:48<26:22, 36.79s/it]

2024-08-26: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 77%|███████▋  | 142/184 [56:15<23:36, 33.72s/it]

2024-08-27: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 78%|███████▊  | 143/184 [56:35<20:12, 29.57s/it]

2024-08-29: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 78%|███████▊  | 144/184 [56:40<14:47, 22.18s/it]

2024-08-28: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 79%|███████▉  | 145/184 [56:45<11:07, 17.11s/it]

2024-08-30: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 79%|███████▉  | 146/184 [57:43<18:31, 29.24s/it]

2024-09-01: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 80%|███████▉  | 147/184 [57:51<14:04, 22.82s/it]

2024-08-31: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 80%|████████  | 148/184 [58:52<20:36, 34.34s/it]

2024-09-02: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 81%|████████  | 149/184 [59:01<15:42, 26.92s/it]

2024-09-03: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 82%|████████▏ | 150/184 [59:26<14:48, 26.12s/it]

2024-09-04: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 82%|████████▏ | 151/184 [59:31<10:59, 19.99s/it]

2024-09-06: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 83%|████████▎ | 152/184 [59:49<10:17, 19.30s/it]

2024-09-05: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 83%|████████▎ | 153/184 [1:00:30<13:17, 25.74s/it]

2024-09-08: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 84%|████████▎ | 154/184 [1:01:16<15:54, 31.81s/it]

2024-09-07: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 84%|████████▍ | 155/184 [1:01:28<12:36, 26.08s/it]

2024-09-09: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 85%|████████▍ | 156/184 [1:01:37<09:39, 20.70s/it]

2024-09-10: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 85%|████████▌ | 157/184 [1:02:32<13:58, 31.06s/it]

2024-09-11: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 86%|████████▌ | 158/184 [1:02:37<10:05, 23.30s/it]

2024-09-13: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 86%|████████▋ | 159/184 [1:02:45<07:49, 18.79s/it]

2024-09-12: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 87%|████████▋ | 160/184 [1:03:46<12:32, 31.34s/it]

2024-09-14: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 88%|████████▊ | 161/184 [1:04:29<13:20, 34.82s/it]

2024-09-15: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)


2024-09-17: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 89%|████████▊ | 163/184 [1:05:07<09:54, 28.30s/it]

2024-09-16: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 89%|████████▉ | 164/184 [1:05:28<08:46, 26.34s/it]

2024-09-18: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)


2024-09-19: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 90%|█████████ | 166/184 [1:06:19<08:17, 27.66s/it]

2024-09-20: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 91%|█████████ | 167/184 [1:06:47<07:55, 27.99s/it]

2024-09-23: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)


2024-09-21: 48 csv files to read


 92%|█████████▏| 169/184 [1:06:53<03:47, 15.17s/it]

2024-09-22: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 92%|█████████▏| 170/184 [1:07:46<06:11, 26.56s/it]

2024-09-25: 48 csv files to read


 93%|█████████▎| 172/184 [1:07:48<02:41, 13.49s/it]

2024-09-26: 48 csv files to read2024-09-24: 48 csv files to read



  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
  df_plain = pd.concat([df_plain, df], ignore_index=True)
 94%|█████████▍| 173/184 [1:08:52<05:14, 28.61s/it]

2024-09-27: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 95%|█████████▍| 174/184 [1:09:19<04:40, 28.03s/it]

2024-09-29: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 95%|█████████▌| 175/184 [1:09:51<04:23, 29.26s/it]

2024-09-28: 48 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 96%|█████████▌| 176/184 [1:09:59<03:03, 22.90s/it]

2024-09-30: 26 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
 97%|█████████▋| 178/184 [1:10:10<01:22, 13.68s/it]

2024-10-01: 2 csv files to read


  df_plain = pd.concat([df_plain, df], ignore_index=True)
100%|██████████| 184/184 [1:11:25<00:00, 11.77s/it]


Completed processing 2024-04-01
Completed processing 2024-04-02
Completed processing 2024-04-03
Completed processing 2024-04-04
Completed processing 2024-04-05
Completed processing 2024-04-06
Completed processing 2024-04-07
Completed processing 2024-04-08
Completed processing 2024-04-09
Completed processing 2024-04-10
Completed processing 2024-04-11
Completed processing 2024-04-12
Completed processing 2024-04-13
Completed processing 2024-04-14
Completed processing 2024-04-15
Completed processing 2024-04-16
Completed processing 2024-04-17
Completed processing 2024-04-18
Completed processing 2024-04-19
Completed processing 2024-04-20
Completed processing 2024-04-21
Completed processing 2024-04-22
Completed processing 2024-04-23
Completed processing 2024-04-24
Completed processing 2024-04-25
Completed processing 2024-04-26
Completed processing 2024-04-27
Completed processing 2024-04-28
Completed processing 2024-04-29
Completed processing 2024-04-30
Completed processing 2024-05-01
Complete