In [5]:
import pandas as pd
from pathlib import Path

def count_unique_game_ids(storage_directory: str) -> int:
    '''
    Count the number of unique game IDs across all partitions in the specified storage directory.

    Every immediate sub-directory of the storage directory is treated as one
    partition. A partition contributes only if it contains a "data.parquet"
    file; sub-directories without that file, and plain files at the top
    level, are skipped silently.

    Arguments:
        storage_directory : Directory containing the partitions.

    Returns:
        The total count of unique game IDs. Missing (null / NaN) values in
        the "game_id" column are not counted as IDs.

    Raises:
        FileNotFoundError : If the storage directory does not exist (raised by Path.iterdir).
    '''

    unique_game_ids = set()
    storage_path    = Path(storage_directory)

    # Loop through all total_ply directories
    for total_ply_dir in storage_path.iterdir():
        if not total_ply_dir.is_dir():
            continue

        # Partitions without a data.parquet file have nothing to contribute
        partition_path = total_ply_dir / "data.parquet"
        if not partition_path.exists():
            continue

        # Only the ID column is needed, so avoid loading the rest of the partition
        game_ids = pd.read_parquet(partition_path, columns=['game_id'])['game_id']

        # Drop nulls before merging: NaN never compares equal to NaN, so every
        # partition holding a missing ID would otherwise add one more "unique" entry
        unique_game_ids.update(game_ids.dropna().unique())

    print(f"Total number of unique game IDs: {len(unique_game_ids)}")
    return len(unique_game_ids)

def main():
    '''
    Report how many unique game IDs are stored under the Project Gambit storage directory.
    '''

    # Root folder that holds the original partitions
    gambit_storage = "/Users/Macington/Documents/Projects/Project Gambit/Games/Storage"

    count_unique_game_ids(gambit_storage)

# Run main() only when this code is executed directly (as a notebook cell or a script), not when it is imported.
if __name__ == "__main__":
    main()


Total number of unique game IDs: 7302


In [6]:
from pathlib import Path
from shutil import copyfile

def copy_num_rows_files(src_directory: str, dest_directory: str):
    '''
    Mirror each partition's "num_rows.txt" from the source tree into the destination tree.

    Every immediate sub-directory of the source directory that contains a
    "num_rows.txt" gets that file copied into the sub-directory of the same
    name under the destination directory. Missing destination folders are
    created, and one line is printed per copied file.

    Arguments:
        src_directory  : Source directory containing the original partitions.
        dest_directory : Destination directory where the files will be copied.
    '''

    file_name = "num_rows.txt"
    src_root  = Path(src_directory)
    dest_root = Path(dest_directory)

    for partition in src_root.iterdir():
        # Anything that is not a partition folder is ignored
        if not partition.is_dir():
            continue

        # Partitions without a row-count file are left alone
        source_file = partition / file_name
        if not source_file.exists():
            continue

        # Make sure the twin partition folder exists before copying into it
        target_dir = dest_root / partition.name
        target_dir.mkdir(parents=True, exist_ok=True)

        target_file = target_dir / file_name
        copyfile(source_file, target_file)
        print(f"Copied {source_file} to {target_file}")

    print("Copying completed!")

def main():
    '''
    Copy every "num_rows.txt" from the Project Scotch partitions into the
    matching Project Gambit partitions.
    '''

    # Where the original partitions live
    scotch_storage = "/Users/Macington/Documents/Projects/Project Scotch/Games/Storage"
    # Where the copied files should end up
    gambit_storage = "/Users/Macington/Documents/Projects/Project Gambit/Games/Storage"

    copy_num_rows_files(src_directory=scotch_storage, dest_directory=gambit_storage)

# Run main() only when this code is executed directly (as a notebook cell or a script), not when it is imported.
if __name__ == "__main__":
    main()


Copied /Users/Macington/Documents/Projects/Project Scotch/Games/Storage/total_ply=13/num_rows.txt to /Users/Macington/Documents/Projects/Project Gambit/Games/Storage/total_ply=13/num_rows.txt
Copied /Users/Macington/Documents/Projects/Project Scotch/Games/Storage/total_ply=155/num_rows.txt to /Users/Macington/Documents/Projects/Project Gambit/Games/Storage/total_ply=155/num_rows.txt
Copied /Users/Macington/Documents/Projects/Project Scotch/Games/Storage/total_ply=199/num_rows.txt to /Users/Macington/Documents/Projects/Project Gambit/Games/Storage/total_ply=199/num_rows.txt
Copied /Users/Macington/Documents/Projects/Project Scotch/Games/Storage/total_ply=152/num_rows.txt to /Users/Macington/Documents/Projects/Project Gambit/Games/Storage/total_ply=152/num_rows.txt
Copied /Users/Macington/Documents/Projects/Project Scotch/Games/Storage/total_ply=14/num_rows.txt to /Users/Macington/Documents/Projects/Project Gambit/Games/Storage/total_ply=14/num_rows.txt
Copied /Users/Macington/Documents/