In [None]:
# import requests
# import json
# url = "http://164.52.202.16:8005/latest/icd_match"
# headers = {'Content-Type': 'text/plain'} 

# payload = '"car accident"'  # the text you want to search for
# response = requests.post(url, data=payload, headers=headers)

# print(json.dumps(response.json(), indent=3))


{
   "response": [
      {
         "code": "",
         "code_desc": "",
         "concept_match_list": [
            {
               "code": "W01.0XXA",
               "code_desc": "Fall on same level from slipping, tripping and stumbling without subsequent striking against object, initial encounter",
               "match_score": 0.64,
               "sub_concept": "ACCIDENT AL FALL ON ROAD"
            },
            {
               "code": "V89.9",
               "code_desc": "Person injured in unspecified vehicle accident",
               "match_score": 0.71,
               "sub_concept": "Person injured in unspecified vehicle accident"
            },
            {
               "code": "W55.22XA",
               "code_desc": "Struck by cow, initial encounter",
               "match_score": 0.75,
               "sub_concept": "Road Traffic Accident ( Hit by Ox"
            },
            {
               "code": "V89.2XXA",
               "code_desc": "Person injured in unspec

In [2]:
import os
import shutil
import hashlib
from collections import defaultdict

def calculate_file_hash(filepath, hash_algorithm='md5', chunk_size=8192):
    """
    Calculates the hash of a file's content, reading the file in chunks.

    Args:
        filepath (str): The path to the file.
        hash_algorithm (str): Name of any fixed-length algorithm accepted by
            ``hashlib.new`` (e.g., 'md5', 'sha1', 'sha256', 'sha512', 'blake2b').
            Variable-length digests (shake_128 / shake_256) are not supported
            because their ``hexdigest()`` requires an explicit length.
        chunk_size (int): The number of bytes to read per chunk. Must not be 0.
            A negative value (or None) reads the whole file in a single call.

    Returns:
        str: The hexadecimal hash digest of the file, or None if the digest
        could not be computed (missing or unreadable file, unsupported
        algorithm, invalid chunk size). Every failure except a missing file
        prints a warning; no exception propagates to the caller.
    """
    try:
        # f.read(0) returns b'' immediately, which would end the loop below
        # before any data is hashed and yield the empty-input digest for every
        # file -- making all files look like duplicates of each other.
        if chunk_size == 0:
            raise ValueError("chunk_size must not be 0")

        try:
            hasher = hashlib.new(hash_algorithm)
        except (ValueError, TypeError):
            # Normalise hashlib's own error so the warning text stays stable.
            raise ValueError(f"Unsupported hash algorithm: {hash_algorithm}") from None

        with open(filepath, 'rb') as f:
            while chunk := f.read(chunk_size):
                hasher.update(chunk)
        return hasher.hexdigest()
    except FileNotFoundError:
        # Deliberately silent: a warning here is too noisy for non-existent files.
        return None
    except PermissionError:
        print(f"  Warning: Permission denied to read file for hash calculation: {filepath}")
        return None
    except Exception as e:
        # Best-effort contract: report the problem and let the caller skip the file.
        print(f"  Warning: Error calculating hash for {filepath}: {e}")
        return None

def copy_all_files_to_one_folder_no_duplicates(source_directory, destination_directory, hash_algorithm='md5'):
    """
    Traverses the source_directory, finds all files, and copies them into
    the (flat) destination_directory, ensuring no duplicate files (based on
    content hash) are copied. If a file with the same name but different
    content exists in the destination, a new name of the form
    ``<name>_<counter><ext>`` is generated.

    Ignores .pyc files and files without extensions. The extension is taken
    from ``os.path.splitext``, so dotfiles such as ``.gitignore`` count as
    extensionless and are ignored as well.

    If the destination directory is located inside the source tree, it is
    excluded from the traversal so already-consolidated files are not
    re-hashed and reported as duplicates of themselves.

    Progress and a final summary are printed; nothing is returned.

    Args:
        source_directory (str): The path to the directory to traverse.
        destination_directory (str): The path to the directory where all files
                                     will be copied. Created if missing.
        hash_algorithm (str): The hashing algorithm to use for content comparison
                              (e.g., 'md5', 'sha1', 'sha256'). 'md5' is faster,
                              'sha256' is more secure but slower.
    """
    if not os.path.isdir(source_directory):
        print(f"Error: Source directory '{source_directory}' does not exist or is not a directory.")
        return

    os.makedirs(destination_directory, exist_ok=True)
    print(f"Destination directory '{destination_directory}' ensured.")

    # Hashes of files already present in the destination.
    # Key: hash, Value: list of filenames with that hash in destination
    destination_file_hashes = defaultdict(list)
    print(f"Scanning existing files in '{destination_directory}' for hashes...")
    for root, _, files in os.walk(destination_directory):
        for file_name in files:
            # Skip .pyc and extensionless files when scanning destination too
            _stem, ext = os.path.splitext(file_name)
            if ext.lower() == '.pyc' or not ext:
                continue

            dest_file_path = os.path.join(root, file_name)
            file_hash = calculate_file_hash(dest_file_path, hash_algorithm)
            if file_hash:
                destination_file_hashes[file_hash].append(file_name)
    print(f"Found {sum(len(v) for v in destination_file_hashes.values())} existing files in destination.")


    copied_count = 0
    skipped_duplicates_count = 0
    skipped_ignored_types_count = 0
    skipped_other_reasons_count = 0
    filename_conflicts_resolved_count = 0

    print(f"Starting file copy from '{source_directory}' to '{destination_directory}'...")

    # Resolved once so the destination can be recognised during the walk even
    # when it is reached through a relative path or a symlink.
    destination_real_path = os.path.realpath(destination_directory)

    for root, dirs, files in os.walk(source_directory):
        # Top-down os.walk honours in-place edits of `dirs`: dropping the
        # destination here prevents descending into it when it is nested
        # inside the source tree.
        dirs[:] = [
            dir_name for dir_name in dirs
            if os.path.realpath(os.path.join(root, dir_name)) != destination_real_path
        ]

        for file_name in files:
            source_file_path = os.path.join(root, file_name)

            # --- Exclusion logic: .pyc and extensionless files are never copied ---
            stem, ext = os.path.splitext(file_name)
            if ext.lower() == '.pyc':
                print(f"  Skipped (Ignored Type): '{file_name}' is a .pyc file.")
                skipped_ignored_types_count += 1
                continue
            if not ext: # Check if there's no extension
                print(f"  Skipped (Ignored Type): '{file_name}' has no extension.")
                skipped_ignored_types_count += 1
                continue

            source_file_hash = calculate_file_hash(source_file_path, hash_algorithm)

            if not source_file_hash:
                print(f"  Skipped (Hash Error): Could not get hash for '{source_file_path}'.")
                skipped_other_reasons_count += 1
                continue

            # Check if an identical file (by content hash) already exists in destination
            if source_file_hash in destination_file_hashes:
                print(f"  Skipped (Duplicate Content): '{file_name}' (from '{root}') already exists in destination.")
                skipped_duplicates_count += 1
                continue

            # Content is new. Start from the original name and, if that name is
            # already taken in the destination, probe name_1, name_2, ... until
            # a free one is found.
            final_file_name = file_name
            destination_file_path = os.path.join(destination_directory, final_file_name)

            if os.path.exists(destination_file_path):
                counter = 1
                while True:
                    candidate_name = f"{stem}_{counter}{ext}"
                    candidate_path = os.path.join(destination_directory, candidate_name)
                    if not os.path.exists(candidate_path):
                        destination_file_path = candidate_path
                        final_file_name = candidate_name
                        filename_conflicts_resolved_count += 1
                        print(f"  Conflict: '{file_name}' already exists. Copying as '{candidate_name}'.")
                        break
                    counter += 1

            try:
                shutil.copy2(source_file_path, destination_file_path)
                # Record the hash only after a successful copy so later files
                # with the same content are detected as duplicates.
                destination_file_hashes[source_file_hash].append(final_file_name)
                print(f"  Copied: '{source_file_path}' -> '{destination_file_path}'")
                copied_count += 1
            except FileNotFoundError:
                print(f"  Skipped (Not Found): '{source_file_path}' (file might have been moved/deleted during traversal)")
                skipped_other_reasons_count += 1
            except PermissionError:
                print(f"  Skipped (Permission Denied): '{source_file_path}'")
                skipped_other_reasons_count += 1
            except Exception as e:
                # Best-effort: one failing file must not abort the whole run.
                print(f"  Skipped (Error): '{source_file_path}' - {e}")
                skipped_other_reasons_count += 1

    print("\n--- Copy Summary ---")
    print(f"Total files copied: {copied_count}")
    print(f"Total files skipped (duplicate content): {skipped_duplicates_count}")
    print(f"Total files skipped (ignored type): {skipped_ignored_types_count}")
    print(f"Total files skipped (other reasons/errors): {skipped_other_reasons_count}")
    print(f"Filename conflicts resolved by renaming: {filename_conflicts_resolved_count}")
    print(f"All unique files consolidated to: '{destination_directory}'")

if __name__ == "__main__":
    # The defaults reproduce the original hard-coded, machine-specific paths.
    # Set COPY_SOURCE_DIR / COPY_DESTINATION_DIR in the environment to run
    # against other directories without editing this cell. An unset or empty
    # variable falls back to the default.
    source_dir = os.environ.get("COPY_SOURCE_DIR") or "/Users/anushverma/Desktop/lumar/flask_app"
    destination_dir = os.environ.get("COPY_DESTINATION_DIR") or "/Users/anushverma/Desktop/lumar/copy"
    hashing_algo = 'md5' # or 'sha256'

    copy_all_files_to_one_folder_no_duplicates(source_dir, destination_dir, hashing_algo)

Destination directory '/Users/anushverma/Desktop/lumar/copy' ensured.
Scanning existing files in '/Users/anushverma/Desktop/lumar/copy' for hashes...
Found 0 existing files in destination.
Starting file copy from '/Users/anushverma/Desktop/lumar/flask_app' to '/Users/anushverma/Desktop/lumar/copy'...
  Copied: '/Users/anushverma/Desktop/lumar/flask_app/run.py' -> '/Users/anushverma/Desktop/lumar/copy/run.py'
  Copied: '/Users/anushverma/Desktop/lumar/flask_app/config.py' -> '/Users/anushverma/Desktop/lumar/copy/config.py'
  Copied: '/Users/anushverma/Desktop/lumar/flask_app/app/__init__.py' -> '/Users/anushverma/Desktop/lumar/copy/__init__.py'
  Copied: '/Users/anushverma/Desktop/lumar/flask_app/app/core/credentials.py' -> '/Users/anushverma/Desktop/lumar/copy/credentials.py'
  Copied: '/Users/anushverma/Desktop/lumar/flask_app/app/core/database_client.py' -> '/Users/anushverma/Desktop/lumar/copy/database_client.py'
  Copied: '/Users/anushverma/Desktop/lumar/flask_app/app/core/data_loa