In [30]:
import hashlib
import csv
from prettytable import PrettyTable

# Generate globally unique deterministic strings
def generate_unique_strings(total, length):
    """Return the integers 1..total as zero-padded decimal strings.

    Args:
        total: How many strings to produce; values below 1 give an empty list.
        length: Minimum width passed to ``str.zfill``. Numbers with more
            digits than ``length`` are kept whole, so they come out longer.

    Returns:
        A list of ``total`` distinct strings, in ascending numeric order.
    """
    return [str(number).zfill(length) for number in range(1, total + 1)]

# Hash function to compute full hash
def compute_full_hash(input_string, algorithm):
    """Return the hexadecimal digest of ``input_string`` under ``algorithm``.

    Args:
        input_string: Text to hash; it is encoded as UTF-8 first.
        algorithm: Hash name understood by ``hashlib.new`` (e.g. ``'md5'``).

    Returns:
        The complete (untruncated) digest as a lowercase hex string.
    """
    payload = input_string.encode('utf-8')
    # hashlib.new accepts the initial data directly, which is equivalent to
    # creating the object and calling update() once.
    return hashlib.new(algorithm, payload).hexdigest()

# Test collision resistance
def test_collision_resistance(algorithm, string_sizes, iterations_list):
    """Count repeated full digests for one algorithm over several input sets.

    For each entry in ``string_sizes`` one list of unique strings is
    generated (long enough for the largest run). For each entry in
    ``iterations_list`` the first ``iterations`` strings of that list are
    hashed, and every digest that was already seen in the same run is
    counted as a collision.

    Args:
        algorithm: Hash name forwarded to ``compute_full_hash``.
        string_sizes: Padding widths forwarded to ``generate_unique_strings``.
        iterations_list: How many strings to hash in each run. May be empty,
            in which case no rows are produced.

    Returns:
        A list of rows ``[algorithm, size, iterations, collisions,
        collision_percentage]``, ordered by size and then by the order of
        ``iterations_list``. The percentage is ``0.0`` for a run with zero
        iterations.
    """
    results = []
    # The largest run is the same for every size, so compute it once.
    # ``default=0`` keeps an empty iterations_list from raising ValueError.
    max_iterations = max(iterations_list, default=0)

    for size in string_sizes:
        # One shared input set per size; shorter runs use a prefix of it.
        unique_strings = generate_unique_strings(max_iterations, size)

        for iterations in iterations_list:
            hash_set = set()
            collisions = 0
            for i in range(iterations):
                full_hash = compute_full_hash(unique_strings[i], algorithm)
                if full_hash in hash_set:
                    collisions += 1
                hash_set.add(full_hash)
            # A run of zero iterations has no collisions by definition;
            # report 0% instead of dividing by zero.
            if iterations:
                collision_percentage = (collisions / iterations) * 100
            else:
                collision_percentage = 0.0
            results.append([algorithm, size, iterations, collisions, collision_percentage])
    return results

# Main function
def main():
    """Run the collision test for every algorithm and report the results.

    Each result row is written to ``hashing_collision_results.csv`` and added
    to a PrettyTable, which is printed once all algorithms have been tested.
    The collision percentage is formatted with ten decimal places in both
    outputs.
    """
    # Test matrix: hash algorithms x padding widths x number of inputs.
    algorithms = ['md5', 'sha1', 'sha256', 'blake2b']
    string_sizes = [4, 8, 16, 32]
    iterations_list = [10000, 100000, 1000000, 10000000]

    # Terminal report.
    table = PrettyTable()
    table.field_names = ["Algorithm", "String Size", "Iterations", "Collisions", "Collision %"]

    # CSV report; newline='' lets the csv module control line endings.
    with open('hashing_collision_results.csv', mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Algorithm", "String Size", "Iterations", "Collisions", "Collision Percentage (%)"])

        for algo in algorithms:
            rows = test_collision_resistance(algo, string_sizes, iterations_list)
            for name, size, iterations, collisions, percentage in rows:
                # Same formatted row goes to both the table and the CSV.
                row = [name, size, iterations, collisions, f"{percentage:.10f}"]
                table.add_row(row)
                writer.writerow(row)

    print(table)

# Run the benchmark only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()


+-----------+-------------+------------+------------+--------------+
| Algorithm | String Size | Iterations | Collisions | Collision %  |
+-----------+-------------+------------+------------+--------------+
|    md5    |      4      |   10000    |     0      | 0.0000000000 |
|    md5    |      4      |   100000   |     0      | 0.0000000000 |
|    md5    |      4      |  1000000   |     0      | 0.0000000000 |
|    md5    |      4      |  10000000  |     0      | 0.0000000000 |
|    md5    |      8      |   10000    |     0      | 0.0000000000 |
|    md5    |      8      |   100000   |     0      | 0.0000000000 |
|    md5    |      8      |  1000000   |     0      | 0.0000000000 |
|    md5    |      8      |  10000000  |     0      | 0.0000000000 |
|    md5    |      16     |   10000    |     0      | 0.0000000000 |
|    md5    |      16     |   100000   |     0      | 0.0000000000 |
|    md5    |      16     |  1000000   |     0      | 0.0000000000 |
|    md5    |      16     |  10000