In [135]:
'''
PrimeDiffEx Prime Difference Explorer - V0.2 Alpha

Changelog: Working on it. 15 July 2023


'''
import gzip
import random
from math import gcd
from sympy import isprime, simplify
from fractions import Fraction
from collections import deque, Counter
from itertools import islice
import time
import json
import csv
import os
from datetime import datetime
import pickle
import shutil
from itertools import product
from collections import defaultdict


# The Miller-Rabin primality test is a probabilistic primality test: an algorithm which
# determines whether a given number is likely to be prime, similar to the Fermat primality test
# and the Solovay-Strassen primality test. The original version, as described by Miller, is
# deterministic, but that determinism relies on the unproven generalized Riemann hypothesis.
# Michael O. Rabin modified it to obtain an unconditional probabilistic algorithm: the test
# is correct for all prime numbers (a prime is never rejected), while a composite (non-prime)
# number can only slip through with a small probability that shrinks with every extra round.
def miller_rabin(n, k):
    """Run ``k`` rounds of the Miller-Rabin test on ``n``.

    Args:
        n: Integer to test.
        k: Number of random rounds (witnesses) to try.

    Returns:
        ``False`` when ``n`` is below 2 or was shown to be composite;
        ``True`` when ``n`` is one of the small primes checked by trial
        division or survived every round (i.e. it is probably prime).
    """
    if n < 2:
        return False

    # Trial division by the first few primes settles every small input and
    # guarantees n >= 29 below, so randrange(2, n - 1) is never empty.
    for small_prime in (2, 3, 5, 7, 11, 13, 17, 19, 23):
        if n % small_prime == 0:
            return n == small_prime

    # Decompose n - 1 as 2**exponent * odd_part with odd_part odd.
    exponent = 0
    odd_part = n - 1
    while odd_part % 2 == 0:
        exponent += 1
        odd_part //= 2

    for _round in range(k):
        witness = random.randrange(2, n - 1)
        residue = pow(witness, odd_part, n)
        if residue in (1, n - 1):
            continue

        # Keep squaring; the witness only vouches for n if n - 1 shows up.
        reached_minus_one = False
        for _squaring in range(exponent - 1):
            residue = pow(residue, 2, n)
            if residue == n - 1:
                reached_minus_one = True
                break
        if not reached_minus_one:
            return False

    return True
    
# Find the next prime number greater than the input number. Uses the Miller-Rabin primality test.
def find_next_prime(start_number, miller_rabin_iterations):
    """Return the smallest (probable) prime strictly greater than ``start_number``.

    Args:
        start_number: Integer to search upward from; it is never returned
            itself, even when it is prime.
        miller_rabin_iterations: Number of rounds handed to ``miller_rabin``
            for every candidate.

    Returns:
        The first candidate above ``start_number`` accepted by
        ``miller_rabin``.
    """
    # 2 is the only even prime and the odd-only scan below can never reach
    # it, so every input below 2 is answered directly.
    if start_number < 2:
        return 2

    # Step to the next odd number strictly above the input.
    candidate = start_number + 1 if start_number % 2 == 0 else start_number + 2
    while not miller_rabin(candidate, miller_rabin_iterations):
        candidate += 2
    return candidate

# Find a sequence of prime numbers, starting from a specified number.
def find_prime_sequence(start_number, num_primes, miller_rabin_iterations, verbose):
    """Collect ``num_primes`` consecutive (probable) primes above ``start_number``.

    Args:
        start_number: Search starts strictly above this integer.
        num_primes: How many primes to collect.
        miller_rabin_iterations: Rounds passed on to ``find_next_prime``.
        verbose: When true, progress is printed on a single line that is
            rewritten in place; when false, nothing is printed.

    Returns:
        A list with the primes in increasing order.
    """
    primes = []
    # Progress cadence: roughly every 0.1 % for long runs, every second
    # prime otherwise. Integer arithmetic keeps the modulus exact.
    report_every = num_primes // 1000 if num_primes > 1000 else 2

    current_number = start_number
    while len(primes) < num_primes:
        current_number = find_next_prime(current_number, miller_rabin_iterations)
        primes.append(current_number)
        if verbose and len(primes) % report_every == 0:
            # flush so the in-place progress line shows up immediately
            print(f"\r{100.0 * len(primes) / num_primes} % done    ", end="", flush=True)

    if verbose:
        print(f"\r{100.0} % done             ", end="")
        print()  # Move the cursor to the next line after the progress display
    return primes

# Calculate the second difference for a sequence of numbers.
def calculate_second_differences(primes):
    """Return the differences between consecutive gaps of ``primes``.

    The gaps are ``primes[i + 1] - primes[i]``; the result holds
    ``gaps[i + 1] - gaps[i]`` and is therefore two items shorter than the
    input (empty for fewer than three numbers).
    """
    first_differences = [
        later - earlier for earlier, later in zip(primes, primes[1:])
    ]
    return [
        following - preceding
        for preceding, following in zip(first_differences, first_differences[1:])
    ]


# Calculate the second ratio for a sequence of numbers.
def calculate_second_ratios(primes):
    """Return, for each pair of neighbouring gaps, (difference) / (sum) as a Fraction.

    For gaps ``g1`` and ``g2`` the entry is ``Fraction(g2 - g1, g1 + g2)``
    passed through ``limit_denominator()``; it is ``None`` when the two gaps
    sum to zero. The result is two items shorter than ``primes``.
    """
    gaps = [later - earlier for earlier, later in zip(primes, primes[1:])]

    ratios = []
    for first_gap, second_gap in zip(gaps, gaps[1:]):
        gap_sum = first_gap + second_gap
        if gap_sum == 0:
            # Avoid a zero denominator; callers see None for this position.
            ratios.append(None)
        else:
            ratios.append(
                Fraction(second_gap - first_gap, gap_sum).limit_denominator()
            )
    return ratios

# Generate a random number with a specific number of bits.
def generate_random_number(num_bits):
    """Return a random integer that needs exactly ``num_bits`` bits.

    The value is drawn with ``random.randint`` from the inclusive range
    ``[2**(num_bits - 1), 2**num_bits - 1]``, so it follows the module-level
    ``random`` seed.
    """
    lower_bound = 2 ** (num_bits - 1)
    upper_bound = 2 ** num_bits - 1
    return random.randint(lower_bound, upper_bound)

# Create a directory to store dataset output files.
def create_output_directory(num_bits, num_primes):
    """Create and return a timestamped output directory under the current working directory.

    The directory is named ``{num_bits}bit{num_primes}_{YYYYmmdd_HHMMSS}``
    using the local time of the call. An already existing directory with
    that name is reused rather than treated as an error.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    target = os.path.join(os.getcwd(), f"{num_bits}bit{num_primes}_{stamp}")
    os.makedirs(target, exist_ok=True)
    return target

# Write the sequence of primes with their second differences and second ratios to a CSV file.
def write_output_to_csv(primes, sd, sr, base_filename, num_digits=10):
    """Write the per-prime table, both frequency tables and a pickled state file.

    Four files are produced next to ``base_filename``:

    * ``_primes.csv``  - one row per second difference: the trailing
      ``num_digits`` digits of ``primes[i + 1]``, ``sd[i]`` and ``sr[i]`` as
      text and as a float.
    * ``_sd.csv``      - each distinct second difference with its count and
      percentage, most common first.
    * ``_sr.csv``      - the same for second ratios.
    * ``_state.pkl.gz`` - gzip-compressed pickle of ``(primes, sd, sr)``.

    Args:
        primes: Sequence of primes; needs at least ``len(sd) + 1`` items.
        sd: List of second differences.
        sr: List of second ratios (Fractions or ``None``); may be shorter
            than ``sd``, in which case the missing ratio cells stay empty.
        base_filename: Path prefix shared by all output files.
        num_digits: How many trailing digits of each prime to write
            (defaults to the previously hard-coded 10).
    """
    with open(f"{base_filename}_primes.csv", 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Prime", "Second Difference", "Second Ratio (Fraction)", "Second Ratio (Decimal)"])
        for i, difference in enumerate(sd):
            ratio_known = i < len(sr)
            ratio = sr[i] if ratio_known else None
            writer.writerow([
                str(primes[i + 1])[-num_digits:],  # prime the row belongs to
                difference,
                str(ratio) if ratio_known else None,
                float(ratio) if ratio is not None else None,
            ])

    # The loop variables below deliberately do NOT reuse the names ``sd`` and
    # ``sr``: rebinding them would make the pickle at the end store a single
    # counter key instead of the full lists.
    with open(f"{base_filename}_sd.csv", 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Second Difference", "Count", "Percentage"])
        total_count = len(sd)
        for difference, count in Counter(sd).most_common():
            writer.writerow([difference, count, 100 * count / total_count])

    with open(f"{base_filename}_sr.csv", 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Second Ratio (Fraction)", "Count", "Percentage"])
        total_count = len(sr)
        for ratio, count in Counter(sr).most_common():
            writer.writerow([str(ratio), count, 100 * count / total_count])

    with gzip.open(f"{base_filename}_state.pkl.gz", 'wb') as file:
        pickle.dump((primes, sd, sr), file)

def calculate_sd_sr_combinations(sd, sr):
    """Count how often each (second difference, second ratio) pair occurs.

    ``sd`` and ``sr`` are paired position by position (stopping at the
    shorter one); the result is a ``Counter`` keyed by those pairs.
    """
    pair_counts = Counter()
    for pair in zip(sd, sr):
        pair_counts[pair] += 1
    return pair_counts

# Write the combinations of second differences and second ratios to a CSV file.
def write_sd_sr_combinations_to_csv(sd_sr_combinations, base_filename):
    """Write the (second difference, second ratio) frequency table to CSV.

    Args:
        sd_sr_combinations: ``Counter`` keyed by ``(sd, sr)`` pairs.
        base_filename: Path prefix; ``_sd_sr_combinations.csv`` is appended.

    Rows are ordered most common first and carry the count together with
    its percentage of all counted pairs.
    """
    grand_total = sum(sd_sr_combinations.values())
    header = ["Second Difference", "Second Ratio (Fraction)", "Count", "Percentage"]

    with open(f"{base_filename}_sd_sr_combinations.csv", 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(
            [difference, str(ratio), occurrences, 100 * occurrences / grand_total]
            for (difference, ratio), occurrences in sd_sr_combinations.most_common()
        )

        
# Find sets of primes with specific differences (named prime sets, like "twin primes").
def find_named_prime_sets(primes, prime_sets, num_digits):
    """Group neighbouring primes by the size of the gap between them.

    Only adjacent entries of ``primes`` are compared, so a pair is reported
    when ``primes[i + 1] - primes[i]`` equals one of the requested gaps.

    Args:
        primes: Sequence of primes in the order they should be compared.
        prime_sets: Iterable of gap sizes to look for (e.g. ``[2, 4]``).
            Duplicate entries are treated as one.
        num_digits: Number of trailing digits kept from each prime in the
            reported pairs.

    Returns:
        A dict mapping the name of each requested gap to a list of
        ``(smaller, larger)`` pairs, both given as digit strings. Keys follow
        the order of ``prime_sets``. Gaps without a predefined name are
        reported under ``"Gap <n> primes"``.
    """
    # Names for the gaps of interest; extend as further sets become relevant.
    named_prime_sets = {
        2: "Twin primes",
        4: "Cousin primes",
        6: "Sexy primes",
        8: "Octo primes",
        10: "Deca primes",
        12: "Dodeca primes"
    }

    # Resolve every requested gap to its label once, so the scan below is a
    # single dictionary lookup per pair.
    labels = {
        gap: named_prime_sets.get(gap, f"Gap {gap} primes")
        for gap in prime_sets
    }
    prime_sets_to_find = {label: [] for label in labels.values()}

    for smaller, larger in zip(primes, primes[1:]):
        label = labels.get(larger - smaller)
        if label is not None:
            prime_sets_to_find[label].append(
                (str(smaller)[-num_digits:], str(larger)[-num_digits:])
            )

    return prime_sets_to_find


# Write the totals of named prime sets to a CSV file.
def write_named_prime_sets_totals_to_csv(named_prime_sets, base_filename):
    """Write one ``Name, Total`` row per named prime set.

    Args:
        named_prime_sets: Dict mapping a set name to its list of prime pairs.
        base_filename: Path prefix; ``_named_prime_sets_totals.csv`` is
            appended.
    """
    target = f"{base_filename}_named_prime_sets_totals.csv"
    with open(target, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(["Name", "Total"])
        for label, pairs in named_prime_sets.items():
            out.writerow([label, len(pairs)])

            
# Write the named prime sets to a CSV file.            
def write_named_prime_sets_to_csv(named_prime_sets, base_filename):
    """Write every prime pair of every named set as a ``Name, Prime Set`` row.

    Args:
        named_prime_sets: Dict mapping a set name to its list of prime pairs.
        base_filename: Path prefix; ``_named_prime_sets.csv`` is appended.

    Each pair is written as a single cell, i.e. in whatever text form the
    csv writer gives the pair object.
    """
    target = f"{base_filename}_named_prime_sets.csv"
    with open(target, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(["Name", "Prime Set"])
        for label, pairs in named_prime_sets.items():
            out.writerows([label, pair] for pair in pairs)

def write_metadata_file(output_directory, primes, num_bits, num_primes, num_digits):
    """Write ``metadata.json`` describing a generated dataset.

    Args:
        output_directory: Directory that receives ``metadata.json``.
        primes: Non-empty sequence of primes; the first and last entries are
            recorded.
        num_bits, num_primes, num_digits: Stored verbatim.

    ``left_digits`` holds the first prime with its last ``num_digits``
    digits removed, or the whole number as text when it has no more than
    ``num_digits`` digits.
    """
    first_prime_text = str(primes[0])
    if len(first_prime_text) > num_digits:
        left_digits = first_prime_text[:-num_digits]
    else:
        left_digits = first_prime_text

    metadata = {
        "first_prime": primes[0],
        "last_prime": primes[-1],
        "num_bits": num_bits,
        "num_primes": num_primes,
        "num_digits": num_digits,
        "left_digits": left_digits,
    }

    target = os.path.join(output_directory, "metadata.json")
    with open(target, 'w') as metadata_file:
        json.dump(metadata, metadata_file, indent=4)

# Write the primes, second differences, and second ratios to a CSV file.
def write_primes_to_csv(primes, second_differences, second_ratios, base_filename, num_digits):
    """Write one row per second difference to ``<base_filename>_primes.csv``.

    Row ``i`` holds the last ``num_digits`` digits of ``primes[i + 1]``
    followed by ``second_differences[i]`` and ``second_ratios[i]``.
    """
    target = base_filename + "_primes.csv"
    with open(target, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(["Prime", "Second Difference", "Second Ratio"])
        for position, difference in enumerate(second_differences):
            prime_tail = str(primes[position + 1])[-num_digits:]
            out.writerow([prime_tail, difference, second_ratios[position]])




# Write the second differences to a CSV file.
def write_second_differences_to_csv(second_differences, base_filename):
    """Write the frequency table of second differences to ``<base_filename>_sd.csv``.

    Each row lists a distinct value, how often it occurs and its percentage
    of all entries, ordered most common first.
    """
    target = base_filename + "_sd.csv"
    frequencies = Counter(second_differences)
    entry_total = len(second_differences)

    with open(target, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(["Second Difference", "Count", "Percentage"])
        out.writerows(
            [value, occurrences, occurrences / entry_total * 100]
            for value, occurrences in frequencies.most_common()
        )

            
# Write the second ratios to a CSV file.
def write_second_ratios_to_csv(second_ratios, base_filename):
    """Write the frequency table of second ratios to ``<base_filename>_sr.csv``.

    Each row lists a distinct ratio, how often it occurs and its percentage
    of all entries, ordered most common first.
    """
    target = base_filename + "_sr.csv"
    frequencies = Counter(second_ratios)
    entry_total = len(second_ratios)

    with open(target, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(["Second Ratio", "Count", "Percentage"])
        out.writerows(
            [ratio, occurrences, occurrences / entry_total * 100]
            for ratio, occurrences in frequencies.most_common()
        )
            
def write_state_to_pickle(primes, sd, sr, base_filename):
    """Save ``(primes, sd, sr)`` as a gzip-compressed pickle.

    The tuple is written to ``<base_filename>_state.pkl.gz`` and can be read
    back with ``load_pickle_file``.
    """
    state = (primes, sd, sr)
    archive_path = f"{base_filename}_state.pkl.gz"
    with gzip.open(archive_path, 'wb') as archive:
        pickle.dump(state, archive)

            
def load_pickle_file(pickle_file_path):
    """Load and return the object stored in a gzip-compressed pickle file.

    NOTE: unpickling can execute arbitrary code, so only open files that
    were produced by a trusted run of this program.
    """
    with gzip.open(pickle_file_path, 'rb') as archive:
        return pickle.load(archive)

def unload_variables_from_pickle_file(pickle_file_path):
    """Remove the dataset variables from this module's global namespace.

    Deletes the globals ``primes``, ``sd``, ``sr``, ``sd_sr_combinations``
    and ``named_prime_sets`` when they exist; names that are not defined are
    skipped silently.

    Args:
        pickle_file_path: Accepted for symmetry with ``load_pickle_file``;
            the file is never opened and the value is not used.
    """
    # Names a loaded or generated dataset is expected to have been bound to.
    variable_names = ('primes', 'sd', 'sr', 'sd_sr_combinations', 'named_prime_sets')

    # Only globals need handling: none of these names is ever local here.
    namespace = globals()
    for var_name in variable_names:
        namespace.pop(var_name, None)

            

def run_from_config(config_file):
    """Generate a prime dataset as described by a JSON configuration file.

    Required keys: ``start_number`` (an integer or the string ``"random"``),
    ``num_primes`` and ``write_output``. ``num_bits`` is required when
    ``start_number`` is ``"random"`` or when output is written.
    Optional keys: ``random_seed``, ``miller_rabin_iterations`` (default 5),
    ``num_digits`` (an integer, ``"auto"``, or null/absent for 10),
    ``prime_sets`` (default ``[2, 4, 6, 8, 10, 12]``) and the ``output_*``
    switches, which all default to true.

    Args:
        config_file: Path to the JSON configuration file.

    Returns:
        The tuple ``(primes, second_differences, second_ratios,
        sd_sr_combinations, named_prime_sets)``.

    Raises:
        KeyError: If a required configuration key is missing.
    """
    with open(config_file, 'r') as file:
        config = json.load(file)

    # Seeding makes a "random" start number reproducible; without a seed in
    # the config, random.seed(None) picks a fresh one on every run.
    random.seed(config.get('random_seed', None))

    miller_rabin_iterations = config.get('miller_rabin_iterations', 5)

    if config['start_number'] == "random":
        start_number = generate_random_number(config['num_bits'])
    else:
        start_number = config['start_number']

    print("Generating primes...")
    primes = find_prime_sequence(start_number, config['num_primes'], miller_rabin_iterations, verbose=True)

    # Number of trailing digits kept when primes are written out.
    num_digits = config.get('num_digits', None)
    if num_digits is None:
        num_digits = 10
    elif num_digits == "auto":
        # Enough digits to cover the span of the whole sequence, plus two.
        num_digits = len(str(primes[-1] - primes[0])) + 2

    print("Calculating second differences...")
    second_differences = calculate_second_differences(primes)
    print("Calculating second ratios...")
    second_ratios = calculate_second_ratios(primes)
    print("Calculating SD-SR combinations...")
    sd_sr_combinations = calculate_sd_sr_combinations(second_differences, second_ratios)
    print("Calculating named prime sets...")
    prime_sets = config.get('prime_sets', [2, 4, 6, 8, 10, 12])
    named_prime_sets = find_named_prime_sets(primes, prime_sets, num_digits)
    print("Done!")

    if config['write_output']:
        output_directory = create_output_directory(config['num_bits'], config['num_primes'])
        print(f"Full path to the output directory: {os.path.abspath(output_directory)}")
        print(f"Current working directory: {os.getcwd()}")
        # Keep a copy of the configuration next to the data it produced.
        shutil.copy2(config_file, os.path.join(output_directory, "config.json"))
        write_metadata_file(output_directory, primes, config['num_bits'], config['num_primes'], num_digits)

        base_filename = os.path.join(output_directory, f"{config['num_bits']}bit{config['num_primes']}")

        if config.get('output_primes', True):
            write_primes_to_csv(primes, second_differences, second_ratios, base_filename, num_digits)
            # The pickled state is written together with the primes table.
            write_state_to_pickle(primes, second_differences, second_ratios, base_filename)
        if config.get('output_second_differences', True):
            write_second_differences_to_csv(second_differences, base_filename)
        if config.get('output_second_ratios', True):
            write_second_ratios_to_csv(second_ratios, base_filename)
        if config.get('output_sd_sr_combinations', True):
            write_sd_sr_combinations_to_csv(sd_sr_combinations, base_filename)
        if config.get('output_named_prime_sets', True):
            write_named_prime_sets_to_csv(named_prime_sets, base_filename)
        if config.get('output_named_prime_sets_totals', True):
            write_named_prime_sets_totals_to_csv(named_prime_sets, base_filename)

    return primes, second_differences, second_ratios, sd_sr_combinations, named_prime_sets


In [136]:
!cat config.json

{
"random_seed": 1234,
"num_bits": 256,
"num_primes": 10000,
"start_number": "random",
"write_output": true,
"output_primes": true,
"output_second_differences": true,
"output_second_ratios": true,
"output_sd_sr_combinations": true,
"output_named_prime_sets": true,
"output_named_prime_sets_totals": true,
"miller_rabin_iterations": 5,
"num_digits": "auto",
"prime_sets": [2, 4]
}

In [137]:
primes, sd, sr, sd_sr_combinations, named_prime_sets = run_from_config('config.json')

Generating primes...
100.0 % done             
Calculating named prime sets...
Done!
Calculating second differences...
Calculating second ratios...
Calculating SD-SR combinations...
Calculating named prime sets...
Done!
Full path to the output directory: /Users/tem/primejuly/256bit10000_20230715_195759
Current working directory: /Users/tem/primejuly


In [139]:
primes[0]


69292538960327306049903804807362518857207638024876774494604948273643417497347

In [140]:
primes[1]

69292538960327306049903804807362518857207638024876774494604948273643417497749

In [142]:
primes[-1] - primes[0]

1768354

In [143]:
!cat config.json

{
"random_seed": 1234,
"num_bits": 8,
"num_primes": 100,
"start_number": "random",
"write_output": true,
"output_primes": true,
"output_second_differences": true,
"output_second_ratios": true,
"output_sd_sr_combinations": true,
"output_named_prime_sets": true,
"output_named_prime_sets_totals": true,
"miller_rabin_iterations": 5,
"num_digits": "auto",
"prime_sets": [2, 4]
}

In [144]:
primes, sd, sr, sd_sr_combinations, named_prime_sets = run_from_config('config.json')

Generating primes...
2.0 % done    4.0 % done    6.0 % done    8.0 % done    10.0 % done    12.0 % done    14.0 % done    16.0 % done    18.0 % done    20.0 % done    22.0 % done    24.0 % done    26.0 % done    28.0 % done    30.0 % done    32.0 % done    34.0 % done    36.0 % done    38.0 % done    40.0 % done    42.0 % done    44.0 % done    46.0 % done    48.0 % done    50.0 % done    52.0 % done    54.0 % done    56.0 % done    58.0 % done    60.0 % done    62.0 % done    64.0 % done    66.0 % done    68.0 % done    70.0 % done    72.0 % done    74.0 % done    76.0 % done    78.0 % done    80.0 % done    82.0 % done    84.0 % done    86.0 % done    88.0 % done    90.0 % done    92.0 % done    94.0 % done    96.0 % done    98.0 % done    100.0 % done    100.0 % done             
Calculating named prime sets...
Done!
Calculating second differences...
Calculating second ratios...
Calculating SD-SR combinations...
Calculating named pr

In [145]:
primes[0]

241

In [146]:
primes[-1]

881

In [123]:
!cat config.json

{
"random_seed": 1234,
"num_bits": 256,
"num_primes": 10000,
"start_number": "random",
"write_output": true,
"output_primes": true,
"output_second_differences": true,
"output_second_ratios": true,
"output_sd_sr_combinations": true,
"output_named_prime_sets": true,
"output_named_prime_sets_totals": true,
"miller_rabin_iterations": 5,
"num_digits": "auto",
"prime_sets": [2, 4]
}

In [147]:
primes, sd, sr, sd_sr_combinations, named_prime_sets = run_from_config('config.json')

Generating primes...
100.0 % done             
Calculating named prime sets...
Done!
Calculating second differences...
Calculating second ratios...
Calculating SD-SR combinations...
Calculating named prime sets...
Done!
Full path to the output directory: /Users/tem/primejuly/256bit10000_20230715_200352
Current working directory: /Users/tem/primejuly


In [148]:
primes[0]

69292538960327306049903804807362518857207638024876774494604948273643417497347

In [149]:
primes[-1]

69292538960327306049903804807362518857207638024876774494604948273643419265701

In [128]:
primes, sd, sr = load_pickle_file("8bit100_20230715_194529/8bit100_state.pkl.gz")

In [129]:
primes[0]

211

In [130]:
primes[-1]

839

In [131]:
primes, sd, sr = load_pickle_file("256bit10000_20230715_194709/256bit10000_state.pkl.gz")

In [132]:
primes[0]

70302485604775097247182212860283555474117777204735597059345894443870799219947

In [151]:
primes[1]

69292538960327306049903804807362518857207638024876774494604948273643417497749

In [154]:
!cat config.json #random_seed removed

{
"num_bits": 256,
"num_primes": 10000,
"start_number": "random",
"write_output": true,
"output_primes": true,
"output_second_differences": true,
"output_second_ratios": true,
"output_sd_sr_combinations": true,
"output_named_prime_sets": true,
"output_named_prime_sets_totals": true,
"miller_rabin_iterations": 5,
"num_digits": "auto",
"prime_sets": [2, 4]
}

In [155]:
primes, sd, sr, sd_sr_combinations, named_prime_sets = run_from_config('config.json')

Generating primes...
100.0 % done             
Calculating named prime sets...
Done!
Calculating second differences...
Calculating second ratios...
Calculating SD-SR combinations...
Calculating named prime sets...
Done!
Full path to the output directory: /Users/tem/primejuly/256bit10000_20230715_201909
Current working directory: /Users/tem/primejuly


In [156]:
primes[0]

69263454140202274896666040813147753680858842313562504511430922677465351239389

In [157]:
primes[-1]

69263454140202274896666040813147753680858842313562504511430922677465353037341

In [158]:
primes, sd, sr, sd_sr_combinations, named_prime_sets = run_from_config('config.json')

Generating primes...
100.0 % done             
Calculating named prime sets...
Done!
Calculating second differences...
Calculating second ratios...
Calculating SD-SR combinations...
Calculating named prime sets...
Done!
Full path to the output directory: /Users/tem/primejuly/256bit10000_20230715_203258
Current working directory: /Users/tem/primejuly


In [159]:
primes[0]

100091444983865287664752696465402507486682355969726198818862562353753518995139

In [160]:
primes[-1]

100091444983865287664752696465402507486682355969726198818862562353753520765403

In [161]:
!cat config.json #Random Seed set

{
"random_seed": 9999,
"num_bits": 256,
"num_primes": 10000,
"start_number": "random",
"write_output": true,
"output_primes": true,
"output_second_differences": true,
"output_second_ratios": true,
"output_sd_sr_combinations": true,
"output_named_prime_sets": true,
"output_named_prime_sets_totals": true,
"miller_rabin_iterations": 5,
"num_digits": "auto",
"prime_sets": [2, 4]
}

In [163]:
primes, sd, sr, sd_sr_combinations, named_prime_sets = run_from_config('config.json')

Generating primes...
100.0 % done             
Calculating named prime sets...
Done!
Calculating second differences...
Calculating second ratios...
Calculating SD-SR combinations...
Calculating named prime sets...
Done!
Full path to the output directory: /Users/tem/primejuly/256bit10000_20230715_203549
Current working directory: /Users/tem/primejuly


In [164]:
primes[0]

80421540044032553066117126094912829512166524641699413272439915632248591921089

In [165]:
primes[-1]

80421540044032553066117126094912829512166524641699413272439915632248593720901

In [166]:
primes[-1]-primes[0]

1799812

In [167]:
primes, sd, sr, sd_sr_combinations, named_prime_sets = run_from_config('config.json')

Generating primes...
100.0 % done             
Calculating named prime sets...
Done!
Calculating second differences...
Calculating second ratios...
Calculating SD-SR combinations...
Calculating named prime sets...
Done!
Full path to the output directory: /Users/tem/primejuly/256bit10000_20230715_203740
Current working directory: /Users/tem/primejuly


In [168]:
primes[0]

80421540044032553066117126094912829512166524641699413272439915632248591921089

In [169]:
primes[-1]

80421540044032553066117126094912829512166524641699413272439915632248593720901

In [None]:
# The random seed works!