# Prime search
In this notebook we want to factorize numbers, that is, we want to search for prime numbers. We split the search task across the available CPU cores. Once a process running on one core has found a prime number, it must communicate it to the other cores via interprocess communication (IPC), so that they can move on to searching for other, still unknown primes.

In [4]:
import math
import time
import timeit
from multiprocessing import Pool
import create_range

## Serial prime search
We use only one core to search for primes within a range of numbers, testing the candidates one after the other. Once we have found a prime factor, we simply continue to look for primes in the cofactor that results from dividing the original number by that prime factor. With this strategy only one core is used, so there is no need for communication.

In [5]:
def check_prime(n):
    """Return True if ``n`` is a prime number, False otherwise.

    Uses trial division: after ruling out even numbers, ``n`` is divided by
    every odd candidate from 3 up to and including ``int(sqrt(n))``.

    Args:
        n: Integer to test.

    Returns:
        bool: True when ``n`` is prime. Values below 2 (including negative
        numbers) are reported as not prime; 2 is the only even prime.
    """
    if n < 2:
        # 0, 1 and negatives are not prime; this also keeps math.sqrt away
        # from negative input.
        return False
    if n % 2 == 0:
        # An even number is prime only if it is 2 itself.
        return n == 2
    for i in range(3, int(math.sqrt(n)) + 1, 2):
        if n % i == 0:
            return False
    return True

In [7]:
# Sanity check of the serial tester: 101 is prime, so this cell should display True.
check_prime(101)

True

In [4]:
# Serial baseline: test every number of the range with check_prime in this
# single process and measure the wall-clock time of the whole scan.
t1 = time.time()

# Other ranges that were tried (same labels as in the pool section):
#   A: range(100000000, 100010000)
#   B: range(100000000, 100100000)
#   D: range(1000000000, 1000100000)
#   E: range(100000000000, 100000100000)
number_range = range(100000000, 101000000)  # C

primes = [candidate for candidate in number_range if check_prime(candidate)]
exec_time = time.time() - t1

print('Took: {:.2f}'.format(exec_time))
# Number of primes found, plus the first and last ten as a quick plausibility check.
print(len(primes), primes[:10], primes[-10:])

Took: 57.33
54208 [100000007, 100000037, 100000039, 100000049, 100000073, 100000081, 100000123, 100000127, 100000193, 100000213] [100999889, 100999897, 100999901, 100999903, 100999919, 100999939, 100999949, 100999979, 100999981, 100999993]


## Naive pool prime search
We split the range of numbers to be searched for primes into sub-ranges and give one to each available CPU core.

In [2]:
def check_prime_in_range(n_from_i_to_i):
    """Check whether ``n`` has no divisor among the odd numbers of one sub-range.

    Args:
        n_from_i_to_i: Tuple ``(n, (from_i, to_i))`` packed into a single
            argument so the function can be used with ``pool.map``. ``from_i``
            must be odd; ``to_i`` is truncated with ``int`` and is exclusive.

    Returns:
        bool: False if ``n`` is even or if some ``i`` in
        ``range(from_i, int(to_i), 2)`` divides ``n``; True otherwise.

    Raises:
        AssertionError: if ``n`` is odd and ``from_i`` is even.
    """
    n, bounds = n_from_i_to_i
    from_i, to_i = bounds
    if not n % 2:
        return False
    # Stepping by 2 only visits odd candidates when the start itself is odd.
    assert from_i % 2 != 0
    return all(n % candidate for candidate in range(from_i, int(to_i), 2))


def check_prime(n, pool, nbr_processes):
    """Return True if ``n`` is prime, spreading the trial division over ``pool``.

    The candidate divisors ``[3, int(sqrt(n)) + 1)`` are split by
    ``create_range.create`` into sub-ranges, and each sub-range is checked by
    ``check_prime_in_range`` through ``pool.map``.

    Inputs that need no division work (``n < 2``, even ``n``, and odd ``n``
    with no candidate divisor up to ``sqrt(n)``, i.e. 3, 5 and 7) are decided
    locally without touching ``pool``.

    Args:
        n: Integer to test.
        pool: Object providing ``map(func, iterable)``, e.g. a
            ``multiprocessing.Pool``.
        nbr_processes: Number of sub-ranges requested from
            ``create_range.create``.

    Returns:
        bool: True when ``n`` is prime, False otherwise.

    Raises:
        AssertionError: if ``create_range.create`` does not return exactly
            ``nbr_processes`` sub-ranges.
    """
    if n < 2:
        # 0, 1 and negatives are not prime (and math.sqrt rejects negatives).
        return False
    if n % 2 == 0:
        # 2 is the only even prime; the workers would reject every even n.
        return n == 2
    from_i = 3
    to_i = int(math.sqrt(n)) + 1
    if to_i <= from_i:
        # No odd candidate divisor <= sqrt(n) exists, so n is 3, 5 or 7.
        return True
    # NOTE(review): create_range is a project module not visible here; it is
    # assumed to return a sized sequence of (from_i, to_i) pairs - confirm.
    ranges_to_check = create_range.create(from_i, to_i, nbr_processes)
    # Pair every sub-range with n: pool.map passes a single argument per task.
    ranges_to_check = list(zip(len(ranges_to_check) * [n], ranges_to_check))
    assert len(ranges_to_check) == nbr_processes
    results = pool.map(check_prime_in_range, ranges_to_check)
    # n is prime only if no sub-range reported a divisor.
    return False not in results

In [None]:
def check_prime_serial(n):
    """Return True if ``n`` is prime, using single-process trial division.

    This cell maps a one-argument tester over the number range. The name
    ``check_prime`` is rebound in this section to a three-argument version
    that needs a pool, so it cannot be passed to ``pool.map`` directly.

    Args:
        n: Integer to test.

    Returns:
        bool: True when ``n`` is prime; values below 2 are not prime.
    """
    if n < 2:
        return False
    if n % 2 == 0:
        return n == 2
    for i in range(3, int(math.sqrt(n)) + 1, 2):
        if n % i == 0:
            return False
    return True


NBR_PROCESSES = 4

# Other ranges that were tried (same labels as in the serial section):
#   A: range(100000000, 100010000)
#   B: range(100000000, 100100000)
#   D: range(1000000000, 1000100000)
#   E: range(100000000000, 100000100000)
number_range = range(100000000, 101000000)  # C

# The pool is created after check_prime_serial is defined and before the
# timer starts, so process start-up is not part of the measurement. The
# with-block releases the worker processes once the results are collected.
with Pool(processes=NBR_PROCESSES) as pool:
    t1 = time.time()
    # note using pool.map is fastest, but uses ram
    # using pool.imap is slower but uses less ram
    # pool.imap_unordered is even slower
    are_primes = pool.map(check_prime_serial, number_range)
    # pool.map keeps input order, so the flags line up with number_range.
    primes = [p for p, is_prime in zip(number_range, are_primes) if is_prime]
    exec_time = time.time() - t1

print('Took: {:.2f}'.format(exec_time))
print(len(primes), primes[:10], primes[-10:])

In [None]:
# Time the pool-based check_prime(n, pool, nbr_processes) on a few hand-picked
# numbers and report the best of 20 single runs for each.
NBR_PROCESSES = 4
pool = Pool(processes=NBR_PROCESSES)
print("Testing with {} processes".format(NBR_PROCESSES))

# (label, number) pairs to time. Further candidates that are currently disabled:
#   ("prime", 112272535095293)            # 15 digits
#   ("prime17", 10000000002065383)
#   ("prime23", 22360679774997896964091)
numbers_to_time = [
    ("trivial non-prime", 112272535095295),
    ("expensive non-prime18_1", 100109100129100369),
    ("expensive non-prime18_2", 100109100129101027),
    ("prime18_1", 100109100129100151),
    ("prime18_2", 100109100129162907),
]

for label, nbr in numbers_to_time:
    # timeit runs the statement in its own namespace, so pool and check_prime
    # are pulled in from the notebook's __main__ module by the setup string.
    time_costs = timeit.repeat(
        stmt="check_prime({}, pool, {})".format(nbr, NBR_PROCESSES),
        setup="from __main__ import pool, check_prime",
        repeat=20,
        number=1,
    )
    print("{:19} ({}) {: 3.6f}s".format(label, nbr, min(time_costs)))

Testing with 4 processes
