In [2]:
from typing import (
    List,
    Iterable,
    Set
)
import sys
import random
import logging
import numpy as np
import matplotlib.pyplot as plt 

In [3]:
np.set_printoptions(threshold=sys.maxsize)
np.set_printoptions(linewidth=80) 
 
#!conda install line_profile -c conda-forge
%load_ext line_profiler
%load_ext memory_profiler

In [4]:
N = 50000

In [5]:
logging.basicConfig(level=logging.ERROR)
LOGGER = logging.getLogger(__name__)

# Approach 01

Start from 2, get a prime number and remove its divisible numbers from the set of numbers ```numbers = range(2, n+1)```. 

By removing the divisible numbers of prime p, the minimum number in the ```numbers``` becomes automatically the next prime number.

<img src="images/integer_is_prime_plus_divisibles.JPG" align="left" width=650/>

## SET version 
Using Set instead of List to manage the numbers and primes.

In [6]:
def primes(n: int) -> list:
    numbers = set(range(2, n+1))
    primes = set([])
    while len(numbers) > 0:
        LOGGER.debug("len(numbers) is %s", len(numbers))

        # --------------------------------------------------------------------------------
        # The minimum number in the set is always the smallest prime number.
        # --------------------------------------------------------------------------------
        prime: int = min(numbers)
        primes.add(prime)
        LOGGER.debug("prime [%s] for the current numbers %s", prime, numbers)

        # ================================================================================
        # Remove all the numbers divisible by the prime.
        # ================================================================================
        numbers.remove(prime)

        # --------------------------------------------------------------------------------
        # Suppose 'numbers' is a List, instead of Set:
        # When prime=p, the numbers which are still in 'numbers' and between (p, p^2), that 
        # is numbers[p: p^2 +1], are all primes.
        #
        # Becuse for p, divisible numbers are p * P(C(primes, k)), where P(C(primes,k)) is
        # multiplications of all the possible combinations of primes C(primes, k) k:1,2,..
        # e.g. for primes[2,3,5] and k:1,2,3:
        # C([2,3,5], 1) = (2,3,5)        
        # C([2,3,5], 2) = C([2,3,5], 1) + (  2^2,   2*3,   2*5,   3^2,   3*5,   5^2) 
        # C([2,3,5], 3) = C([2,3,5], 2) + (2*2^2, 2^2*3, 2^2*5, 2*3^2, 2*3*5, 2*5^2, 3*3^2, 3*3*5, 3*5^2, 5^3)
        # 
        # For p=5, all divisible numbers less than p^2, those within range(2, 25), are
        # P(C([2,3,5], k=1,2,3)) and the primes in ( numbers - P(C([2,3,5], k=1,2,3)) )
        # P([2], k) = (2,   4,   6,  8,  10,   12,   14,   16,   18,   20,   22,   24   ) 
        # P([3], k) = (  3,      x     9,      x        15,      x        21,      x    ) 
        # P([5], k) = (       5,         x              x              x              25)
        # numbers   = (            7,       11,   13,         17,   19,         23,     )
        # 
        # Therefore, for p(e.g=5), there is no divisible number in numbers[2: p^2].
        # --------------------------------------------------------------------------------
        start = np.square(prime)
        if start > n:
            # --------------------------------------------------------------------------------
            # All renaming numbers are primes because range(p, p^2) has primes only.
            # Hence no more processing to remove divisibles are required.
            # --------------------------------------------------------------------------------
            primes.update(numbers)
            break
        else:
            divisibles = set(range(start, n+1, prime))
            LOGGER.debug("primes(): divisibles are %s", divisibles)
            
            # set(this) -= set(*others) will not cause an error when others has an element
            # which this set does not has.
            numbers -= divisibles
            LOGGER.debug("primes(): numbers after removing divisibles are %s", numbers)
    
    # return primes
    _primes = list(primes)
    _primes.sort()
    return _primes

In [7]:
%lprun \
    -T primes.log \
    -f primes \
    primes(N)

print(open('primes.log', 'r').read())


*** Profile printout saved to text file 'primes.log'. 
Timer unit: 1e-06 s

Total time: 0.068038 s
File: <ipython-input-6-8cbdd747160d>
Function: primes at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def primes(n: int) -> list:
     2         1       2963.0   2963.0      4.4      numbers = set(range(2, n+1))
     3         1          7.0      7.0      0.0      primes = set([])
     4        49        129.0      2.6      0.2      while len(numbers) > 0:
     5        49        820.0     16.7      1.2          LOGGER.debug("len(numbers) is %s", len(numbers))
     6                                           
     7                                                   # --------------------------------------------------------------------------------
     8                                                   # The minimum number in the set is always the smallest prime number.
     9                                      

In [8]:
%%timeit
primes(N)

40.5 ms ± 3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [9]:
%%memit
primes(N)

peak memory: 100.65 MiB, increment: 5.94 MiB


# List version
Use List instead of Set to manage numbers and primes.

In [10]:
def primes_list_version(n: int) -> set:
    numbers = list(range(2, n+1))
    primes = []
    while len(numbers) > 0:
        LOGGER.debug("len(numbers) is %s", len(numbers))

        # --------------------------------------------------------------------------------
        # The minimum number in the set is always the smallest prime number.
        # --------------------------------------------------------------------------------
        prime: int = numbers[0]
        primes.append(prime)
        LOGGER.debug("prime [%s] for the current numbers %s", prime, numbers)

        # ================================================================================
        # Remove all the numbers divisible by the prime.
        # ================================================================================
        numbers.remove(prime)

        # --------------------------------------------------------------------------------
        # Suppose 'numbers' is a List, instead of Set:
        # When prime=p, the numbers which are still in 'numbers' and between (p, p^2), that 
        # is numbers[p: p^2 +1], are all primes.
        #
        # Becuse for p, divisible numbers are p * P(C(primes, k)), where P(C(primes,k)) is
        # multiplications of all the possible combinations of primes C(primes, k) k:1,2,..
        # e.g. for primes[2,3,5] and k:1,2,3:
        # C([2,3,5], 1) = (2,3,5)        
        # C([2,3,5], 2) = C([2,3,5], 1) + (  2^2,   2*3,   2*5,   3^2,   3*5,   5^2) 
        # C([2,3,5], 3) = C([2,3,5], 2) + (2*2^2, 2^2*3, 2^2*5, 2*3^2, 2*3*5, 2*5^2, 3*3^2, 3*3*5, 3*5^2, 5^3)
        # 
        # For p=5, all divisible numbers less than p^2, those within range(2, 25), are
        # P(C([2,3,5], k=1,2,3)) and the primes in ( numbers - P(C([2,3,5], k=1,2,3)) )
        # P([2], k) = (2,   4,   6,  8,  10,   12,   14,   16,   18,   20,   22,   24   ) 
        # P([3], k) = (  3,      x     9,      x        15,      x        21,      x    ) 
        # P([5], k) = (       5,         x              x              x              25)
        # numbers   = (            7,       11,   13,         17,   19,         23,     )
        # 
        # Therefore, for p(e.g=5), there is no divisible number in numbers[2: p^2].
        # --------------------------------------------------------------------------------
        start = np.square(prime)
        if start > n:
            # --------------------------------------------------------------------------------
            # All renaming numbers are primes because range(p, p^2) has primes only.
            # Hence no more processing to remove divisibles are required.
            # --------------------------------------------------------------------------------
            LOGGER.debug("primes(): start %s > %s. Break the loop", start, n)
            primes.extend(numbers)
            break
        else:
            divisibles = range(start, n+1, prime)
            LOGGER.debug("primes(): divisibles are %s", list(divisibles))

            for d in divisibles:
                try:
                    numbers.remove(d)
                except ValueError as e:
                    LOGGER.debug("Removing %s caused %s", d, e)
                    pass
                
            LOGGER.debug("primes(): numbers after removing divisibles are %s", numbers)
        
    return primes

In [11]:
%lprun \
    -T primes_list_version.log \
    -f primes_list_version \
    primes_list_version(N)

print(open('primes_list_version.log', 'r').read())


*** Profile printout saved to text file 'primes_list_version.log'. 
Timer unit: 1e-06 s

Total time: 12.3252 s
File: <ipython-input-10-b8a86aefd36d>
Function: primes_list_version at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def primes_list_version(n: int) -> set:
     2         1       2123.0   2123.0      0.0      numbers = list(range(2, n+1))
     3         1          2.0      2.0      0.0      primes = []
     4        49        122.0      2.5      0.0      while len(numbers) > 0:
     5        49        208.0      4.2      0.0          LOGGER.debug("len(numbers) is %s", len(numbers))
     6                                           
     7                                                   # --------------------------------------------------------------------------------
     8                                                   # The minimum number in the set is always the smallest prime number.
     9    

In [12]:
%%timeit
primes_list_version(N)

18.4 s ± 4.43 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%%memit
primes_list_version(N)

peak memory: 96.03 MiB, increment: 0.77 MiB


# Approach 02
## Numpy version
<img src="images/prime_bit_matrix.JPG" align="left" width=650/>