This notebook is the main source for all the CUDA kernels used in the polynomial GPU computations. All of the code is written with the Numba CUDA library, which compiles Python functions into GPU kernels. The main advantage of this approach is that it makes debugging easier, using the CUDA Python debugger, and lets us call serial NumPy/Python code, which is easier than working with C directly.

In [3]:
from numba import cuda
from numba import float32,int32,int64
import numpy as np

Before doing any work with kernels, it is nice to have a concrete way to represent the polynomial.

In [4]:
def parsePolynomialString(polyString):
    """Parse a polynomial string into signed integer coefficients and variable parts.

    The string is cut into terms at every '+' or '-'.  Each term is handed to
    ``split_coefficient_variable``, which separates the leading integer
    coefficient from the rest of the term; a missing coefficient counts as 1.

    Parameters
    ----------
    polyString : str (or UTF-8 encoded bytes)
        For example ``'7 x^2 + 5'`` or ``'-3 x + 5'``.

    Returns
    -------
    coeffs : list of int
        Signed coefficient of every term, in order of appearance.
    variables : numpy.ndarray of str
        Variable part of every term, as returned by
        ``split_coefficient_variable``.

    Raises
    ------
    ValueError
        If a term is empty (empty input, two consecutive signs, trailing sign).

    Notes
    -----
    Every '-' is treated as a term separator, so negative exponents are not
    supported.
    """
    # Decode byte strings once up front so everything below works on text.
    if isinstance(polyString, bytes):
        polyString = polyString.decode("utf-8")

    # '+' and '-' both separate terms; the signs themselves are collected
    # separately, in order, so they can be re-attached to the coefficients.
    terms = [term.strip() for term in polyString.replace('-', '+').split('+')]
    signs = [ch for ch in polyString if ch in ('+', '-')]

    # A leading sign produces an empty first "term"; drop it so that signs and
    # terms line up one-to-one.
    if signs and terms[0] == '':
        terms = terms[1:]
    if not terms or any(term == '' for term in terms):
        raise ValueError("empty term in polynomial string: %r" % (polyString,))

    # Without a leading sign the first term is implicitly positive.
    if len(signs) == len(terms) - 1:
        signs.insert(0, '+')

    pairs = [split_coefficient_variable(term) for term in terms]

    # A term with no written coefficient (e.g. 'x^2') has coefficient 1.
    coeffs = [int(sign + (coeff if coeff != u'' else u'1'))
              for sign, (coeff, _) in zip(signs, pairs)]
    variables = np.asarray([variable for _, variable in pairs])

    return coeffs, variables

def split_coefficient_variable(string):
    """Split one polynomial term into its coefficient digits and variable part.

    Parameters
    ----------
    string : str
        A single term without its sign, e.g. ``'7 x^2'``, ``'x^2'`` or ``'5'``.

    Returns
    -------
    tuple of (str, str)
        ``(coefficient, variables)``.  ``coefficient`` is the leading run of
        numeric characters (``''`` when the term starts with a variable).
        ``variables`` is the remainder of the term with any leading
        whitespace or ``*`` separator removed; for a purely numeric (or empty)
        term it is the marker ``'0'``.
    """
    for ind, c in enumerate(string):
        if not c.isnumeric():
            # Keep the character that ended the coefficient: it is part of the
            # variable name unless it is only a separator, which lstrip drops.
            return (string[:ind], string[ind:].lstrip(u' \t*'))
    # No non-numeric character found: the term is a constant (or empty).
    return (string, u'0')
        
def findDegreeUnivariate(v):
    """Return the degree encoded in a single-variable token.

    The degree is the first run of consecutive numeric characters in ``v``
    (``'x^2'`` -> 2, ``'y^1000'`` -> 1000, ``'0'`` -> 0).  A token that
    contains no numeric character at all (``'x'``) has degree 1.

    Parameters
    ----------
    v : str
        One whitespace-free token such as ``'x^3'``.

    Returns
    -------
    int
        The parsed degree.

    Raises
    ------
    ValueError
        If ``v`` is empty.
    """
    if not v:
        raise ValueError("cannot read a degree from an empty token")

    # Locate the first numeric character; no digit means an implicit power 1.
    start_ind = -1
    for i, c in enumerate(v):
        if c.isnumeric():
            start_ind = i
            break
    if start_ind == -1:
        return 1

    # Extend to the end of the digit run.  end_ind is exclusive, so a trailing
    # non-digit (e.g. the 'y' in 'x^2y') is never handed to int().
    end_ind = start_ind
    while end_ind < len(v) and v[end_ind].isnumeric():
        end_ind += 1

    return int(v[start_ind:end_ind])

def buildDegrees(varsfull):
    ''' Build one list of degrees per entry of varsfull.

    Every entry is split on whitespace and each resulting token is passed to
    findDegreeUnivariate; the per-entry results are returned as a list of lists. '''
    degreeLists = []
    for term in varsfull:
        term_degrees = []
        for token in term.split():
            term_degrees.append(findDegreeUnivariate(token))
        degreeLists.append(term_degrees)
    return degreeLists

In [5]:
def vandermonde_interp():
    ''' Placeholder, not implemented yet: the body is only this docstring, so a call returns None.

    Intended to use the generator approach to solve the multivariate interpolation problem f(x_i) = y_i. '''

def evaluate_polynomial():
    ''' Placeholder, not implemented yet: the body is only this docstring, so a call returns None.

    Intended to use the reduce function to quickly evaluate a polynomial. '''

def parallel_modular_reduce():
    ''' Placeholder, not implemented yet: the body is only this docstring, so a call returns None.

    Intended to reduce all the coefficients modulo a given modulus. '''
    
def extended_eucledian():
    ''' Placeholder, not implemented yet: the body is only this docstring, so a call returns None.

    Intended to compute the modular inverse (extended Euclidean algorithm). '''
    
def parallel_chinese_remaindering():
    ''' Placeholder, not implemented yet: the body is only this docstring, so a call returns None.

    Intended to perform parallel Chinese remaindering on each set of coefficients. '''
    

def primesfrom3to(n):
    """Return a NumPy array of the odd primes p with 3 <= p < n.

    Uses a sieve over the odd numbers only: index ``i`` of the sieve stands
    for the odd number ``2*i + 1``.

    Parameters
    ----------
    n : int
        Exclusive upper bound.

    Returns
    -------
    numpy.ndarray of int
        The primes in increasing order; empty when ``n <= 3``.
    """
    n = int(n)
    if n <= 3:
        # No odd prime lies below 3; this also keeps negative n from reaching
        # np.ones / the square root below.
        return np.empty(0, dtype=np.intp)

    # Floor division keeps sizes and indices integral on Python 2 and 3 alike.
    sieve = np.ones(n // 2, dtype=bool)
    for i in range(3, int(n ** 0.5) + 1, 2):
        if sieve[i // 2]:
            # Strike out the odd multiples of i, starting at i*i.
            sieve[i * i // 2::i] = False
    # Index 0 stands for the number 1, which is not prime, so it is skipped.
    return 2 * np.nonzero(sieve)[0][1:] + 1
    
def get_mod_primes(N,M):
    """Collect odd primes below N until their product exceeds M.

    Parameters
    ----------
    N : int
        Exclusive upper bound handed to ``primesfrom3to``.
    M : int
        Bound the product of the selected primes has to exceed.

    Returns
    -------
    list or None
        The selected primes in increasing order, or None when the product of
        all primes returned by ``primesfrom3to(N)`` still does not exceed M.
    """
    total = 1
    mvect = []
    for p in primesfrom3to(N):
        # Accumulate as a Python int: the primes arrive as fixed-width NumPy
        # integers, whose product would silently wrap around once it no
        # longer fits, making the comparison against M meaningless.
        total *= int(p)
        mvect.append(p)
        if total > M:
            return mvect
    # Not enough primes below N to exceed M.
    return None

def generate_evaluationpoints(degreeResult):
    """Return the integers 1, 2, ..., degreeResult - 1 as a NumPy array."""
    first_point = 1
    points = np.arange(first_point, degreeResult)
    return points

In [38]:
# Parse both operand polynomials into (coefficients, variable strings).
acoeffs,avars = parsePolynomialString('7 x^2 + 5')
bcoeffs,bvars = parsePolynomialString('50 y^1000 z^40 x^3 - 3')

# NOTE(review): `a` and `b` are not assigned in this cell; they presumably come from
# another cell's coefficient arrays -- confirm, or check whether acoeffs/bcoeffs were meant.
M = 2 * np.max(abs(a))*np.max(abs(b) )
# Odd primes below 100, taken until their product exceeds 2*M.
mm = get_mod_primes(100,M*2)

# Largest per-term sum of the degrees reported by buildDegrees.
adegree = max([np.sum(_) for _ in buildDegrees(avars)])
bdegree = max([np.sum(_) for _ in buildDegrees(bvars)])

In [40]:
bdegree

1043

In [31]:
[np.sum(_) for _ in buildDegrees(bvars)]

[1043, 0]

In [28]:
reduce(lambda x,y:x+y,)

1043

In [26]:
reduce(sum,buildDegrees(bvars))

TypeError: can only concatenate list (not "int") to list

In [14]:
degreeList for degreeList in buildDegrees(bvars)]

[[1000, 40, 3], [0]]

In [212]:
varsfull = bvars


hey


In [213]:
degreeLists

[[1000, 3], [0]]

In [179]:
findDegreeUnivariate(avars[0])

2

In [197]:
test = u"y^5 x^3"

In [200]:
[findDegreeUnivariate(x) for x in test.split()]

[5, 3]

In [186]:
for vars in avars:
     print findDegreeUnivariate(vars)

5
0


In [135]:
findDegreeUnivariate(v)

u'3'

In [122]:
start_ind == len(v) -1

True

In [115]:
(not c.isnumeric() and end_ind == -1 and not start_ind  == -1) or i == len(v)-1

True

2

In [107]:
start_i

2

In [81]:
calcDegree(varString):
    
    

array([u'y x^3', u'0'], 
      dtype='<U5')

In [60]:
''' For our first example we consider the problem of multiplying the two polynomials a*b (7x+5)(2x-3) '''
a = np.array([7,5])
b = np.array([2,-3])



# For multiplication, the degree of the result is the sum of the degrees of a and b.
# NOTE(review): `degreeResult` is not assigned in this cell -- it must be defined before this line runs.
range(1,degreeResult)





In [64]:
samplePolyMathematica 

'176717776189025974273681640625 - 524822996828991154174804687500 x^4 + 515033918625080621034667968750 x^8 - 163334210731862516738592187500 x^12 - 2736239170293783025519921875 x^16 - 853567945260137903985750000 x^20 - 3332100748804904946375000 x^24 - 1366093540832634465390000 x^28 + 19097129680976733491250 x^32 - 1070178180326028037800 x^36 + 30307759123324511700 x^40 - 684287794892518696 x^44 + 15919723498675794 x^48 - 261616202648496 x^52 + 4039702363304 x^56 - 52707284976 x^60 + 541355421 x^64 - 5241644 x^68 + 36846 x^72 - 204 x^76 + x^80 - 524822996828991154174804687500 y^4 + 1358410577665290835983398437500 x^4 y^4 - 1121410871803655000470926562500 x^8 y^4 + 268366792559331875120012632500 x^12 y^4 + 18009207366928841424033090000 x^16 y^4 + 1409967474860596621252172400 x^20 y^4 + 37093896071304807884698800 x^24 y^4 + 277169016416649937196688 x^28 y^4 - 45399599055456194122344 x^32 y^4 - 2051064742836371588856 x^36 y^4 - 48917341932402832184 x^40 y^4 + 719118621286728408 x^44 y^4 + 72

In [33]:
primesfrom3to(10000)

array([   3,    5,    7, ..., 9949, 9967, 9973], dtype=int64)

In [None]:
from numba import cuda
from numba import float32,int32,int64
import numpy as np

@cuda.jit('void(int64[:],int64[:])')
def reduceModuloPolynomial(a,b):
    """Store a[i] % 7 in b[i] for each index i of a; each thread handles the index given by cuda.grid(1)."""
    idx = cuda.grid(1)
    # Threads whose index lies past the end of `a` have nothing to do.
    if idx >= len(a):
        return
    b[idx] = a[idx] % 7
        


In [None]:
''' We need a way to efficiently'''

@cuda.reduce
def 

In [None]:
# Build a test input of N fifteens and an output buffer of the same length.
N = 10
a = np.ones(N,np.int64)*15
b = np.zeros(N,dtype = np.int64)

# Derive the launch configuration from N instead of hard-coding the thread
# count: enough blocks are launched to cover all N elements, and the kernel's
# own bounds check makes the surplus threads of the last block do nothing.
threads_per_block = 32
blocks_per_grid = (N + threads_per_block - 1) // threads_per_block
reduceModuloPolynomial[blocks_per_grid, threads_per_block](a,b)