This serves as the main source code for writing all the CUDA kernels for polynomial GPU computations. All of the code was written using the numba cuda library, which compiles the python syntax to C code. The main advantage to this approach is that it allows us to debug easier using the Cuda python debugger and call Numpy pyton serial code which is easier than working with C directly.

In [1]:
from numba import cuda
from numba import float32,int32,int64
import numpy as np

In [2]:
samplePolyMathematica = "176717776189025974273681640625 - 524822996828991154174804687500 x^4 + 515033918625080621034667968750 x^8 - 163334210731862516738592187500 x^12 - 2736239170293783025519921875 x^16 - 853567945260137903985750000 x^20 - 3332100748804904946375000 x^24 - 1366093540832634465390000 x^28 + 19097129680976733491250 x^32 - 1070178180326028037800 x^36 + 30307759123324511700 x^40 - 684287794892518696 x^44 + 15919723498675794 x^48 - 261616202648496 x^52 + 4039702363304 x^56 - 52707284976 x^60 + 541355421 x^64 - 5241644 x^68 + 36846 x^72 - 204 x^76 + x^80 - 524822996828991154174804687500 y^4 + 1358410577665290835983398437500 x^4 y^4 - 1121410871803655000470926562500 x^8 y^4 + 268366792559331875120012632500 x^12 y^4 + 18009207366928841424033090000 x^16 y^4 + 1409967474860596621252172400 x^20 y^4 + 37093896071304807884698800 x^24 y^4 + 277169016416649937196688 x^28 y^4 - 45399599055456194122344 x^32 y^4 - 2051064742836371588856 x^36 y^4 - 48917341932402832184 x^40 y^4 + 719118621286728408 x^44 y^4 + 7261206066155664 x^48 y^4 + 1067560459184560 x^52 y^4 - 23159945652624 x^56 y^4 + 34631561424 x^60 y^4 + 1691958932 x^64 y^4 - 8367012 x^68 y^4 - 3876 x^72 y^4 + 20 x^76 y^4 + 515033918625080621034667968750 y^8 - 1121410871803655000470926562500 x^4 y^8 + 692306784034766888513561268750 x^8 y^8 - 70467501514363192927331426016 x^12 y^8 - 14370383052763752759083552808 x^16 y^8 - 1035070187757372853125068016 x^20 y^8 - 55157327620514497044014472 x^24 y^8 - 1692867176854213729029408 x^28 y^8 - 25586983740542496806844 x^32 y^8 - 349299037145852985880 x^36 y^8 + 30001458887173320228 x^40 y^8 - 245641547333553696 x^44 y^8 + 16518615637243832 x^48 y^8 - 382503122607600 x^52 y^8 + 2410846944408 x^56 y^8 - 1844368352 x^60 y^8 + 3264318 x^64 y^8 - 34884 x^68 y^8 + 190 x^72 y^8 - 163334210731862516738592187500 y^12 + 268366792559331875120012632500 x^4 y^12 - 70467501514363192927331426016 x^8 y^12 - 35185880544130730526680680416 x^12 y^12 + 125471981964006724254562800 x^16 y^12 - 39370878297880423433887248 x^20 y^12 + 1325658903120916100843616 x^24 y^12 + 58762465861640264048736 x^28 y^12 - 4133441494425443083464 x^32 y^12 + 132187048341776912760 x^36 y^12 - 7477154238671894304 x^40 y^12 + 196380621346972640 x^44 y^12 - 2444481539437200 x^48 y^12 + 16380609978480 x^52 y^12 - 89650392160 x^56 y^12 + 481233312 x^60 y^12 - 197676 x^64 y^12 + 1140 x^68 y^12 - 2736239170293783025519921875 y^16 + 18009207366928841424033090000 x^4 y^16 - 14370383052763752759083552808 x^8 y^16 + 125471981964006724254562800 x^12 y^16 + 45518134102085647099665324 x^16 y^16 - 1346162509203426484265520 x^20 y^16 + 494713301530700458341864 x^24 y^16 - 30478852648930649485200 x^28 y^16 + 766117785480879991950 x^32 y^16 - 49257912432071129616 x^36 y^16 + 996667975969477864 x^40 y^16 - 14969308168692144 x^44 y^16 + 134140483792428 x^48 y^16 - 396080727440 x^52 y^16 + 2530838232 x^56 y^16 - 790704 x^60 y^16 + 4845 x^64 y^16 - 853567945260137903985750000 y^20 + 1409967474860596621252172400 x^4 y^20 - 1035070187757372853125068016 x^8 y^20 - 39370878297880423433887248 x^12 y^20 - 1346162509203426484265520 x^16 y^20 + 792862389474747514213680 x^20 y^20 - 40989447228398294589744 x^24 y^20 + 2506315850072195314608 x^28 y^20 - 82027905924457512144 x^32 y^20 + 2239974538199165264 x^36 y^20 - 28272809343820752 x^40 y^20 + 259034505245136 x^44 y^20 - 675093398800 x^48 y^20 + 5595177744 x^52 y^20 - 2372112 x^56 y^20 + 15504 x^60 y^20 - 3332100748804904946375000 y^24 + 37093896071304807884698800 x^4 y^24 - 55157327620514497044014472 x^8 y^24 + 1325658903120916100843616 x^12 y^24 + 494713301530700458341864 x^16 y^24 - 40989447228398294589744 x^20 y^24 + 3726751869705254575608 x^24 y^24 - 90010365014082472128 x^28 y^24 + 2402962713127941816 x^32 y^24 - 40695673243715760 x^36 y^24 + 245438792649768 x^40 y^24 + 44423703136 x^44 y^24 + 3446804664 x^48 y^24 - 5534928 x^52 y^24 + 38760 x^56 y^24 - 1366093540832634465390000 y^28 + 277169016416649937196688 x^4 y^28 - 1692867176854213729029408 x^8 y^28 + 58762465861640264048736 x^12 y^28 - 30478852648930649485200 x^16 y^28 + 2506315850072195314608 x^20 y^28 - 90010365014082472128 x^24 y^28 + 1957461248863408704 x^28 y^28 - 67415042002427856 x^32 y^28 + 472305304845552 x^36 y^28 + 2315994778592 x^40 y^28 - 11739293856 x^44 y^28 - 10279152 x^48 y^28 + 77520 x^52 y^28 + 19097129680976733491250 y^32 - 45399599055456194122344 x^4 y^32 - 25586983740542496806844 x^8 y^32 - 4133441494425443083464 x^12 y^32 + 766117785480879991950 x^16 y^32 - 82027905924457512144 x^20 y^32 + 2402962713127941816 x^24 y^32 - 67415042002427856 x^28 y^32 + 731528773777134 x^32 y^32 + 4628229542200 x^36 y^32 - 35100554940 x^40 y^32 - 15418728 x^44 y^32 + 125970 x^48 y^32 - 1070178180326028037800 y^36 - 2051064742836371588856 x^4 y^36 - 349299037145852985880 x^8 y^36 + 132187048341776912760 x^12 y^36 - 49257912432071129616 x^16 y^36 + 2239974538199165264 x^20 y^36 - 40695673243715760 x^24 y^36 + 472305304845552 x^28 y^36 + 4628229542200 x^32 y^36 - 46896722136 x^36 y^36 - 18845112 x^40 y^36 + 167960 x^44 y^36 + 30307759123324511700 y^40 - 48917341932402832184 x^4 y^40 + 30001458887173320228 x^8 y^40 - 7477154238671894304 x^12 y^40 + 996667975969477864 x^16 y^40 - 28272809343820752 x^20 y^40 + 245438792649768 x^24 y^40 + 2315994778592 x^28 y^40 - 35100554940 x^32 y^40 - 18845112 x^36 y^40 + 184756 x^40 y^40 - 684287794892518696 y^44 + 719118621286728408 x^4 y^44 - 245641547333553696 x^8 y^44 + 196380621346972640 x^12 y^44 - 14969308168692144 x^16 y^44 + 259034505245136 x^20 y^44 + 44423703136 x^24 y^44 - 11739293856 x^28 y^44 - 15418728 x^32 y^44 + 167960 x^36 y^44 + 15919723498675794 y^48 + 7261206066155664 x^4 y^48 + 16518615637243832 x^8 y^48 - 2444481539437200 x^12 y^48 + 134140483792428 x^16 y^48 - 675093398800 x^20 y^48 + 3446804664 x^24 y^48 - 10279152 x^28 y^48 + 125970 x^32 y^48 - 261616202648496 y^52 + 1067560459184560 x^4 y^52 - 382503122607600 x^8 y^52 + 16380609978480 x^12 y^52 - 396080727440 x^16 y^52 + 5595177744 x^20 y^52 - 5534928 x^24 y^52 + 77520 x^28 y^52 + 4039702363304 y^56 - 23159945652624 x^4 y^56 + 2410846944408 x^8 y^56 - 89650392160 x^12 y^56 + 2530838232 x^16 y^56 - 2372112 x^20 y^56 + 38760 x^24 y^56 - 52707284976 y^60 + 34631561424 x^4 y^60 - 1844368352 x^8 y^60 + 481233312 x^12 y^60 - 790704 x^16 y^60 + 15504 x^20 y^60 + 541355421 y^64 + 1691958932 x^4 y^64 + 3264318 x^8 y^64 - 197676 x^12 y^64 + 4845 x^16 y^64 - 5241644 y^68 - 8367012 x^4 y^68 - 34884 x^8 y^68 + 1140 x^12 y^68 + 36846 y^72 - 3876 x^4 y^72 + 190 x^8 y^72 - 204 y^76 + 20 x^4 y^76 + y^80"
samplePolyMathematica = unicode(samplePolyMathematica, "utf-8")

Before doing any work with kernals its nice if we have a concrete way to represnt the polynomial.

In [3]:
def parsePolynomialString(polyString):
    
    
    try:
        polyString = unicode(polyString,"utf-8")
    except TypeError: #check if already unicode
        pass
    
    repls = ('+', '!'), ('-', '!')
    temp = [l.strip() for l in reduce(lambda a, kv: a.replace(*kv), repls, polyString ).split('!')]
    temp2 = np.asarray([split_coefficient_variable(t) for t in temp])
    temp2[:,0] = map(lambda x: u'1' if x==u'' else x,temp2[:,0])
    
    signs = ''.join(ch for ch in polyString if ch == '+' or ch == '-')
    if len(signs)==len(temp2)-1:
        signs = '+'+signs
    signs = np.asarray([c for c in signs])
    
    
    coeffs = [int(s+val) for (s,val) in zip(signs,temp2[:,0])]
    variables = temp2[:,1]
    
    return coeffs,variables

def split_coefficient_variable(string):
    n = len(string)
    for ind,c in enumerate(string):
        if c.isnumeric():
            if ind == n-1:
                return (string,u'0')
            else:
                continue
        else:
            return (string[0:ind],string[ind+1::])
        
def findDegreeUnivariate(v):
    end_ind = -1 # a hack way for a flag
    start_ind = -1
    for i,c in enumerate(v):
        if c.isnumeric() and start_ind == -1:
            start_ind = i
            if start_ind == len(v) -1:
                end_ind = start_ind
                break
        elif (not c.isnumeric() and end_ind == -1 and not start_ind  == -1) or i == len(v)-1:
            end_ind = i
            break

        else:
            continue
    #pdb.set_trace()
    if not start_ind == end_ind:
         if end_ind == 0 and start_ind == -1:
            return 1
         else:
            t = v[start_ind:end_ind+1]
       
    else:
        t = v[start_ind]
    
    return int(t)

def buildDegrees(varsfull):
    ''' Simple wrapper function that builds a list of list for all the degrees of each univariate variable '''
    degreeLists = [[findDegreeUnivariate(var) for var in vars.split()] for vars in varsfull]
    return degreeLists

In [4]:
def vandermonde_interp():
    ''' Uses the generator approach to solve the multivariate interpolation problem f(x_i) = y_i '''

def evaluate_polynomial():
    ''' uses the reduce function to quickly evaluate a polynomial '''

def parallel_modular_reduce():
    ''' reduces all the coefficients by mod'''
    
def extended_eucledian():
    ''' computes the modular inverse'''
    
def parallel_chinese_remaindering():
    ''' Performs parallel chinese remaindering on each set of coefficients '''
    

def primesfrom3to(n):
    """ Returns a array of primes, 3 <= p < n """
    sieve = np.ones(n/2, dtype=np.bool)
    for i in xrange(3,int(n**0.5)+1,2):
        if sieve[i/2]:
            sieve[i*i/2::i] = False
    return 2*np.nonzero(sieve)[0][1::]+1
    
def get_mod_primes(N,M):
    primes = primesfrom3to(N)
    primes = primes.tolist()
    total = 1L
    mvect = np.array([],dtype=np.int32)
    for p in primes:
        total *= p
        mvect = np.append(mvect,p)
        #print total
        if total > M:
            return mvect
    return mvect

def generate_evaluationpoints(degreeResult):
    return np.arange(1,degreeResult)

In [5]:
float(125121626161631613631613136)

1.251216261616316e+26

In [6]:
import pdb
from math import floor
from decimal import *
def EGCD(a,b):
    cc = a
    dd = b
    c1 = 1
    c2 =0
    d1 =0
    d2 = 1
    
    while not dd==0:
        try:
            q = cc/dd
        except TypeError:
            pdb.set_trace()
            
        r = cc - int(q*dd)
        r = int(r)
        cc = dd
        dd = r

        r1 = c1 - int(q*d1)
        r1 = int(r1)
        c1 = d1
        d1 = r1

        r2 = c2 - int(q*d2)
        r2 = int(r2)
        c2 = d2
        d2 = r2
    
    return (cc,c1,c2)

def cra_incremental(rvect,mvect):
    k = len(rvect)
    assert(len(rvect) == len(mvect))
    
    M = mvect[0]
    res = rvect[0]
    M_invs = []
    for i in range(1,k):
        M_inv = EGCD(M,mvect[i])[1] 
        if M_inv == 0: pdb.set_trace()
        M_invs.append(M_inv)
        c = int(M_inv%mvect[i])
        rprime = int(res%mvect[i])
        s = c*(rvect[i] - rprime)%mvect[i]
        s = int(s)
        res = int(res+s*M)
        #import pdb; pdb.set_trace()
        M = int(M*mvect[i])
    return res



In [7]:
def MRCoeff(rvect,mvect,gamma):
    n = len(rvect)
    #v = np.zeros(n,dtype=np.int32)
    #v[0] = rvect[0]
    v = list()
    v.append(1L*rvect[0])
    
    for k in range(1,n):
        temp = v[k-1]
        for j in range(k-2,-1,-1):
            temp = int(temp*mvect[j] + v[j]) % mvect[k]
        #v[k] = int((rvect[k] - temp) * gamma[k])%mvect[k]
        v.append(((rvect[k] - temp) * gamma[k])%mvect[k])
    
    
#     u = v[n-1]
#     for k in range(n-2,-1,-1):
#         u = u*mvect[k]+v[k]
        
        
    
    return v
            

In [8]:
def MRC_alg(rvect,mvect,c):
    k = len(rvect)
    gamma = np.empty(k,dtype = np.int32)
    gamma[0] = rvect[0]
    #gamma[0] = rvect[0]
    M = np.ones(k,dtype=np.int32)

    for i in range(1,k):
        gamma[i] = ((rvect[i] - gamma[0])*c[i]) % mvect[i]
        M[i] = (mvect[0]*c[i]) % mvect[i]



    for i in range(1,k-1):
        for j in range(i+1,k):
            gamma[j] = (gamma[j] - gamma[i]*M[j]) % mvect[j]
            M[j] = (M[j]*mvect[i]) % mvect[j]
    return gamma,M

def homer_scheme(mvect,gammas,i):
    if i==len(mvect):
        return 1
    return gammas[i]+mvect[i]*(homer_scheme(mvect,gammas,i+1))

    

In [9]:
'''Lets take a look at some modular arthimitic'''
num = 515033918625080621034667968750231312
#num = 32
mvect = get_mod_primes(10000,num).tolist()
residues =[int(num%m) for m in mvect] 
#residues = [r%m for (r,m) in zip(residues,mvect)]
c = [0] + [EGCD(reduce(lambda x,y:x*y,mvect[0:k+1]),m)[1] for (k,m) in enumerate(mvect[1:])]
#mvect = mvect.tolist()
#vv,M = MRC_alg(residues,mvect,c)
vv,M = MRC_alg(residues,mvect,c)
vv = vv.tolist()
M = M.tolist()

u = homer_scheme(mvect,vv,0)%reduce(lambda x,y: x*y,mvect)

mvect2 = get_mod_primes(10000,u).tolist()
residues2 =[int(u%m) for m in mvect2]
print residues
print residues2

[0, 2, 3, 9, 2, 3, 7, 9, 1, 8, 19, 24, 38, 21, 44, 30, 37, 25, 29, 69, 26, 16, 39, 77]
[0, 2, 3, 9, 2, 3, 7, 9, 1, 8, 19, 24, 38, 21, 44, 30, 37, 25, 29, 69, 26, 16, 39, 77]


In [588]:
'Univariate Case'
a = '7 x + 5'
b = '2 x - 3'

aCoef,aVars = parsePolynomialString(a)
bCoef,bVars = parsePolynomialString(b)

aDegree = max([np.sum(_) for _ in buildDegrees(aVars)])
bDegree = max([np.sum(_) for _ in buildDegrees(bVars)])
cDegree = aDegree+bDegree

# Setup
M = 2*max(np.abs(aCoef))*max(np.abs(bCoef)) 
M = 2*M

#Calculate homomorphisms
# syntax: x_'i' where i corresponds to mod reduction and x is the polynomial in question i.e (a,b)

# for this example we use m = 5,7
m = [5,7]

a_5 = [x%5 for x in aCoef]
a_7 = [x%7 for x in aCoef]
b_5 = [x%5 for x in bCoef]
b_7 = [x%7 for x in bCoef]






In [58]:
np.array([[1,2,3],[1,2,3]])

array([[1, 2, 3],
       [1, 2, 3]])

In [60]:
np.hstack([a_5,b_5])

array([2, 0, 2, 2])

In [64]:
X = np.array([np.hstack([a_5,b_5]),np.hstack([a_7,b_7])])
X

array([[2, 0, 2, 2],
       [0, 5, 2, 4]])

In [589]:
print a_5
print a_7
print b_5
print b_7

[2, 0]
[0, 5]
[2, 2]
[2, 4]


In [266]:
bpg = 50
tpb = 16
@cuda.jit
def matmul(A, B, C):
    """Perform square matrix multiplication of C = A * B
    """
    i, j = cuda.grid(2)
    if i < C.shape[0] and j < C.shape[1]:
        tmp = 0.
        for k in range(A.shape[1]):
            tmp += A[i, k] * B[k, j]
        C[i, j] = tmp

In [298]:
print a_5
print a_7

[2, 0]
[0, 5]


In [691]:
@cuda.jit(argtypes=[int32[:],int32[:],int32,int32,int32,int32], target='gpu')
def evaluate_polynomial(a,c,n,d,k,l):
    i = cuda.grid(1)
    N = n*d*k*l
    #we need a representation for the polynomial (x^(p-1),x^p,...x^0)
    
    if i<N:
        #alpha = i%n
        alpha = i%6/d
        degree = i%d
        ind = degree #it just so happens that the degree is equal to the index in this case
        offset = (i/(n*d)*d)
        #cuda.syncthreads()
        c[i] = a[ind+offset]*(alpha**degree)
        

In [695]:
#a = np.hstack([a_5[::-1],a_7[::-1]])
m = [5,7]
N = 24
a = np.hstack([a_5[::-1],a_7[::-1]])
b = np.hstack([b_5[::-1],b_7[::-1]])
ab = np.hstack([a,b])
c = np.empty(N,dtype=np.int32)
deg = 2
k=2
n=3
l=2  # the number of polies
evaluate_polynomial[bpg,tpb](ab,c,n,2,2,2)


X = np.reshape(c,((N/deg),deg))
X = np.sum(X,axis=1)
X = np.reshape(X,(k*n,l),order='F')
X  = np.product(X,axis=1)
#X = np.reshape(X,(3,2))
X = np.reshape(X,(k,n))
for i,row in enumerate(X[:,:]):
    X[i,:] = X[i,:]%m[i]

X

array([[0, 3, 4],
       [6, 2, 5]])

In [694]:
X

array([[ 0,  3],
       [ 3,  6],
       [30, 40]])

In [690]:
X = np.reshape(c,((N/deg),deg))
X = np.sum(X,axis=1)
X

array([0, 2, 4, 5, 5, 5, 2, 4, 6, 4, 6, 8])

In [656]:
array([0, 2, 0, 0, 0, 4, 5, 0, 5, 0, 5, 0, 2, 2, 2, 0, 2, 4, 4, 2, 4, 0, 4,
       4])

2

In [632]:
X = np.reshape(c,((N/deg),deg))
X

array([[0, 2],
       [0, 0],
       [0, 4],
       [5, 0],
       [5, 0],
       [5, 0],
       [2, 2],
       [2, 0],
       [2, 4],
       [4, 2],
       [4, 0],
       [4, 4]])

In [674]:
X

array([[0, 3, 4],
       [6, 2, 5]])

In [631]:
30%7
20%7

6

In [587]:
array([0, 2, 0, 0, 0, 4, 5, 0, 5, 0, 5, 0, 2, 2, 2, 0, 2, 4, 4, 2, 4, 0, 4,
       4])

[0, 2]

In [543]:
ab

array([0, 2, 5, 0, 2, 2, 4, 2])

In [514]:
array([2, 2, 2, 0, 2, 4, 4, 2, 4, 0, 4, 4])

[4, 2]

In [581]:
@cuda.jit(argtypes=[int32[:],int32[:],int32,int32,int32], target='gpu')
def evaluate_polynomial(a,c,n,d,k):
    i = cuda.grid(1)
    N = n*d*k
    #we need a representation for the polynomial (x^(p-1),x^p,...x^0)
    
    if i<N:
        alpha = i%n
        degree = i%d
        ind = degree #it just so happens that the degree is equal to the index in this case
        offset = i/(n*d)*d
        #offset2 = 
        #cuda.syncthreads()
        c[i] = a[ind+offset]*(alpha**degree)
        

In [513]:
b_5[::-1]

[2, 2]

array([2, 2, 4, 2])

In [501]:
b_5,b_7

([2, 2], [2, 4])

In [477]:
def test(a,c,m_i,n):
    i = cuda.grid(1)
    N = 12
    #we need a representation for the polynomial (x^(p-1),x^p,...x^0)
    
    if i<N:
        
        c[i] = i
        

In [435]:
test(a,c,0,0)
c

TypeError: 'Macro' object is not callable

In [399]:
a = np.arange(4,dtype = np.int32)
b = np.arange(3,dtype = np.int32)
N=12
c = np.empty(N,dtype = np.int32)
evaluate_polynomial[bpg,tpb](a,c,0,0)

In [400]:
c

array([0, 1, 0, 0, 0, 2, 2, 3, 2, 0, 2, 6])

array([2, 0, 0, 5])

In [357]:
a_3

NameError: name 'a_3' is not defined

In [342]:
c

array([2, 2, 2, 2, 2, 2])

In [293]:
c

array([0, 2, 4])

In [288]:
n = tpb*bpg
A = np.random.rand(n,n).astype(np.float32)*10
B = np.random.rand(n,n).astype(np.float32)*10
C = np.empty_like(A)
griddim = bpg,bpg
blockdim = tpb,tpb
matmul[griddim,blockdim](A,B,C)

In [283]:
C

array([[ 20730.80078125,  20616.0703125 ,  20420.48828125, ...,
         21465.83984375,  21071.57421875,  21225.83789062],
       [ 20197.94726562,  20379.00585938,  19203.54101562, ...,
         19770.06835938,  20253.38085938,  19888.62695312],
       [ 19224.87109375,  19752.73242188,  18745.109375  , ...,
         19927.33007812,  20449.4921875 ,  20040.81445312],
       ..., 
       [ 20676.72070312,  21098.375     ,  20046.03320312, ...,
         20805.05273438,  21192.85351562,  21036.24609375],
       [ 20173.58203125,  20437.49023438,  19533.52929688, ...,
         20087.29882812,  20808.890625  ,  19968.26171875],
       [ 19657.12109375,  19997.8359375 ,  18818.859375  , ...,
         19581.7890625 ,  19905.37695312,  19658.87890625]], dtype=float32)

In [285]:
np.dot(A,B)

array([[ 20730.80273438,  20616.0703125 ,  20420.49414062, ...,
         21465.84375   ,  21071.57617188,  21225.83789062],
       [ 20197.9453125 ,  20379.00390625,  19203.53710938, ...,
         19770.06640625,  20253.38085938,  19888.62890625],
       [ 19224.87109375,  19752.73632812,  18745.109375  , ...,
         19927.328125  ,  20449.48828125,  20040.81640625],
       ..., 
       [ 20676.72070312,  21098.37304688,  20046.02929688, ...,
         20805.05273438,  21192.85742188,  21036.24804688],
       [ 20173.5859375 ,  20437.48828125,  19533.52734375, ...,
         20087.296875  ,  20808.88671875,  19968.26367188],
       [ 19657.12304688,  19997.83203125,  18818.86328125, ...,
         19581.7890625 ,  19905.37304688,  19658.87890625]], dtype=float32)

In [281]:
np.all(np.dot(A,B) == C)

False

In [262]:
bpg = 50
tpb = 16
n = bpg * tpb

@cuda.jit(argtypes=[float32[:,:], float32[:,:], float32[:,:]], target='gpu')
def cu_square_matrix_mul(A, B, C):
    sA = cuda.shared.array(shape=(tpb, tpb), dtype=float32)
    sB = cuda.shared.array(shape=(tpb, tpb), dtype=float32)

    tx = cuda.threadIdx.x
    ty = cuda.threadIdx.y
    bx = cuda.blockIdx.x
    by = cuda.blockIdx.y
    bw = cuda.blockDim.x
    bh = cuda.blockDim.y
    
    #x = tx + bx * bw
    #y = ty + by * bh
    (x,y) = cuda.grid(2)

    acc = 0.
    for i in range(bpg):
        if x < n and y < n:
            sA[ty, tx] = A[y, tx + i * tpb]
            sB[ty, tx] = B[ty + i * tpb, x]

        cuda.syncthreads()

        if x < n and y < n:
            for j in range(tpb):
                acc += sA[ty, j] * sB[j, tx]
        if tx == 0:
            acc *= 0

        cuda.syncthreads()

    if x < n and y < n:
        C[y, x] = acc

In [484]:
@cuda.jit(argtypes=[int32[:],int32[:],int32,int32], target='gpu')
def evaluate_polynomial(a,c,k,d):
    i = cuda.grid(1)
    N = 12
    #we need a representation for the polynomial (x^(p-1),x^p,...x^0)
    
    if i<N:
        alpha = i%3
        degree = i%2
        ind = degree #it just so happens that the degree is equal to the index in this case
        offset = i/6*2
        #cuda.syncthreads()
        c[i] = a[ind+offset]*(alpha**degree)
        

In [263]:
A = np.random.rand(n,n).astype(np.float32)*10
B = np.random.rand(n,n).astype(np.float32)*10
C = np.empty_like(A)
griddim = bpg,bpg
blockdim = tpb,tpb
cu_square_matrix_mul[griddim,blockdim](A,B,C)

In [265]:
import pandas as pd
pd.DataFrame(C)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,790,791,792,793,794,795,796,797,798,799
0,0,20362.650391,20100.792969,19768.201172,20530.488281,20244.927734,20384.294922,20056.382812,20390.447266,20159.640625,...,19276.900391,20322.943359,19087.113281,20006.199219,20350.349609,19951.314453,20290.951172,19695.160156,20291.625000,20094.509766
1,0,20055.414062,20186.660156,19293.089844,20403.470703,19843.544922,19791.513672,19606.222656,19765.492188,19458.404297,...,18887.158203,20150.824219,18659.615234,19401.031250,19912.220703,19665.724609,19528.138672,19724.552734,19799.296875,19608.214844
2,0,20707.853516,20214.939453,19541.041016,20315.896484,20232.890625,20275.398438,19392.457031,20062.890625,19926.275391,...,19600.943359,20725.890625,18938.816406,18923.156250,20555.671875,19434.953125,19554.935547,19563.421875,19774.462891,19749.224609
3,0,19691.238281,19895.681641,18834.558594,20747.060547,19778.761719,19847.716797,19330.638672,20018.460938,20103.580078,...,19363.470703,20119.822266,18732.691406,18618.119141,20073.287109,19322.267578,19546.138672,19494.853516,19573.187500,19718.779297
4,0,20639.193359,20154.216797,18697.433594,20161.287109,20075.093750,20356.810547,19439.128906,19762.087891,19904.197266,...,19412.005859,20328.662109,19184.884766,18938.683594,20732.824219,20049.052734,20278.728516,19809.185547,19977.134766,20387.929688
5,0,20147.878906,19821.455078,18962.316406,20248.271484,19746.658203,19736.589844,19281.320312,19469.625000,19564.835938,...,19012.289062,20177.892578,18471.351562,19050.263672,19880.244141,19509.244141,20044.957031,18862.291016,19622.939453,19301.957031
6,0,20052.316406,19630.699219,18783.880859,19949.861328,20127.962891,19645.029297,18788.250000,19682.906250,19472.875000,...,18736.179688,20214.146484,18596.597656,19357.259766,19811.771484,19750.281250,19425.003906,19403.146484,19435.419922,19297.482422
7,0,22339.070312,22472.082031,21044.437500,22265.775391,22001.998047,22055.500000,21124.679688,21789.013672,21846.953125,...,21125.046875,22349.501953,20611.765625,20554.156250,22065.246094,21771.664062,21886.376953,20990.476562,21457.593750,21445.583984
8,0,20268.765625,20448.960938,19743.246094,20362.857422,20509.505859,19973.861328,19170.621094,20343.980469,19863.318359,...,19395.992188,20530.421875,18943.650391,19578.744141,20271.685547,20051.953125,20477.320312,19426.335938,19854.173828,19742.974609
9,0,20797.853516,20656.160156,19447.646484,20990.689453,20311.443359,20573.222656,19810.064453,20770.765625,20085.246094,...,20158.218750,20728.765625,18993.894531,20071.326172,20718.576172,19688.435547,20580.890625,19916.207031,20251.070312,20300.306641


In [261]:
@cuda.jit('int32(int32, int32)', device=True, inline=True)
def sum(a, b):
    if a > b:
        return a
    else:
        return b

@cuda.jit('int32(int32[:])', device=True, inline=True)
def test(a):
    out = 0
    for i in a:
        out+=i
    return out


@cuda.reduce
def test_reduce(a, b):  
    return test(a) * test(b)

a = np.arange(5)
b = np.arange(5)
A = np.array([a,b])
ans = test_reduce(A)   # cuda sum reduction


TypeError: only support 1D array

array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4]])

In [259]:
A

[array([0, 1, 2, 3, 4]), array([0, 1, 2, 3, 4])]

In [256]:
ans

6.0

In [240]:
a1 = [1,2,3]
a2 = [4,5,6]
# we want to compute sum(a1)*sum(a2)

499500.0

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,790,791,792,793,794,795,796,797,798,799
0,0,19338.820312,18948.253906,19671.093750,20039.664062,20134.105469,20184.142578,19951.529297,19327.763672,20095.109375,...,20215.667969,18420.777344,18958.363281,20354.455078,18981.792969,19624.810547,20248.763672,20025.281250,19815.654297,19957.220703
1,0,20402.644531,20326.423828,21154.830078,21248.126953,21218.875000,21056.281250,20767.853516,20294.386719,21444.029297,...,20733.142578,19985.195312,20700.250000,20960.851562,19820.261719,20646.039062,20914.964844,21458.554688,21098.937500,21133.974609
2,0,18974.431641,18574.505859,19918.867188,19574.441406,20027.769531,19400.048828,20075.271484,19021.761719,20237.445312,...,19728.126953,18589.199219,18794.566406,19562.742188,19016.472656,20057.156250,19757.695312,20680.773438,19635.421875,20151.958984
3,0,19763.068359,19715.703125,20769.664062,19929.425781,20419.009766,20650.433594,20425.615234,19854.888672,20988.417969,...,20186.943359,18957.107422,19699.267578,20990.869141,19384.425781,20507.029297,20583.031250,20607.787109,20450.503906,20432.765625
4,0,19530.750000,19534.341797,20433.923828,19612.488281,20277.220703,19808.958984,20103.748047,19723.578125,20003.531250,...,19648.832031,18635.841797,19367.279297,19965.740234,19196.480469,19794.943359,20177.460938,19868.179688,20261.166016,20101.968750
5,0,18978.230469,18663.246094,19755.708984,19856.136719,19900.279297,19653.443359,20015.964844,19090.158203,20634.691406,...,19688.455078,18399.029297,19257.910156,20160.462891,19158.031250,20181.484375,20015.609375,20221.564453,19571.064453,19975.582031
6,0,20491.267578,19875.562500,21123.001953,20375.425781,20977.630859,20919.431641,20639.681641,19787.414062,21139.894531,...,20504.695312,19172.330078,20283.574219,21089.677734,19896.519531,20929.806641,21127.289062,21462.998047,20870.609375,20760.570312
7,0,19962.316406,19812.443359,20665.583984,20047.921875,20713.982422,20258.673828,20200.500000,19569.189453,21002.683594,...,20204.921875,19005.607422,19486.060547,21404.351562,19486.957031,20159.119141,20784.412109,20690.330078,20540.062500,20433.984375
8,0,19250.896484,19384.732422,20067.013672,20232.304688,20387.125000,20318.275391,19785.632812,19531.726562,20439.966797,...,19999.603516,18836.201172,19522.287109,20166.072266,19999.607422,20453.283203,19994.164062,20376.250000,19978.373047,20716.539062
9,0,18848.146484,18475.363281,20021.603516,19445.378906,19815.628906,19656.150391,19072.974609,18945.976562,19741.343750,...,19246.470703,18354.443359,18881.785156,19636.734375,18792.984375,19023.291016,19355.605469,19744.654297,19208.892578,19533.318359


In [211]:
C2 =np.dot(A,B)
C2[-1,-1]

19769.258

In [190]:
sum((A)[0,:])

4104.3211859306321

In [171]:
C

array([[ 1337.88842773,     0.        ,     0.        , ...,
            0.        ,     0.        ,     0.        ],
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        ,     0.        ],
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        ,     0.        ],
       ..., 
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        ,     0.        ],
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        ,     0.        ],
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        ,     0.        ]], dtype=float32)

In [154]:
A[0,0]*B[0,0]

6.0296946

In [81]:
A*B

array([[ 0.13690704,  0.15780275,  0.16038589,  0.14639316,  0.07471563,
         0.17613412,  0.05036956,  0.03969185,  0.12471844,  0.19932678],
       [ 0.30503559,  0.00879565,  0.10697126,  0.52589166,  0.39518183,
         0.02045024,  0.82565844,  0.46636474,  0.00382667,  0.08289061],
       [ 0.15176536,  0.2622833 ,  0.53220826,  0.8946656 ,  0.01307231,
         0.07737056,  0.33479446,  0.1139865 ,  0.15976289,  0.41695428],
       [ 0.55979735,  0.10429091,  0.18455163,  0.65808594,  0.43310434,
         0.26133183,  0.09266995,  0.25462094,  0.05073188,  0.01599277],
       [ 0.15481941,  0.07390842,  0.17031613,  0.29221374,  0.00197615,
         0.77740699,  0.20520084,  0.03498841,  0.14344779,  0.24039632],
       [ 0.36570153,  0.03308162,  0.40504581,  0.17549804,  0.04901603,
         0.17601608,  0.42584431,  0.02565288,  0.27958959,  0.01039498],
       [ 0.25421378,  0.57697403,  0.06310637,  0.61780804,  0.21948349,
         0.0523921 ,  0.32464069,  0.20715676

In [None]:
@cuda.jit(argtypes=[float32[:,:], float32[:,:], float32[:,:]], target='gpu')
def add_matrix_row(A, B, C):
    sA = cuda.shared.array(shape=(tpb, tpb), dtype=float32)
    sB = cuda.shared.array(shape=(tpb, tpb), dtype=float32)

    tx = cuda.threadIdx.x
    ty = cuda.threadIdx.y
    bx = cuda.blockIdx.x
    by = cuda.blockIdx.y
    bw = cuda.blockDim.x
    bh = cuda.blockDim.y

    (x,y) = cuda.grid(2)

    acc = 0.
    for i in range(bpg):
        if x < n and y < n:
            sA[ty, tx] = A[y, tx + i * tpb]
            sB[ty, tx] = B[ty + i * tpb, x]

        cuda.syncthreads()

        if x < n and y < n:
            for j in range(tpb):
                acc += sA[ty, j] * sB[j, tx]

        cuda.syncthreads()

    if x < n and y < n:
        C[y, x] = acc

In [69]:
A[0,:]

array([ 0.36751449,  0.72761947,  0.83771068,  0.69905257,  0.96372646,
        0.86328846,  0.13826723,  0.42228767,  0.25019705,  0.3150101 ], dtype=float32)

In [44]:
A*B

array([[ 0.13690704,  0.15780275,  0.16038589,  0.14639316,  0.07471563,
         0.17613412,  0.05036956,  0.03969185,  0.12471844,  0.19932678],
       [ 0.30503559,  0.00879565,  0.10697126,  0.52589166,  0.39518183,
         0.02045024,  0.82565844,  0.46636474,  0.00382667,  0.08289061],
       [ 0.15176536,  0.2622833 ,  0.53220826,  0.8946656 ,  0.01307231,
         0.07737056,  0.33479446,  0.1139865 ,  0.15976289,  0.41695428],
       [ 0.55979735,  0.10429091,  0.18455163,  0.65808594,  0.43310434,
         0.26133183,  0.09266995,  0.25462094,  0.05073188,  0.01599277],
       [ 0.15481941,  0.07390842,  0.17031613,  0.29221374,  0.00197615,
         0.77740699,  0.20520084,  0.03498841,  0.14344779,  0.24039632],
       [ 0.36570153,  0.03308162,  0.40504581,  0.17549804,  0.04901603,
         0.17601608,  0.42584431,  0.02565288,  0.27958959,  0.01039498],
       [ 0.25421378,  0.57697403,  0.06310637,  0.61780804,  0.21948349,
         0.0523921 ,  0.32464069,  0.20715676

In [43]:
np.dot(A,B)

array([[ 2.24099922,  1.77301455,  2.65949869,  3.43336821,  1.68796277,
         2.40511727,  3.49401712,  2.20606256,  1.98351812,  2.77357054],
       [ 2.48035407,  1.72320306,  1.65477026,  2.66861963,  1.54199958,
         1.8704443 ,  2.49201965,  1.46566105,  2.24080086,  3.25429797],
       [ 3.38507628,  2.40767503,  3.18510532,  4.09742975,  2.43625021,
         2.32860541,  3.69326687,  2.48687434,  2.4077003 ,  3.99572086],
       [ 2.14462924,  1.73311913,  1.73244095,  3.1824019 ,  1.37521064,
         2.0045383 ,  2.73703408,  1.64927995,  2.3570044 ,  2.84271145],
       [ 2.17978024,  1.00682056,  1.92636001,  1.84516883,  1.21978295,
         1.40635645,  2.16311431,  0.92149639,  1.61307859,  2.14134169],
       [ 2.69269514,  1.97830415,  3.03063416,  3.66912627,  1.79989159,
         2.61569238,  3.8501749 ,  2.49782038,  2.56505466,  3.89123082],
       [ 2.77370715,  1.84013093,  2.6381259 ,  3.55149531,  2.05855346,
         2.46567845,  3.65449286,  2.25237131

In [42]:
C

array([[ 0.13690704,  0.15780275,  0.16038589,  0.14639316,  0.07471563,
         0.17613412,  0.05036956,  0.03969185,  0.12471844,  0.19932678],
       [ 0.30503559,  0.00879565,  0.10697126,  0.52589166,  0.39518183,
         0.02045024,  0.82565844,  0.46636474,  0.00382667,  0.08289061],
       [ 0.15176536,  0.2622833 ,  0.53220826,  0.8946656 ,  0.01307231,
         0.07737056,  0.33479446,  0.1139865 ,  0.15976289,  0.41695428],
       [ 0.55979735,  0.10429091,  0.18455163,  0.65808594,  0.43310434,
         0.26133183,  0.09266995,  0.25462094,  0.05073188,  0.01599277],
       [ 0.15481941,  0.07390842,  0.17031613,  0.29221374,  0.00197615,
         0.77740699,  0.20520084,  0.03498841,  0.14344779,  0.24039632],
       [ 0.36570153,  0.03308162,  0.40504581,  0.17549804,  0.04901603,
         0.17601608,  0.42584431,  0.02565288,  0.27958959,  0.01039498],
       [ 0.25421378,  0.57697403,  0.06310637,  0.61780804,  0.21948349,
         0.0523921 ,  0.32464069,  0.20715676

In [94]:
import os
#os.environ['NUMBA_ENABLE_CUDASIM'] = '1'
A = np.random.rand(10,10).astype(np.float32)
B = np.random.rand(10,10).astype(np.float32)
C = np.zeros([10,10],dtype=np.float32)
griddim = 1,1
blockdim = 10,10
matmul[griddim,blockdim](A,B,C)

In [95]:
C

array([[  4.65442896e-01,   5.05892813e-01,   4.61268425e-01,
          3.06628466e-01,   3.16563070e-01,   8.20996240e-02,
          2.12497041e-01,   5.60549438e-01,   4.87776287e-02,
          6.56635940e-01],
       [  6.50183082e-01,   8.60133115e-03,   3.25698406e-01,
          7.26709003e-03,   1.63424209e-01,   2.04963788e-01,
          1.47900492e-01,   9.15236771e-03,   1.00388443e-02,
          2.15082556e-01],
       [  8.77482533e-01,   1.46281436e-01,   2.64463760e-02,
          5.04277408e-01,   5.58434352e-02,   3.26909453e-01,
          1.02571761e-02,   1.07958458e-01,   5.55694140e-02,
          9.25844163e-02],
       [  7.04561844e-02,   8.91316608e-02,   8.59826431e-02,
          2.78028190e-01,   3.56522165e-02,   7.30443239e-01,
          3.83448064e-01,   1.27421901e-01,   9.70021822e-03,
          1.15200207e-01],
       [  7.35897347e-02,   8.77224877e-02,   7.98180476e-02,
          2.80209512e-01,   6.70158148e-01,   4.91348714e-01,
          2.88769379e-02

In [22]:
np.dot(A,B)

array([[ 3.00476265,  3.35871077,  1.57399905,  2.55752969,  3.00688791,
         2.27018642,  2.02393222,  2.84381723,  3.09884644,  2.70224047],
       [ 3.06552148,  2.13093853,  1.91202402,  1.95552182,  2.48850965,
         2.09819245,  2.561831  ,  2.72748637,  1.94387591,  2.00002241],
       [ 3.35360765,  3.01639795,  2.27619982,  3.10811663,  2.86711287,
         2.57682347,  2.71806097,  2.96942496,  3.2294817 ,  2.86511517],
       [ 3.58684826,  3.20459461,  2.2715168 ,  2.51269007,  3.06844592,
         2.94995522,  2.59395194,  2.98315334,  2.74776602,  3.01982665],
       [ 3.38010526,  3.17983675,  2.05682373,  3.17262721,  2.69850636,
         2.57781267,  3.08232498,  3.18620706,  3.22671032,  2.73384833],
       [ 3.33356118,  2.64442015,  1.95927763,  1.78774965,  2.70843363,
         2.60898948,  2.43013287,  2.9552381 ,  2.20211005,  2.372895  ],
       [ 3.6653831 ,  3.20907593,  2.18912148,  2.86173725,  3.22517419,
         2.85678101,  3.08049345,  3.35723352

In [20]:
dotKernel(np.zeros([16,16]),np.ones([16,16]),np.ones([16,16]))

NameError: name 'dotKernel' is not defined

In [396]:
num/1025228867564895324627517087881976107L

0L

In [382]:
933257502289211727197297134235659092L/2

466628751144605863598648567117829546L

In [345]:
reduce(lambda x,y: x*y,mvect)

105L

In [346]:
np.product(mvect)

105

In [333]:
M
vv

[2, 0, 2]

In [334]:
np.dot(M,vv)

4

In [335]:
print mvect
print vv

[3L, 5L, 7L]
[2, 0, 2]


In [336]:
homer_scheme(mvect,vv,0) 

137L

In [337]:
2+(3*(0+5*(2+7)))%()

32

5150339186250806210346679687503131313123131L + 2

In [295]:
#print map(int,mvect)
print residues
print residues2

[2L, 2L, 4L]
[2, 2, 4]


In [52]:
51503391862508062103466796875023131231231L/1000

51503391862508062103466796875023131231L

In [51]:
51503391862508062103466796875023131231L + r

51503391862508062103466796875023131462L

In [None]:
def mod(a,b):
    # we seek to find an x such that a = bx+r
    

In [None]:
51503391862508062103466796875023131L

In [42]:
3*int(floor(float(7)/3)) + 7%3

7

In [37]:
3*int(floor(Decimal(51503391862508062103466796875023131)/Decimal(3))) + 1

51503391862508065409344126788304897L

In [15]:
a=3L
Decimal(a)

Decimal('3')

In [16]:
print residues
print residues2

[1, 1, 5, 4, 0, 12, 10, 3, 26, 13, 35, 35, 38, 16, 36, 50, 34, 9, 24, 14, 34, 18, 66, 56]
[1, 1, 5, 4, 0, 12, 10, 3, 26, 13, 35, 35, 38, 16, 38, 28, 39, 46, 24, 38, 43, 81, 29, 79]


In [1130]:

len([2, 0, 3, 3, 10, 12, 16, 6, 26, 27, 27, 2, 28, 20, 35])

15

In [1113]:
num%96

12L

In [1093]:
from math import floor

In [1097]:
100*int(floor(num/100))+num%100

524822996828991184901557452800L

In [1098]:
100*int(floor(float(num)/100)) + (num%100)

524822996828991184901557452800L

In [146]:
num2 = cra_incremental(residues,mvect)
mvect3 = get_mod_primes(10000,num2).tolist()
residues3 =[int(num2%m) for m in mvect3] 
print residues3

[1, 1, 5, 4, 0, 12, 10, 3, 26, 13, 35, 35, 38, 16, 36, 50, 34, 9, 24, 14, 34, 18, 66, 56]


In [148]:
num2

51503391862508062103466796875023131L

In [966]:
num3 = u
mvect3 = get_mod_primes(10000,num3).tolist()
residues3 =[int(num3%m) for m in mvect3] 

In [147]:
print residues
print residues2
print residues3
#print mvect
#print mvect2

[1L, 1L, 5L, 4L, 0L, 12L, 10L, 3L, 26L, 13L, 35L, 35L, 38L, 16L, 36L, 50L, 34L, 9L, 24L, 14L, 34L, 18L, 66L, 56L]
[1, 1, 5, 4, 0, 12, 10, 3, 26, 13, 35, 35, 38, 16, 36, 26, 15, 0, 24, 29, 15, 25, 65, 34]
[1, 1, 5, 4, 0, 12, 10, 3, 26, 13, 35, 35, 38, 16, 36, 50, 34, 9, 24, 14, 34, 18, 66, 56]


In [831]:
[1, 1, 5, 4, 0, 12, 10, 3, 26, 13, 35, 35, 38, 16, 36, 50, 34, 9, 24, 14, 34, 18, 66, 56]


14

In [691]:
-28 % 95

67

In [660]:
-35 % 97

62

In [632]:
gamma[0] + gamma[1]*mvect[1]

3735

In [569]:

c = [0]
for i in range(1,len(mvect)):
    Mmult = reduce(lambda x,y: x*y,mvect[0:i+1])
    c.append(EGCD(Mmult,mvect[i])[1])

In [525]:
c

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [503]:
i = 
Mmult = reduce(lambda x,y: x*y, mvect[:i-1])

  from ipykernel import kernelapp as app


In [485]:
mvect.tolist()[:-1]

[3L,
 5L,
 7L,
 11L,
 13L,
 17L,
 19L,
 23L,
 29L,
 31L,
 37L,
 41L,
 43L,
 47L,
 53L,
 59L,
 61L,
 67L,
 71L,
 73L]

In [444]:
(coeff,var) = parsePolynomialString('51251251251225125105123123 x - 213213213123213232313232321')
(coeff2,var2) = parsePolynomialString('5125125215215215125125215125 y x + 231232132132132131231212313 x + 23151251251')

In [446]:
buildDegrees(var2)

[[1, 1], [1], [0]]

In [447]:
ss

array([u'y x', u'x', u'0'], 
      dtype='<U28')

In [470]:
acoeffs,avars = parsePolynomialString('7 x^2 + 5')
bcoeffs,bvars = parsePolynomialString('50 y^1000 z^40 x^3 - 3')

mm = get_mod_primes(100,M*2)

adegree = max([np.sum(_) for _ in buildDegrees(avars)])
bdegree = max([np.sum(_) for _ in buildDegrees(bvars)])

In [472]:
bdegree

1043

In [975]:
'''Lets take a look at some modular arthimitic'''
num = 51503391862508062103466796875023131
mvect = get_mod_primes(10000,num)
residues =[int(num%m) for m in mvect] 
num2 = cra_incremental(residues,mvect.tolist())
print num2

430945705614427796230659834487134286


In [976]:
mvect2 = get_mod_primes(10000,num2)
residues2 =[int(num2%m) for m in mvect2]

In [977]:
print mvect
print mvect2

[ 3  5  7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97]
[ 3  5  7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97]


In [978]:
print residues
print residues2

[1, 1, 5, 4, 0, 12, 10, 3, 26, 13, 35, 35, 38, 16, 36, 50, 34, 9, 24, 14, 34, 18, 66, 56]
[1, 1, 5, 4, 0, 12, 10, 3, 26, 13, 35, 35, 38, 16, 38, 39, 58, 59, 6, 14, 54, 20, 66, 56]


In [979]:
len(residues2)

24

In [430]:
reduce(lambda x,y:x*y,mvect2.tolist()) 

11884370948172775385325268800679155L

In [438]:
mvect

array([ 3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61,
       67, 71, 73, 79, 83], dtype=int64)

In [288]:
np.log(100000000000000000)/np.log(2)

56.472777613085164

In [294]:
2L**56

72057594037927936L

In [255]:
reduce(lambda x,y: x*y,mvect.tolist()) > num

True

In [286]:
np.log(4)/np.log(2)

2.0

In [195]:
mvect

array([ 3,  5,  7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61,
       67, 71, 73, 79], dtype=int64)

In [185]:
print reduce(lambda x,y: x*y,mvect.tolist()) 
print num

3929160775540133527939545
176717776189025974273681640625


[3L,
 5L,
 7L,
 11L,
 13L,
 17L,
 19L,
 23L,
 29L,
 31L,
 37L,
 41L,
 43L,
 47L,
 53L,
 59L,
 61L,
 67L,
 71L,
 73L,
 79L]

In [226]:
assert(1==2)

AssertionError: 

In [148]:
cra_incremental([4,0],[5,7])

14.0

In [None]:
307444891294245705

1608822383670336453949542277065L

AttributeError: 'module' object has no attribute 'astype'

In [31]:
[np.sum(_) for _ in buildDegrees(bvars)]

[1043, 0]

In [28]:
reduce(lambda x,y:x+y,)

1043

In [26]:
reduce(sum,buildDegrees(bvars))

TypeError: can only concatenate list (not "int") to list

In [14]:
degreeList for degreeList in buildDegrees(bvars)]

[[1000, 40, 3], [0]]

In [212]:
varsfull = bvars


hey


In [213]:
degreeLists

[[1000, 3], [0]]

In [179]:
findDegreeUnivariate(avars[0])

2

In [197]:
test = u"y^5 x^3"

In [200]:
[findDegreeUnivariate(x) for x in test.split()]

[5, 3]

In [186]:
for vars in avars:
     print findDegreeUnivariate(vars)

5
0


In [135]:
findDegreeUnivariate(v)

u'3'

In [122]:
start_ind == len(v) -1

True

In [115]:
(not c.isnumeric() and end_ind == -1 and not start_ind  == -1) or i == len(v)-1

True

2

In [107]:
start_i

2

In [81]:
calcDegree(varString):
    
    

array([u'y x^3', u'0'], 
      dtype='<U5')

In [60]:
''' For our first example we consider the problem of multiplying the two polynomials a*b (7x+5)(2x-3) '''
a = np.array([7,5])
b = np.array([2,-3])



# For multiplication the degree result is simply the degree of a and b.
range(1,degreeResult)





In [64]:
samplePolyMathematica 

'176717776189025974273681640625 - 524822996828991154174804687500 x^4 + 515033918625080621034667968750 x^8 - 163334210731862516738592187500 x^12 - 2736239170293783025519921875 x^16 - 853567945260137903985750000 x^20 - 3332100748804904946375000 x^24 - 1366093540832634465390000 x^28 + 19097129680976733491250 x^32 - 1070178180326028037800 x^36 + 30307759123324511700 x^40 - 684287794892518696 x^44 + 15919723498675794 x^48 - 261616202648496 x^52 + 4039702363304 x^56 - 52707284976 x^60 + 541355421 x^64 - 5241644 x^68 + 36846 x^72 - 204 x^76 + x^80 - 524822996828991154174804687500 y^4 + 1358410577665290835983398437500 x^4 y^4 - 1121410871803655000470926562500 x^8 y^4 + 268366792559331875120012632500 x^12 y^4 + 18009207366928841424033090000 x^16 y^4 + 1409967474860596621252172400 x^20 y^4 + 37093896071304807884698800 x^24 y^4 + 277169016416649937196688 x^28 y^4 - 45399599055456194122344 x^32 y^4 - 2051064742836371588856 x^36 y^4 - 48917341932402832184 x^40 y^4 + 719118621286728408 x^44 y^4 + 72

In [33]:
primesfrom3to(10000)

array([   3,    5,    7, ..., 9949, 9967, 9973], dtype=int64)

In [None]:
from numba import cuda
from numba import float32,int32,int64
import numpy as np

@cuda.jit('void(int64[:],int64[:])')
def reduceModuloPolynomial(a,b):
    i = cuda.grid(1)
    if i < len(a):
        b[i] = a[i]%7
        


In [None]:
''' We need a way to efficiently'''

@cuda.reduce
def 

In [None]:
N = 10
a = np.ones(N,np.int64)*15
b = np.zeros(N,dtype = np.int64)
reduceModuloPolynomial[1,10](a,b)