Skip to content
Browse files

Add timings for trmv family.

  • Loading branch information...
1 parent bdcf745 commit bf3bbeb30cc188b1e1f6f9afe1f3b9afc7f89a6d @dpo dpo committed
Showing with 110 additions and 0 deletions.
  1. +55 −0 tokyo/_double_speed.pyx
  2. +55 −0 tokyo/_single_speed.pyx
View
55 tokyo/_double_speed.pyx
@@ -56,6 +56,7 @@ for size in test_sizes:
print
dgemv_speed(size); print
dsymv_speed(size); print
+ dtrmv_speed(size); print
dger_speed(size); print
print
@@ -360,6 +361,60 @@ cdef dsymv_speed( int size ):
print "%9.0f kc/s %5.1fx" % (rate/1000,rate/np_rate)
+# Double precision triangular matrix vector product: x <- A * x
+
+cdef dtrmv_speed( int size ):
+    # Time the double precision triangular matrix-vector product x <- A * x
+    # for a size-by-size lower triangular matrix A.
+    #
+    # Prints the call rate (in thousands of calls per second) obtained with
+    # numpy.dot, then with tokyo.dtrmv, tokyo.dtrmv6 and tokyo.dtrmv_, each
+    # followed by its speedup relative to the numpy.dot rate.
+
+    cdef int i, loops
+
+    # The repetition count shrinks as the problem size grows.
+    loops = speed_base*10/(<int>(size**1.2))
+
+    A = np.array( np.random.random( (size,size) ), dtype=np.float64 )
+    x = np.array( np.random.random( (size) ), dtype=np.float64 )
+
+    # Keep the diagonal and everything below it and zero the strict upper
+    # triangle (entries with j > i), so that A is lower triangular. np.tril
+    # does this in one vectorised call instead of size**2 element-wise
+    # assignments from an interpreted double loop.
+    A = np.tril( A )
+
+    cdef np.ndarray[double, ndim=2, mode='c'] A_
+    cdef np.ndarray[double, ndim=1, mode='c'] x_
+    A_ = A; x_ = x
+
+    print "numpy.dot: ",
+    start = time.clock()
+    for i in range(loops):
+        # NOTE: np.dot returns a new array, so x is rebound on every pass
+        # while x_ (bound above) keeps referring to the initial vector.
+        x = np.dot(A,x)
+    np_rate = loops/(time.clock()-start)
+    print "%9.0f kc/s" % (np_rate/1000)
+
+    # The tokyo wrappers are timed over three times as many repetitions.
+    loops *= 3
+
+    print "dtrmv: ",
+    start = time.clock()
+    for i in range(loops):
+        tokyo.dtrmv( A, x )
+    rate = loops/(time.clock()-start)
+    print "%9.0f kc/s %5.1fx" % (rate/1000,rate/np_rate)
+
+    # The two remaining variants use five times more repetitions again.
+    loops *= 5
+
+    print "dtrmv6: ",
+    start = time.clock()
+    for i in range(loops):
+        tokyo.dtrmv6( tokyo.CblasRowMajor, tokyo.CblasLower, tokyo.CblasNoTrans,
+                      tokyo.CblasNonUnit, A, x )
+    rate = loops/(time.clock()-start)
+    print "%9.0f kc/s %5.1fx" % (rate/1000,rate/np_rate)
+
+    print "dtrmv_: ",
+    start = time.clock()
+    for i in range(loops):
+        # Raw-pointer call: order and leading dimension are both taken from
+        # A_.shape[1]; the vector is passed with unit stride.
+        tokyo.dtrmv_( tokyo.CblasRowMajor, tokyo.CblasLower, tokyo.CblasNoTrans,
+                      tokyo.CblasNonUnit, A_.shape[1], <double*>A_.data,
+                      A_.shape[1], <double*>x_.data, 1 )
+    rate = loops/(time.clock()-start)
+    print "%9.0f kc/s %5.1fx" % (rate/1000,rate/np_rate)
+
+
# double precision vector outer-product: A = alpha * outer_product( x, y.T )
View
55 tokyo/_single_speed.pyx
@@ -58,6 +58,7 @@ for size in test_sizes:
print
sgemv_speed(size); print
ssymv_speed(size); print
+ strmv_speed(size); print
sger_speed(size); print
@@ -382,6 +383,60 @@ cdef ssymv_speed( int size ):
print "%9.0f kc/s %5.1fx" % (rate/1000,rate/np_rate)
+# Single precision triangular matrix vector product: x <- A * x
+
+cdef strmv_speed( int size ):
+    # Time the single precision triangular matrix-vector product x <- A * x
+    # for a size-by-size lower triangular matrix A.
+    #
+    # Prints the call rate (in thousands of calls per second) obtained with
+    # numpy.dot, then with tokyo.strmv, tokyo.strmv6 and tokyo.strmv_, each
+    # followed by its speedup relative to the numpy.dot rate.
+
+    cdef int i, loops
+
+    # The repetition count shrinks as the problem size grows.
+    loops = speed_base*10/(<int>(size**1.2))
+
+    A = np.array( np.random.random( (size,size) ), dtype=np.float32 )
+    x = np.array( np.random.random( (size) ), dtype=np.float32 )
+
+    # Keep the diagonal and everything below it and zero the strict upper
+    # triangle (entries with j > i), so that A is lower triangular. np.tril
+    # does this in one vectorised call instead of size**2 element-wise
+    # assignments from an interpreted double loop.
+    A = np.tril( A )
+
+    cdef np.ndarray[float, ndim=2, mode='c'] A_
+    cdef np.ndarray[float, ndim=1, mode='c'] x_
+    A_ = A; x_ = x
+
+    print "numpy.dot: ",
+    start = time.clock()
+    for i in range(loops):
+        # NOTE: np.dot returns a new array, so x is rebound on every pass
+        # while x_ (bound above) keeps referring to the initial vector.
+        x = np.dot(A,x)
+    np_rate = loops/(time.clock()-start)
+    print "%9.0f kc/s" % (np_rate/1000)
+
+    # The tokyo wrappers are timed over three times as many repetitions.
+    loops *= 3
+
+    print "strmv: ",
+    start = time.clock()
+    for i in range(loops):
+        tokyo.strmv( A, x )
+    rate = loops/(time.clock()-start)
+    print "%9.0f kc/s %5.1fx" % (rate/1000,rate/np_rate)
+
+    # The two remaining variants use five times more repetitions again.
+    loops *= 5
+
+    print "strmv6: ",
+    start = time.clock()
+    for i in range(loops):
+        tokyo.strmv6( tokyo.CblasRowMajor, tokyo.CblasLower, tokyo.CblasNoTrans,
+                      tokyo.CblasNonUnit, A, x )
+    rate = loops/(time.clock()-start)
+    print "%9.0f kc/s %5.1fx" % (rate/1000,rate/np_rate)
+
+    print "strmv_: ",
+    start = time.clock()
+    for i in range(loops):
+        # Raw-pointer call: order and leading dimension are both taken from
+        # A_.shape[1]; the vector is passed with unit stride.
+        tokyo.strmv_( tokyo.CblasRowMajor, tokyo.CblasLower, tokyo.CblasNoTrans,
+                      tokyo.CblasNonUnit, A_.shape[1], <float*>A_.data,
+                      A_.shape[1], <float*>x_.data, 1 )
+    rate = loops/(time.clock()-start)
+    print "%9.0f kc/s %5.1fx" % (rate/1000,rate/np_rate)
+
+
# single precision vector outer-product: A = alpha * outer_product( x, y.T )
cdef sger_speed( int size ):

0 comments on commit bf3bbeb

Please sign in to comment.
Something went wrong with that request. Please try again.