Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

- Adding source files

  • Loading branch information...
commit b09340af56c076d74a71d8f24233492e08f8ff87 1 parent 00ab65b
@mikecvet authored
View
15 CMakeLists.txt
@@ -0,0 +1,15 @@
+PROJECT (STRASSEN)
+
+INCLUDE_DIRECTORIES(${STRASSEN}/src)
+
+FIND_LIBRARY(MATH m)
+FIND_LIBRARY(PTHREAD pthread)
+
+SET(CMAKE_CXX_FLAGS "-O2 -fforce-addr -march=native -Wall -DNDEBUG")
+
+ADD_EXECUTABLE(test_strassen_matrix
+ src/util/timer.cpp
+ src/test/test_strassen_matrix.cpp
+ )
+
+TARGET_LINK_LIBRARIES(test_strassen_matrix ${MATH} ${PTHREAD})
View
441 src/strassen/matrix.hpp
@@ -0,0 +1,441 @@
+/**
+ *Copyright (C) 2011 by Michael Cvet <http://mikecvet.wordpress.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ **/
+
+#ifndef MATRIX_HPP_
+#define MATRIX_HPP_
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "transpose_matrix_multiplier.hpp"
+#include "strassen_matrix_multiplier.hpp"
+
+namespace strassen
+{
+ template <typename T>
+ class matrix
+ {
+ friend class iterator;
+
+ private:
+ size_t _rows;
+ size_t _cols;
+ T *__matrix;
+
+ strassen::matrix_multiplier<T> *__mm;
+
+ T& __at (size_t i, size_t j);
+ void __mult (T *a, size_t arows, size_t acols, T k);
+ void __mult (T *A, size_t arows, size_t acols, const matrix<T> &b);
+ void __add (T *A, size_t arows, size_t acols, const matrix<T> &b);
+ void __sub (T *A, size_t arows, size_t acols, const matrix<T> &b);
+
+ bool __equal (const matrix<T> &m);
+
+ public:
+ matrix (matrix_multiplier<T> *mm = new strassen_matrix_multiplier<T> ());
+ matrix (size_t h, size_t w, matrix_multiplier<T> *mm = new strassen_matrix_multiplier<T> ());
+ matrix (const matrix<T> &m);
+ ~matrix ();
+
+ void clear ();
+ void zeroes ();
+ void random (uint32_t max = 0);
+
+ T& at (size_t i, size_t j);
+ void mult (T k);
+ void mult (const matrix<T> &m);
+ void add (const matrix<T> &m);
+ void sub (const matrix<T> &m);
+ bool equal (const matrix<T> &m);
+
+ size_t rows () const;
+ size_t cols () const;
+ T* raw_data_copy () const;
+
+ T& operator () (size_t i, size_t j);
+ matrix<T>& operator * (T k);
+ matrix<T>& operator * (const matrix<T> &m);
+ matrix<T>& operator + (const matrix<T> &m);
+ matrix<T>& operator - (const matrix<T> &m);
+ matrix<T>& operator = (const matrix<T> &m);
+ bool operator == (const matrix<T> &m);
+
+ class iterator
+ {
+ private:
+ T *__m;
+ size_t __rows;
+ size_t __cols;
+ size_t __indx;
+ size_t __n;
+
+ public:
+ iterator (matrix<T> &m);
+ ~iterator ();
+
+ T val ();
+ size_t row ();
+ size_t col ();
+ bool ok ();
+ void operator ++ ();
+ };
+ };
+
+ template <typename T>
+ matrix<T>::matrix (matrix_multiplier<T> *mm)
+ : _rows (0),
+ _cols (0),
+ __matrix (NULL),
+ __mm (mm)
+ {
+ }
+
+ template <typename T>
+ matrix<T>::matrix (size_t rows, size_t cols, matrix_multiplier<T> *mm)
+ : _rows (rows),
+ _cols (cols),
+ __mm (mm)
+ {
+ __matrix = (T *) malloc (_rows * _cols * sizeof (T));
+ }
+
+ template <typename T>
+ matrix<T>::matrix (const matrix<T> &m)
+ : _rows (m.rows ()),
+ _cols (m.cols ()),
+ __mm (m.__mm->copy ())
+ {
+ __matrix = m.raw_data_copy ();
+ }
+
+ template <typename T>
+ matrix<T>::~matrix ()
+ {
+ if (__matrix)
+ {
+ free (__matrix);
+ __matrix = NULL;
+ delete __mm;
+ }
+ }
+
+ template <typename T>
+ void
+ matrix<T>::clear ()
+ {
+ if (__matrix)
+ {
+ _rows = 0;
+ _cols = 0;
+
+ free (__matrix);
+ __matrix = NULL;
+ }
+ }
+
+ template <typename T>
+ T&
+ matrix<T>::at (size_t i, size_t j)
+ {
+ return (__at (i, j));
+ }
+
+ template <typename T>
+ void
+ matrix<T>::zeroes ()
+ {
+ memset (__matrix, 0, _rows * _cols * sizeof (T));
+ }
+
+ template <typename T>
+ void
+ matrix<T>::random (uint32_t max)
+ {
+ size_t n = _rows * _cols;
+
+ srand (time (NULL));
+
+ if (!max)
+ {
+ for (size_t i = 0; i < n; i++)
+ __matrix[i] = rand ();
+ }
+ else
+ {
+ for (size_t i = 0; i < n; i++)
+ __matrix[i] = (rand () % max) + 1;
+ }
+ }
+
+ template <typename T>
+ size_t
+ matrix<T>::rows () const
+ {
+ return _rows;
+ }
+
+ template <typename T>
+ size_t
+ matrix<T>::cols () const
+ {
+ return _cols;
+ }
+
+ template <typename T>
+ T*
+ matrix<T>::raw_data_copy () const
+ {
+ T *t = (T *) malloc (_rows * _cols * sizeof (T));
+ memcpy (t, __matrix, (_rows * _cols * sizeof (T)));
+
+ return t;
+ }
+
+ template <typename T>
+ void
+ matrix<T>::mult (T k)
+ {
+ __mult (__matrix, _rows, _cols, k);
+ }
+
+ template <typename T>
+ void
+ matrix<T>::mult (const matrix<T> &m)
+ {
+ T *C = __mm -> mult (__matrix, m.__matrix, _rows, _cols, m.rows (), m.cols ());
+
+ if (C)
+ {
+ free (__matrix);
+
+ __matrix = C;
+ _cols = _rows;
+ }
+ }
+
+ template <typename T>
+ void
+ matrix<T>::add (const matrix<T> &m)
+ {
+ __add (__matrix, _rows, _cols, m);
+ }
+
+ template <typename T>
+ void
+ matrix<T>::sub (const matrix<T> &m)
+ {
+ __sub (__matrix, _rows, _cols, m);
+ }
+
+ template <typename T>
+ bool
+ matrix<T>::equal (const matrix<T> &m)
+ {
+ return (__equal (m));
+ }
+
+ template <typename T>
+ T&
+ matrix<T>::operator () (size_t i, size_t j)
+ {
+ return (__at (i , j));
+ }
+
+ template <typename T>
+ matrix<T>&
+ matrix<T>::operator * (T k)
+ {
+ __mult (__matrix, _rows, _cols, k);
+ return (*this);
+ }
+
+ template <typename T>
+ matrix<T>&
+ matrix<T>::operator * (const matrix<T> &m)
+ {
+ __mult (__matrix, _rows, _cols, m);
+ return (*this);
+ }
+
+ template <typename T>
+ matrix<T>&
+ matrix<T>::operator + (const matrix<T> &m)
+ {
+ __add (__matrix, _rows, _cols, m);
+ return (*this);
+ }
+
+ template <typename T>
+ matrix<T>&
+ matrix<T>::operator - (const matrix<T> &m)
+ {
+ __sub (__matrix, _rows, _cols, m);
+ return (*this);
+ }
+
+ template <typename T>
+ matrix<T>&
+ matrix<T>::operator = (const matrix<T> &m)
+ {
+ if (__matrix)
+ free (__matrix);
+
+ _rows = m.rows ();
+ _cols = m.cols ();
+ __matrix = m.raw_data_copy ();
+
+ return (*this);
+ }
+
+ template <typename T>
+ bool
+ matrix<T>::operator == (const matrix<T> &m)
+ {
+ return (__equal (m));
+ }
+
+ template <typename T>
+ T&
+ matrix<T>::__at (size_t i, size_t j)
+ {
+ return (__matrix[(i * _cols) + j]);
+ }
+
+ template <typename T>
+ void
+ matrix<T>::__mult (T *A, size_t arows, size_t acols, T k)
+ {
+ size_t n = arows * acols;
+
+ for (size_t i = 0; i < n; i++)
+ A[i] = A[i] * k;
+ }
+
+ template <typename T>
+ void
+ matrix<T>::__add (T *A, size_t arows, size_t acols, const matrix<T> &b)
+ {
+ if (_rows == b.rows () && _cols == b.cols ())
+ {
+ size_t n = _rows * _cols;
+ T *B = b.__matrix;
+
+ for (size_t i = 0; i < n; i++)
+ __matrix[i] = A[i] + B[i];
+ }
+ //else throw exception
+ }
+
+ template <typename T>
+ void
+ matrix<T>::__sub (T *A, size_t arows, size_t acols, const matrix<T> &b)
+ {
+ if (_rows == b.rows () && _cols == b.cols ())
+ {
+ size_t n = _rows * _cols;
+ T *B = b.__matrix;
+
+ for (size_t i = 0; i < n; i++)
+ __matrix[i] = A[i] - B[i];
+ }
+ //else throw exception
+ }
+
+ template <typename T>
+ bool
+ matrix<T>::__equal (const matrix<T> &m)
+ {
+ if (_rows == m.rows () && _cols == m.cols ())
+ {
+ size_t n = _rows * _cols;
+ T *B = m.__matrix;
+
+ for (size_t i = 0; i < n; i++)
+ {
+ if (__matrix[i] != B[i])
+ {
+ fprintf (stderr, "failure: %d\n",__matrix[i]);
+ return false;
+ }
+ }
+
+ return true;
+ }
+ else
+ return false;
+ }
+
+ template <typename T>
+ matrix<T>::iterator::iterator (matrix<T> &m)
+ : __m (m.__matrix),
+ __rows (m.rows ()),
+ __cols (m.cols ()),
+ __indx (0)
+ {
+ __n = __rows * __cols;
+ }
+
+ template <typename T>
+ matrix<T>::iterator::~iterator ()
+ {
+ }
+
+ template <typename T>
+ T
+ matrix<T>::iterator::val ()
+ {
+ return (__m[__indx]);
+ }
+
+ template <typename T>
+ size_t
+ matrix<T>::iterator::row ()
+ {
+ return (__indx / __rows);
+ }
+
+ template <typename T>
+ size_t
+ matrix<T>::iterator::col ()
+ {
+ return (__indx % __cols);
+ }
+
+ template <typename T>
+ bool
+ matrix<T>::iterator::ok ()
+ {
+ return (__indx < __n);
+ }
+
+ template <typename T>
+ void
+ matrix<T>::iterator::operator ++ ()
+ {
+ ++__indx;
+ }
+}
+
+#endif /* MATRIX_HPP_ */
View
37 src/strassen/matrix_multiplier.hpp
@@ -0,0 +1,37 @@
+/**
+ *Copyright (C) 2011 by Michael Cvet <http://mikecvet.wordpress.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ **/
+
+#ifndef MATRIX_MULTIPLIER_HPP_
+#define MATRIX_MULTIPLIER_HPP_
+
+namespace strassen
+{
+ template <typename T>
+ class matrix_multiplier
+ {
+ public:
+ virtual T* mult (const T *a, const T *b, size_t arows, size_t acols, size_t brows, size_t bcols) = 0;
+ virtual matrix_multiplier<T>* copy () const = 0;
+ };
+}
+
+#endif /* MATRIX_MULTIPLIER_HPP_ */
View
101 src/strassen/naive_matrix_multiplier.hpp
@@ -0,0 +1,101 @@
+/**
+ *Copyright (C) 2011 by Michael Cvet <http://mikecvet.wordpress.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ **/
+
+#ifndef NAIVE_MATRIX_MULTIPLIER_HPP_
+#define NAIVE_MATRIX_MULTIPLIER_HPP_
+
+#include "matrix_multiplier.hpp"
+
+namespace strassen
+{
+ template <typename T>
+ class naive_matrix_multiplier : public strassen::matrix_multiplier<T>
+ {
+ public:
+ naive_matrix_multiplier ();
+ ~naive_matrix_multiplier ();
+
+ T* mult (const T *a, const T *b, size_t arows, size_t acols, size_t brows, size_t bcols);
+ matrix_multiplier<T>* copy () const;
+ };
+
+ template <typename T>
+ naive_matrix_multiplier<T>::naive_matrix_multiplier ()
+ {
+ }
+
+ template <typename T>
+ naive_matrix_multiplier<T>::~naive_matrix_multiplier ()
+ {
+ }
+
+ template <typename T>
+ matrix_multiplier<T>*
+ naive_matrix_multiplier<T>::copy () const
+ {
+ return (new naive_matrix_multiplier<T> ());
+ }
+
+ template <typename T>
+ T*
+ naive_matrix_multiplier<T>::mult (const T *A, const T *B,
+ size_t arows, size_t acols,
+ size_t brows, size_t bcols)
+ {
+ if (arows == bcols)
+ {
+ T t;
+ size_t m = arows;
+ size_t n = acols;
+ size_t im;
+
+ T *C = (T *) malloc (m * m * sizeof (T));
+ const T *a_row = NULL;
+
+ for (size_t i = 0; i < m; i++)
+ {
+ im = i * m;
+ a_row = &A[i * acols];
+
+ for (size_t j = 0; j < m; j++)
+ {
+ t = 0;
+
+ for (size_t k = 0; k < n; k++)
+ {
+ t += (a_row[k] * B[(k * brows) + j]);
+ }
+
+ C[im++] = t;
+ }
+ }
+
+ return C;
+ }
+ else
+ arows = 0;
+
+ return NULL;
+ }
+}
+
+#endif /* NAIVE_MATRIX_MULTIPLIER_HPP_ */
View
372 src/strassen/parallel_strassen_matrix_multiplier.hpp
@@ -0,0 +1,372 @@
+/**
+ *Copyright (C) 2011 by Michael Cvet <http://mikecvet.wordpress.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ **/
+
+#ifndef PARALLEL_STRASSEN_MATRIX_MULTIPLIER_HPP_
+#define PARALLEL_STRASSEN_MATRIX_MULTIPLIER_HPP_
+
+#include <cmath>
+#include "matrix_multiplier.hpp"
+#include "strassen_matrix_multiplier.hpp"
+#include "transpose_matrix_multiplier.hpp"
+
+namespace strassen
+{
+ template <typename T>
+ class psmm_pair
+ {
+ public:
+ T *A;
+ T *B;
+ T *C;
+ size_t m;
+ void *pmm;
+ int j;
+ };
+
+ template <typename T>
+ class parallel_strassen_matrix_multiplier : public strassen::strassen_matrix_multiplier<T>
+ {
+ private:
+ bool __loop;
+ size_t __cntr;
+ size_t __nthreads;
+ pthread_t *__threads;
+ pthread_mutex_t *__lock;
+ pthread_cond_t *__cond[7];
+ pthread_cond_t *__main_cond;
+ psmm_pair<T>* __thread_data[7];
+
+ strassen_matrix_multiplier<T> __smm;
+ transpose_matrix_multiplier<T> __tmm;
+
+ T* __mult (const T *A, const T *B, size_t n, uint32_t id);
+
+ public:
+ parallel_strassen_matrix_multiplier ();
+ ~parallel_strassen_matrix_multiplier ();
+
+ T* mult (const T *a, const T *b, size_t arows, size_t acols, size_t brows, size_t bcols);
+ matrix_multiplier<T>* copy () const;
+
+ void thread_loop (int id);
+ };
+
+ template <typename T>
+ void*
+ psmm_thread_entry (void *p)
+ {
+ psmm_pair<T> *pmm = ((psmm_pair<T> *) p);
+
+ ((parallel_strassen_matrix_multiplier<T> *) pmm -> pmm) -> thread_loop (pmm -> j);
+ //free (pmm);
+
+ return NULL;
+ }
+
+ template <typename T>
+ parallel_strassen_matrix_multiplier<T>::parallel_strassen_matrix_multiplier ()
+ {
+ __loop = true;
+ __nthreads = 8;
+ __cntr = __nthreads - 1;
+ __threads = (pthread_t *) malloc ((__nthreads - 1) * sizeof (pthread_t));
+ __lock = (pthread_mutex_t *) malloc (sizeof (pthread_mutex_t));
+ __main_cond = (pthread_cond_t *) malloc (sizeof (pthread_cond_t));
+
+ pthread_mutex_init (__lock, NULL);
+ pthread_cond_init (__main_cond, NULL);
+
+ for (size_t i = 0; i < (__nthreads - 1); i++)
+ {
+ __cond[i] = (pthread_cond_t *) malloc (sizeof (pthread_cond_t));
+ pthread_cond_init (__cond[i], NULL);
+
+ __thread_data[i] = (psmm_pair<T> *) malloc (sizeof (psmm_pair<T>));
+ __thread_data[i] -> pmm = (void *) this;
+ __thread_data[i] -> j = i + 1;
+ pthread_create (&__threads[i], NULL, psmm_thread_entry<T>, (void *) __thread_data[i]);
+ }
+
+ sleep (1);
+ }
+
+ template <typename T>
+ parallel_strassen_matrix_multiplier<T>::~parallel_strassen_matrix_multiplier ()
+ {
+ __loop = false;
+
+ pthread_mutex_lock (__lock);
+
+ for (size_t i = 0; i < (__nthreads - 1); i++)
+ {
+ pthread_cond_signal (__cond[i]);
+
+ pthread_join (__threads[i], NULL);
+
+ free (__thread_data[i]);
+
+ pthread_cond_destroy (__cond[i]);
+ free (__cond[i]);
+ }
+
+ pthread_mutex_unlock (__lock);
+
+ free (__threads);
+ pthread_mutex_destroy (__lock);
+ pthread_cond_destroy (__main_cond);
+
+ free (__lock);
+ free (__main_cond);
+ }
+
+ template <typename T>
+ matrix_multiplier<T>*
+ parallel_strassen_matrix_multiplier<T>::copy () const
+ {
+ return (new parallel_strassen_matrix_multiplier<T> ());
+ }
+
+
+ template <typename T>
+ T*
+ parallel_strassen_matrix_multiplier<T>::mult (const T *m, const T *n,
+ size_t arows, size_t acols,
+ size_t brows, size_t bcols)
+ {
+ if (acols == brows)
+ {
+ if (arows == acols && brows == bcols && !(arows & (arows - 1)))
+ {
+ T *C = __mult (m, n, arows, 0);
+ return C;
+ }
+ else
+ {
+ size_t N;
+ size_t max_term = acols;
+
+ T *A;
+ T *B;
+
+ if (arows >= acols && arows >= brows)
+ max_term = arows;
+ else if (acols >= arows && acols >= bcols)
+ max_term = acols;
+ else if (brows >= bcols && brows >= arows)
+ max_term = brows;
+ else if (bcols >= brows && bcols >= acols)
+ max_term = bcols;
+
+ N = std::pow (2, (size_t) (std::log (max_term) / strassen_matrix_multiplier<T>::__log2) + 1);
+
+ A = __pad (m, arows, acols, N);
+ B = __pad (n, brows, bcols, N);
+
+ T *C = __mult (A, B, N, 0);
+ T *D = __unpad (C, arows, arows, N);
+
+ free (A);
+ free (B);
+ free (C);
+
+ return D;
+ }
+ }
+ return NULL;
+ }
+
+ template <typename T>
+ T*
+ parallel_strassen_matrix_multiplier<T>::__mult (const T *A, const T *B, size_t n, uint32_t id)
+ {
+ if (n <= 128)
+ {
+ return (__tmm.mult (A, B, n, n, n, n));
+ }
+
+ size_t m = n / 2;
+
+ size_t tl_row_start = 0;
+ size_t tl_col_start = 0;
+
+ size_t tr_row_start = 0;
+ size_t tr_col_start = m;
+
+ size_t bl_row_start = m;
+ size_t bl_col_start = 0;
+
+ size_t br_row_start = m;
+ size_t br_col_start = m;
+
+ T *C = (T *) malloc (n * n * sizeof (T));
+
+ T* AA[7];
+ T* BB[7];
+ T* MM[7] = {NULL};
+
+ if (id)
+ {
+ if (!A[0] && !A[1])
+ {
+ if (__zeroes (A, n))
+ {
+ memset (C, 0, n * n * sizeof (T));
+ return C;
+ }
+ }
+
+ if (!B[0] && !B[1])
+ {
+ if (__zeroes (B, n))
+ {
+ memset (C, 0, n * n * sizeof (T));
+ return C;
+ }
+ }
+ }
+
+ for (uint32_t i = 0; i < 7; i++)
+ {
+ AA[i] = (T *) malloc (m * m * sizeof (T));
+ BB[i] = (T *) malloc (m * m * sizeof (T));
+ }
+
+ __submatrix_add (AA[0], A, tl_row_start, tl_col_start, br_row_start, br_col_start, m, n);
+ __submatrix_add (AA[1], A, bl_row_start, bl_col_start, br_row_start, br_row_start, m, n);
+ __submatrix_cpy (AA[2], A, tl_row_start, tl_col_start, m, n);
+ __submatrix_cpy (AA[3], A, br_row_start, br_col_start, m, n);
+ __submatrix_add (AA[4], A, tl_row_start, tl_col_start, tr_row_start, tr_col_start, m, n);
+ __submatrix_sub (AA[5], A, bl_row_start, bl_col_start, tl_row_start, tl_col_start, m, n);
+ __submatrix_sub (AA[6], A, tr_row_start, tr_col_start, br_row_start, br_col_start, m, n);
+
+ __submatrix_add (BB[0], B, tl_row_start, tl_col_start, br_row_start, br_col_start, m, n);
+ __submatrix_cpy (BB[1], B, tl_row_start, tl_col_start, m, n);
+ __submatrix_sub (BB[2], B, tr_row_start, tr_col_start, br_row_start, br_col_start, m, n);
+ __submatrix_sub (BB[3], B, bl_row_start, bl_col_start, tl_row_start, tl_col_start, m, n);
+ __submatrix_cpy (BB[4], B, br_row_start, br_col_start, m, n);
+ __submatrix_add (BB[5], B, tl_row_start, tl_col_start, tr_row_start, tr_col_start, m, n);
+ __submatrix_add (BB[6], B, bl_row_start, bl_col_start, br_row_start, br_col_start, m, n);
+
+ if (m <= 256)
+ {
+ MM[0] = __tmm.mult (AA[0], BB[0], m, m, m, m);
+ MM[1] = __tmm.mult (AA[1], BB[1], m, m, m, m);
+ MM[2] = __tmm.mult (AA[2], BB[2], m, m, m, m);
+ MM[3] = __tmm.mult (AA[3], BB[3], m, m, m, m);
+ MM[4] = __tmm.mult (AA[4], BB[4], m, m, m, m);
+ MM[5] = __tmm.mult (AA[5], BB[5], m, m, m, m);
+ MM[6] = __tmm.mult (AA[6], BB[6], m, m, m, m);
+ }
+ else
+ {
+ if (!id)
+ {
+ pthread_mutex_lock (__lock);
+ __cntr = 7;
+ pthread_mutex_unlock (__lock);
+
+ for (uint32_t i = 0; i < (__nthreads - 1); i++)
+ {
+ __thread_data[i]->A = AA[i];
+ __thread_data[i]->B = BB[i];
+ __thread_data[i]->C = MM[i];
+ __thread_data[i]->m = m;
+
+ pthread_cond_signal (__cond[i]);
+ }
+
+ pthread_mutex_lock (__lock);
+
+ while (__cntr)
+ pthread_cond_wait (__main_cond, __lock);
+
+ MM[0] = __thread_data[0] -> C;
+ MM[1] = __thread_data[1] -> C;
+ MM[2] = __thread_data[2] -> C;
+ MM[3] = __thread_data[3] -> C;
+ MM[4] = __thread_data[4] -> C;
+ MM[5] = __thread_data[5] -> C;
+ MM[6] = __thread_data[6] -> C;
+
+ pthread_mutex_unlock (__lock);
+ }
+ else
+ {
+ MM[0] = __mult (AA[0], BB[0], m, id);
+ MM[1] = __mult (AA[1], BB[1], m, id);
+ MM[2] = __mult (AA[2], BB[2], m, id);
+ MM[3] = __mult (AA[3], BB[3], m, id);
+ MM[4] = __mult (AA[4], BB[4], m, id);
+ MM[5] = __mult (AA[5], BB[5], m, id);
+ MM[6] = __mult (AA[6], BB[6], m, id);
+ }
+ }
+
+ __submatrix_add (C, MM[0], MM[3], tl_row_start, tl_col_start, m, n);
+ __submatrix_sub (C, MM[4], tl_row_start, tl_col_start, m, n);
+ __submatrix_add (C, MM[6], tl_row_start, tl_col_start, m, n);
+
+ __submatrix_add (C, MM[2], MM[4], tr_row_start, tr_col_start, m, n);
+
+ __submatrix_add (C, MM[1], MM[3], bl_row_start, bl_col_start, m, n);
+
+ __submatrix_sub (C, MM[0], MM[1], br_row_start, br_col_start, m, n);
+ __submatrix_add (C, MM[2], br_row_start, br_col_start, m, n);
+ __submatrix_add (C, MM[5], br_row_start, br_col_start, m, n);
+
+ for (uint32_t i = 0; i < 7; i++)
+ {
+ free (AA[i]);
+ free (BB[i]);
+ free (MM[i]);
+ }
+
+ return C;
+ }
+
+ template <typename T>
+ void
+ parallel_strassen_matrix_multiplier<T>::thread_loop (int id)
+ {
+ while (__loop)
+ {
+ pthread_mutex_lock (__lock);
+ --__cntr;
+
+ if (!__cntr)
+ pthread_cond_broadcast (__main_cond);
+
+ pthread_cond_wait (__cond[id - 1], __lock);
+ pthread_mutex_unlock (__lock);
+
+ if (!__loop)
+ break;
+
+ __thread_data[id-1]->C = __mult (__thread_data[id-1]->A,
+ __thread_data[id-1]->B,
+ __thread_data[id-1]->m,
+ id);
+ }
+ }
+}
+
+#endif /* PARALLEL_STRASSEN_MATRIX_MULTIPLIER_HPP_ */
View
475 src/strassen/strassen_matrix_multiplier.hpp
@@ -0,0 +1,475 @@
+/**
+ *Copyright (C) 2011 by Michael Cvet <http://mikecvet.wordpress.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ **/
+
+#ifndef STRASSEN_MATRIX_MULTIPLIER_HPP_
+#define STRASSEN_MATRIX_MULTIPLIER_HPP_
+
+#include <cmath>
+#include "matrix_multiplier.hpp"
+
+namespace strassen
+{
+ template <typename T>
+ class strassen_matrix_multiplier : public strassen::matrix_multiplier<T>
+ {
+ protected:
+ static double __log2;
+ transpose_matrix_multiplier<T> __tmm;
+
+ T* __pad (const T *m, size_t rows, size_t cols, size_t n);
+ T* __unpad (const T *m, size_t rows, size_t cols, size_t n);
+ T* __mult (const T *A, const T *B, size_t n);
+
+ bool __zeroes (const T *A, size_t n);
+
+ void __submatrix_add (T *C, const T *A,
+ size_t a_row_start, size_t a_col_start,
+ size_t b_row_start, size_t b_col_start,
+ size_t m, size_t n);
+
+ void __submatrix_sub (T *C, const T *A,
+ size_t a_row_start, size_t a_col_start,
+ size_t b_row_start, size_t b_col_start,
+ size_t m, size_t n);
+
+ void __submatrix_cpy (T *C, const T *A, size_t start, size_t end, size_t m, size_t n);
+
+ void __submatrix_add (T *C, const T *A, size_t row_start, size_t col_start, size_t m, size_t n);
+ void __submatrix_add (T *C, const T *A, const T *B, size_t row_start, size_t col_start, size_t m, size_t n);
+ void __submatrix_sub (T *C, const T *A, size_t row_start, size_t col_start, size_t m, size_t n);
+ void __submatrix_sub (T *C, const T *A, const T *B, size_t row_start, size_t col_start, size_t m, size_t n);
+
+ public:
+ strassen_matrix_multiplier ();
+ virtual ~strassen_matrix_multiplier ();
+
+ virtual T* mult (const T *a, const T *b, size_t arows, size_t acols, size_t brows, size_t bcols);
+ virtual matrix_multiplier<T>* copy () const;
+ };
+
+ template <typename T>
+ double strassen_matrix_multiplier<T>::__log2 = std::log (2.0);
+
+ template <typename T>
+ strassen_matrix_multiplier<T>::strassen_matrix_multiplier ()
+ {
+ }
+
+ template <typename T>
+ strassen_matrix_multiplier<T>::~strassen_matrix_multiplier ()
+ {
+ }
+
+ template <typename T>
+ matrix_multiplier<T>*
+ strassen_matrix_multiplier<T>::copy () const
+ {
+ return (new strassen_matrix_multiplier<T> ());
+ }
+
+ template <typename T>
+ T*
+ strassen_matrix_multiplier<T>::mult (const T *m, const T *n,
+ size_t arows, size_t acols,
+ size_t brows, size_t bcols)
+ {
+ if (acols == brows)
+ {
+ if (arows == acols && brows == bcols && !(arows & (arows - 1)))
+ {
+ T *C = __mult (m, n, arows);
+ return C;
+ }
+ else
+ {
+ size_t N;
+ size_t max_term = acols;
+
+ T *A;
+ T *B;
+
+ if (arows >= acols && arows >= brows)
+ max_term = arows;
+ else if (acols >= arows && acols >= bcols)
+ max_term = acols;
+ else if (brows >= bcols && brows >= arows)
+ max_term = brows;
+ else if (bcols >= brows && bcols >= acols)
+ max_term = bcols;
+
+ N = std::pow (2, (size_t) (std::log (max_term) / __log2) + 1);
+
+ A = __pad (m, arows, acols, N);
+ B = __pad (n, brows, bcols, N);
+
+ T *C = __mult (A, B, N);
+ T *D = __unpad (C, arows, arows, N);
+
+ free (A);
+ free (B);
+ free (C);
+
+ return D;
+ }
+ }
+
+ return NULL;
+ }
+
+ template <typename T>
+ T*
+ strassen_matrix_multiplier<T>::__mult (const T *A, const T *B, size_t n)
+ {
+ if (n <= 128)
+ {
+ return (__tmm.mult (A, B, n, n, n, n));
+ }
+
+ size_t m = n / 2;
+
+ size_t tl_row_start = 0;
+ size_t tl_col_start = 0;
+
+ size_t tr_row_start = 0;
+ size_t tr_col_start = m;
+
+ size_t bl_row_start = m;
+ size_t bl_col_start = 0;
+
+ size_t br_row_start = m;
+ size_t br_col_start = m;
+
+ T *C = (T *) malloc (n * n * sizeof (T));
+
+ T* AA[7];
+ T* BB[7];
+ T* MM[7];
+
+ if ((!A[0] && !A[1] && __zeroes (A, n)) || (!B[0] && !B[1] && __zeroes (B, n)))
+ {
+ memset (C, 0, n * n * sizeof (T));
+ return C;
+ }
+
+ for (uint32_t i = 0; i < 7; i++)
+ {
+ AA[i] = (T *) malloc (m * m * sizeof (T));
+ BB[i] = (T *) malloc (m * m * sizeof (T));
+ }
+
+ __submatrix_add (AA[0], A, tl_row_start, tl_col_start, br_row_start, br_col_start, m, n);
+ __submatrix_add (AA[1], A, bl_row_start, bl_col_start, br_row_start, br_row_start, m, n);
+ __submatrix_cpy (AA[2], A, tl_row_start, tl_col_start, m, n);
+ __submatrix_cpy (AA[3], A, br_row_start, br_col_start, m, n);
+ __submatrix_add (AA[4], A, tl_row_start, tl_col_start, tr_row_start, tr_col_start, m, n);
+ __submatrix_sub (AA[5], A, bl_row_start, bl_col_start, tl_row_start, tl_col_start, m, n);
+ __submatrix_sub (AA[6], A, tr_row_start, tr_col_start, br_row_start, br_col_start, m, n);
+
+ __submatrix_add (BB[0], B, tl_row_start, tl_col_start, br_row_start, br_col_start, m, n);
+ __submatrix_cpy (BB[1], B, tl_row_start, tl_col_start, m, n);
+ __submatrix_sub (BB[2], B, tr_row_start, tr_col_start, br_row_start, br_col_start, m, n);
+ __submatrix_sub (BB[3], B, bl_row_start, bl_col_start, tl_row_start, tl_col_start, m, n);
+ __submatrix_cpy (BB[4], B, br_row_start, br_col_start, m, n);
+ __submatrix_add (BB[5], B, tl_row_start, tl_col_start, tr_row_start, tr_col_start, m, n);
+ __submatrix_add (BB[6], B, bl_row_start, bl_col_start, br_row_start, br_col_start, m, n);
+
+ MM[0] = __mult (AA[0], BB[0], m);
+ MM[1] = __mult (AA[1], BB[1], m);
+ MM[2] = __mult (AA[2], BB[2], m);
+ MM[3] = __mult (AA[3], BB[3], m);
+ MM[4] = __mult (AA[4], BB[4], m);
+ MM[5] = __mult (AA[5], BB[5], m);
+ MM[6] = __mult (AA[6], BB[6], m);
+
+ __submatrix_add (C, MM[0], MM[3], tl_row_start, tl_col_start, m, n);
+ __submatrix_sub (C, MM[4], tl_row_start, tl_col_start, m, n);
+ __submatrix_add (C, MM[6], tl_row_start, tl_col_start, m, n);
+
+ __submatrix_add (C, MM[2], MM[4], tr_row_start, tr_col_start, m, n);
+
+ __submatrix_add (C, MM[1], MM[3], bl_row_start, bl_col_start, m, n);
+
+ __submatrix_sub (C, MM[0], MM[1], br_row_start, br_col_start, m, n);
+ __submatrix_add (C, MM[2], br_row_start, br_col_start, m, n);
+ __submatrix_add (C, MM[5], br_row_start, br_col_start, m, n);
+
+ for (uint32_t i = 0; i < 7; i++)
+ {
+ free (AA[i]);
+ free (BB[i]);
+ free (MM[i]);
+ }
+
+ return C;
+ }
+
+ template <typename T>
+ bool
+ strassen_matrix_multiplier<T>::__zeroes (const T *A, size_t n)
+ {
+ size_t N = n * n;
+
+ for (size_t i = 0; i < N; i++)
+ {
+ if (A[i])
+ return false;
+ }
+
+ return true;
+ }
+
+ template <typename T>
+ void
+ strassen_matrix_multiplier<T>::__submatrix_add (T *C, const T *A,
+ size_t a_row_start, size_t a_col_start,
+ size_t b_row_start, size_t b_col_start,
+ size_t m, size_t n)
+ {
+ size_t im;
+ size_t a_row_indx;
+ size_t b_row_indx;
+ const T *a_row;
+ const T *b_row;
+
+ for (size_t i = 0; i < m; i++)
+ {
+ im = i * m;
+ a_row_indx = ((a_row_start + i) * n) + a_col_start;
+ b_row_indx = ((b_row_start + i) * n) + b_col_start;
+
+ a_row = &A[a_row_indx];
+ b_row = &A[b_row_indx];
+
+ for (size_t j = 0; j < m; j++)
+ {
+ C[im + j] = a_row[j] + b_row[j];
+ }
+ }
+ }
+
+ template <typename T>
+ void
+ strassen_matrix_multiplier<T>::__submatrix_add (T *C, const T *A,
+ size_t row_start, size_t col_start,
+ size_t m, size_t n)
+ {
+ size_t im;
+ size_t row_indx;
+ T *row;
+
+ for (size_t i = 0; i < m; i++)
+ {
+ im = i * m;
+ row_indx = ((row_start + i) * n) + col_start;
+ row = &C[row_indx];
+
+ for (size_t j = 0; j < m; j++)
+ {
+ row[j] += A[im + j];
+ }
+ }
+ }
+
+ template <typename T>
+ void
+ strassen_matrix_multiplier<T>::__submatrix_add (T *C, const T *A, const T *B,
+ size_t row_start, size_t col_start,
+ size_t m, size_t n)
+ {
+ size_t im;
+ size_t row_indx;
+ T *row;
+
+ for (size_t i = 0; i < m; i++)
+ {
+ im = i * m;
+ row_indx = ((row_start + i) * n) + col_start;
+ row = &C[row_indx];
+
+ for (size_t j = 0; j < m; j++)
+ {
+ row[j] = A[im + j] + B[im + j];
+ }
+ }
+ }
+
+ template <typename T>
+ void
+ strassen_matrix_multiplier<T>::__submatrix_sub (T *C, const T *A,
+ size_t a_row_start, size_t a_col_start,
+ size_t b_row_start, size_t b_col_start,
+ size_t m, size_t n)
+ {
+ size_t im;
+ size_t a_row_indx;
+ size_t b_row_indx;
+ const T *a_row;
+ const T *b_row;
+
+ for (size_t i = 0; i < m; i++)
+ {
+ im = i * m;
+ a_row_indx = ((a_row_start + i) * n) + a_col_start;
+ b_row_indx = ((b_row_start + i) * n) + b_col_start;
+
+ a_row = &A[a_row_indx];
+ b_row = &A[b_row_indx];
+
+ for (size_t j = 0; j < m; j++)
+ {
+ C[im + j] = a_row[j] - b_row[j];
+ }
+ }
+ }
+
+ template <typename T>
+ void
+ strassen_matrix_multiplier<T>::__submatrix_cpy (T *C, const T *A,
+ size_t row_start, size_t col_start,
+ size_t m, size_t n)
+ {
+ size_t im;
+ size_t row_indx;
+ const T *row;
+
+ for (size_t i = 0; i < m; i++)
+ {
+ im = i * m;
+ row_indx = ((row_start + i) * n) + col_start;
+
+ row = &A[row_indx];
+
+ for (size_t j = 0; j < m; j++)
+ {
+ C[im + j] = row[j];
+ }
+ }
+ }
+
+ template <typename T>
+ void
+ strassen_matrix_multiplier<T>::__submatrix_sub (T *C, const T *A,
+ size_t row_start, size_t col_start,
+ size_t m, size_t n)
+ {
+ size_t im;
+ size_t row_indx;
+ T *row;
+
+ for (size_t i = 0; i < m; i++)
+ {
+ im = i * m;
+ row_indx = ((row_start + i) * n) + col_start;
+ row = &C[row_indx];
+
+ for (size_t j = 0; j < m; j++)
+ {
+ row[j] -= A[im + j];
+ }
+ }
+ }
+
+ template <typename T>
+ void
+ strassen_matrix_multiplier<T>::__submatrix_sub (T *C, const T *A, const T *B,
+ size_t row_start, size_t col_start,
+ size_t m, size_t n)
+ {
+ size_t im;
+ size_t row_indx;
+ T *row;
+
+ for (size_t i = 0; i < m; i++)
+ {
+ im = i * m;
+ row_indx = ((row_start + i) * n) + col_start;
+ row = &C[row_indx];
+
+ for (size_t j = 0; j < m; j++)
+ {
+ row[j] = A[im + j] - B[im + j];
+ }
+ }
+ }
+
+ template <typename T>
+ T*
+ strassen_matrix_multiplier<T>::__pad (const T *m, size_t rows, size_t cols, size_t n)
+ {
+ size_t in;
+ size_t ir;
+ T *M = (T *) malloc (n * n * sizeof (T));
+
+ for (size_t i = 0; i < rows; i++)
+ {
+ in = i * n;
+ ir = i * rows;
+
+ for (size_t j = 0; j < cols; j++)
+ {
+ M[in + j] = m[ir + j];
+ }
+
+ for (size_t j = cols; j < n; j++)
+ {
+ M[in + j] = 0;
+ }
+ }
+
+ for (size_t i = rows; i < n; i++)
+ {
+ in = i * n;
+
+ for (size_t j = 0; j < n; j++)
+ {
+ M[in + j] = 0;
+ }
+ }
+
+ return M;
+ }
+
+
+ template <typename T>
+ T*
+ strassen_matrix_multiplier<T>::__unpad (const T *m, size_t rows, size_t cols, size_t n)
+ {
+ size_t in;
+ size_t ir;
+ T *M = (T *) malloc (rows * cols * sizeof (T));
+
+ for (size_t i = 0; i < rows; i++)
+ {
+ in = i * n;
+ ir = i * rows;
+
+ for (size_t j = 0; j < cols; j++)
+ {
+ M[ir + j] = m[in + j];
+ }
+ }
+
+ return M;
+ }
+}
+
+#endif /* STRASSEN_MATRIX_MULTIPLIER_HPP_ */
View
134 src/strassen/transpose_matrix_multiplier.hpp
@@ -0,0 +1,134 @@
+/**
+ *Copyright (C) 2011 by Michael Cvet <http://mikecvet.wordpress.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ **/
+
+#ifndef TRANSPOSE_MATRIX_MULTIPLIER_HPP_
+#define TRANSPOSE_MATRIX_MULTIPLIER_HPP_
+
+#include "matrix_multiplier.hpp"
+
+namespace strassen
+{
+ template <typename T>
+ class transpose_matrix_multiplier : public strassen::matrix_multiplier<T>
+ {
+ public:
+ transpose_matrix_multiplier ();
+ ~transpose_matrix_multiplier ();
+
+ T* mult (const T *a, const T *b, size_t arows, size_t acols, size_t brows, size_t bcols);
+ T* transpose (const T *A, size_t rows, size_t cols);
+ matrix_multiplier<T>* copy () const;
+ };
+
+ template <typename T>
+ transpose_matrix_multiplier<T>::transpose_matrix_multiplier ()
+ {
+ }
+
+ template <typename T>
+ transpose_matrix_multiplier<T>::~transpose_matrix_multiplier ()
+ {
+ }
+
+ template <typename T>
+ matrix_multiplier<T>*
+ transpose_matrix_multiplier<T>::copy () const
+ {
+ return (new transpose_matrix_multiplier<T> ());
+ }
+
+ template <typename T>
+ T*
+ transpose_matrix_multiplier<T>::mult (const T *A, const T *b,
+ size_t arows, size_t acols,
+ size_t brows, size_t bcols)
+ {
+ if (arows == bcols)
+ {
+ T t;
+ size_t m = arows;
+ size_t n = acols;
+ size_t im;
+ size_t tmp = brows;
+ brows = bcols;
+ bcols = tmp;
+
+ T *B = transpose (b, brows, bcols);
+ T *C = (T *) malloc (m * m * sizeof (T));
+ const T *a_row = NULL;
+ T *b_row = NULL;
+
+ for (size_t i = 0; i < m; i++)
+ {
+ im = i * m;
+ a_row = &A[i * acols];
+
+ for (size_t j = 0; j < m; j++)
+ {
+ t = 0;
+ b_row = &B[j * bcols];
+
+ for (size_t k = 0; k < n; k++)
+ {
+ t += (a_row[k] * b_row[k]);
+ }
+
+ C[im++] = t;
+ }
+ }
+
+ free (B);
+ return C;
+ }
+ else
+ arows = 0;
+
+ return NULL;
+ }
+
+ template <typename T>
+ T*
+ transpose_matrix_multiplier<T>::transpose (const T *A, size_t rows, size_t cols)
+ {
+ if (A)
+ {
+ T *row = NULL;
+ T *m = (T *) malloc (rows * cols * sizeof (T));
+
+ for (size_t i = 0; i < rows; i++)
+ {
+ row = &m[i * cols];
+
+ for (size_t j = 0; j < cols; j++)
+ {
+ row[j] = A[j * rows + i];
+ }
+ }
+
+ return m;
+ }
+
+ return NULL;
+ }
+}
+
+#endif /* TRANSPOSE_MATRIX_MULTIPLIER_HPP_ */
View
467 src/test/test_strassen_matrix.cpp
@@ -0,0 +1,467 @@
+/**
+ *Copyright (C) 2011 by Michael Cvet <http://mikecvet.wordpress.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ **/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <iostream>
+#include <fstream>
+
+#include "../util/printer.hpp"
+#include "../util/timer.hpp"
+
+#include "../strassen/matrix.hpp"
+#include "../strassen/naive_matrix_multiplier.hpp"
+#include "../strassen/transpose_matrix_multiplier.hpp"
+#include "../strassen/strassen_matrix_multiplier.hpp"
+#include "../strassen/parallel_strassen_matrix_multiplier.hpp"
+
+void
+simple ()
+{
+ strassen::matrix<int> m (8, 8);
+ m.random (10);
+
+ strassen::matrix<int> n (8, 8);
+ n.random (10);
+
+ strassen::matrix<int> o (8, 8);
+ o = m;
+ o.mult (n);
+ m.add (o);
+ m.mult (7);
+
+ strassen::matrix<int> p = o;
+
+ o.add (m);
+ p.add (m);
+
+ o.mult (n);
+ p.mult (n);
+
+ if (o == p)
+ printf ("matrices equal!\n");
+ else
+ {
+ std::string os = alg_tostring (o);
+ std::string ps = alg_tostring (p);
+
+ printf ("%s\n\n%s\n\n", os.c_str(), ps.c_str());
+ }
+}
+
+void
+test_matrix_multipliers ()
+{
+ size_t s = 129;
+
+ strassen::matrix<int> m (s, s);
+ strassen::matrix<int> n (s, s);
+ strassen::matrix<int> m_nmm (s, s, new strassen::naive_matrix_multiplier<int> ());
+ strassen::matrix<int> m_tmm (s, s, new strassen::transpose_matrix_multiplier<int> ());
+ strassen::matrix<int> m_smm (s, s, new strassen::strassen_matrix_multiplier<int> ());
+ strassen::matrix<int> m_psmm (s, s, new strassen::parallel_strassen_matrix_multiplier<int> ());
+
+ m.random (197);
+ n.random (213);
+
+ m_nmm = m;
+ m_tmm = m;
+ m_smm = m;
+ m_psmm = m;
+
+ if (!(m_nmm == m || m_tmm == m || m_smm == m || m_psmm == m))
+ {
+ fprintf (stderr, "time_matrix_multipliers: matrix initialization failure\n");
+ }
+
+ m.mult (n);
+
+ m_nmm.mult (n);
+ m_tmm.mult (n);
+ m_smm.mult (n);
+ m_psmm.mult (n);
+
+ if (!(m_nmm == m))
+ {
+ fprintf (stderr, "test_matrix_multipliers: m_nmm matrix multiplication failure\n");
+ }
+ else
+ {
+ fprintf (stderr, "test_matrix_multipliers: naive multiplier success\n");
+ }
+
+ if (!(m_tmm == m))
+ {
+ fprintf (stderr, "test_matrix_multipliers: m_tmm matrix multiplication failure\n");
+ }
+ else
+ {
+ fprintf (stderr, "test_matrix_multipliers: transpose multiplier success\n");
+ }
+
+ if (!(m_smm == m))
+ {
+ fprintf (stderr, "test_matrix_multipliers: m_smm matrix multiplication failure\n");
+
+ std::string s = alg_tostring (m);
+ std::string smm = alg_tostring (m_smm);
+
+ std::cout << s << std::endl;
+ std::cout << "====================" << std::endl;
+ std::cout << smm << std::endl;
+ }
+ else
+ {
+ fprintf (stderr, "test_matrix_multipliers: strassen multiplier success\n");
+ }
+
+ if (!(m_psmm == m))
+ {
+ fprintf (stderr, "test_matrix_multipliers: m_psmm matrix multiplication failure\n");
+
+ std::string s = alg_tostring (m);
+ std::string smm = alg_tostring (m_smm);
+
+ std::cout << s << std::endl;
+ std::cout << "====================" << std::endl;
+ std::cout << smm << std::endl;
+ }
+ else
+ {
+ fprintf (stderr, "test_matrix_multipliers: parallel strassen multiplier success\n");
+ }
+}
+
+void
+time_matrix_multipliers (size_t sz)
+{
+ strassen::timer t;
+ strassen::matrix<int> m (sz, sz);
+ strassen::matrix<int> n (sz, sz);
+ strassen::matrix<int> m_nmm (sz, sz, new strassen::naive_matrix_multiplier<int> ());
+ strassen::matrix<int> m_tmm (sz, sz, new strassen::transpose_matrix_multiplier<int> ());
+ strassen::matrix<int> m_smm (sz, sz, new strassen::strassen_matrix_multiplier<int> ());
+ strassen::matrix<int> m_psmm (sz, sz, new strassen::parallel_strassen_matrix_multiplier<int> ());
+
+ m.random (103);
+ n.random (103);
+
+ m_nmm = m;
+ m_tmm = m;
+ m_smm = m;
+ m_psmm = m;
+
+ if (!(m_nmm == m || m_tmm == m || m_smm == m || m_psmm == m))
+ {
+ fprintf (stderr, "test_matrix_multipliers: matrix initialization failure\n");
+ }
+
+ printf ("MM array size: %lu x %lu\n", sz, sz);
+
+ m.mult (n);
+
+ t.start ();
+ m_nmm.mult (n);
+ t.stop ();
+
+ printf ("MM time [naive]: %lu.%0lu\n", t.secs(), t.usecs());
+
+ t.start ();
+ m_tmm.mult (n);
+ t.stop ();
+
+ printf ("MM time [transpose]: %lu.%0lu\n", t.secs(), t.usecs());
+
+ t.start ();
+ m_smm.mult (n);
+ t.stop ();
+
+ printf ("MM time [strassen]: %lu.%0lu\n", t.secs(), t.usecs());
+
+ t.start ();
+ m_psmm.mult (n);
+ t.stop ();
+
+ printf ("MM time [parallel strassen]: %lu.%0lu\n", t.secs(), t.usecs());
+
+ if (!(m_nmm == m))
+ {
+ fprintf (stderr, "test_matrix_multipliers: m_nmm matrix multiplication failure\n");
+ }
+ else
+ {
+ fprintf (stderr, "test_matrix_multipliers: naive multiplier success\n");
+ }
+
+ if (!(m_tmm == m))
+ {
+ fprintf (stderr, "test_matrix_multipliers: m_tmm matrix multiplication failure\n");
+ }
+ else
+ {
+ fprintf (stderr, "test_matrix_multipliers: transpose multiplier success\n");
+ }
+
+ if (!(m_smm == m))
+ {
+ fprintf (stderr, "test_matrix_multipliers: m_smm matrix multiplication failure\n");
+ }
+ else
+ {
+ fprintf (stderr, "test_matrix_multipliers: strassen multiplier success\n");
+ }
+
+ if (!(m_psmm == m))
+ {
+ fprintf (stderr, "test_matrix_multipliers: m_psmm matrix multiplication failure\n");
+ }
+ else
+ {
+ fprintf (stderr, "test_matrix_multipliers: parallel strassen multiplier success\n");
+ }
+}
+
+void
+time_full ()
+{
+ bool b = false;
+ uint32_t trials = 16;
+ double tmp;
+ double avgsum = 0.0;
+ strassen::timer t;
+
+ strassen::matrix<int> m (1024, 1024);
+ strassen::matrix<int> n (1024, 1024);
+
+ for (uint32_t i = 0; i < trials; i++)
+ {
+ m.random (177);
+ n.random (273);
+
+ t.start ();
+
+ m.mult (n);
+ m.add (n);
+
+ strassen::matrix<int> o (m);
+ b = (m == o);
+
+ strassen::matrix<int> p (512, 1024);
+ p.random ();
+
+ m.mult (p);
+ m.sub (n);
+ n.mult (m);
+ m.mult (n);
+
+ t.stop ();
+
+ printf ("full test time [%u]: %lu.%0lu\n", i, t.secs(), t.usecs());
+
+ tmp = (double) t.usecs ();
+ tmp = tmp / 1000000;
+ tmp += (double) t.secs ();
+ avgsum += tmp;
+
+ if (!b)
+ fprintf (stderr, "matrices inequal!\n");
+ }
+
+ printf ("average time over %u trials: %lfs\n", trials, (avgsum / trials));
+}
+
+void
+mult_test ()
+{
+ strassen::matrix<int> m (800, 800);
+ strassen::matrix<int> n (800, 800);
+ strassen::matrix<int> o (800, 800);
+
+ m.random (231);
+ n.random (673);
+
+ o = m;
+
+ m.mult (n);
+ o.mult (n);
+
+ if (!(m == o))
+ {
+ fprintf (stderr, "matrix mult match failure\n");
+
+ std::string ms = alg_tostring (m);
+ std::string os = alg_tostring (o);
+
+ printf ("%s\n\n%s\n\n", ms.c_str(), os.c_str());
+ }
+}
+
+void
+big_test (size_t start)
+{
+ std::ofstream out;
+
+ out.open ("./matrix_mult.out", std::ios::out | std::ios::app);
+
+ if (out.fail ())
+ {
+ fprintf (stderr, "error opening output file\n");
+ perror ("open");
+ exit (1);
+ }
+
+ strassen::timer t;
+
+ size_t reps = 4;
+
+ size_t s = start;
+ time_t secs[5];
+ time_t usecs[5];
+ char buf[128];
+
+ while (s < 5000)
+ {
+ strassen::matrix<int> m (s, s);
+ strassen::matrix<int> n (s, s);
+ strassen::matrix<int> m_nmm (s, s, new strassen::naive_matrix_multiplier<int> ());
+ strassen::matrix<int> m_tmm (s, s, new strassen::transpose_matrix_multiplier<int> ());
+ strassen::matrix<int> m_smm (s, s, new strassen::strassen_matrix_multiplier<int> ());
+ strassen::matrix<int> m_psmm (s, s, new strassen::parallel_strassen_matrix_multiplier<int> ());
+
+ m.random (197);
+ n.random (213);
+
+ m_nmm = m;
+ m_tmm = m;
+ m_smm = m;
+ m_psmm = m;
+
+ printf ("\nMM array size: %lu x %lu\n", s, s);
+
+ secs[0] = 0;
+ usecs[0] = 0;
+ /*
+ for (uint32_t i = 0; i < reps; i++)
+ {
+ t.start ();
+ m_nmm.mult (n);
+ t.stop ();
+
+ secs[0] += t.secs ();
+ usecs[0] += t.usecs ();
+ }
+
+ secs[0] = secs[0] / reps;
+ usecs[0] = usecs[0] / reps;
+ m_nmm.clear ();
+ */
+ printf ("MM time [naive]: %lu.%lu\n", secs[0], usecs[0]);
+
+ secs[1] = 0;
+ usecs[1] = 0;
+
+ for (uint32_t i = 0; i < reps; i++)
+ {
+ t.start ();
+ m_tmm.mult (n);
+ t.stop ();
+
+ secs[1] += t.secs ();
+ usecs[1] += t.usecs ();
+ }
+
+ secs[1] = (time_t)(secs[1] / reps);
+ usecs[1] = (time_t)(usecs[1] / reps);
+ m_tmm.clear ();
+
+ printf ("MM time [transpose]: %lu.%lu\n", secs[1], usecs[1]);
+
+ secs[3] = 0;
+ usecs[3] = 0;
+
+ for (uint32_t i = 0; i < reps; i++)
+ {
+ t.start ();
+ m_smm.mult (n);
+ t.stop ();
+
+ secs[3] += t.secs ();
+ usecs[3] += t.usecs ();
+ }
+
+ secs[3] = secs[3] / reps;
+ usecs[3] = usecs[3] / reps;
+ m_smm.clear ();
+
+ printf ("MM time [strassen]: %lu.%lu\n", secs[3], usecs[3]);
+
+ secs[4] = 0;
+ usecs[4] = 0;
+
+ for (uint32_t i = 0; i < reps; i++)
+ {
+ t.start ();
+ m_psmm.mult (n);
+ t.stop ();
+
+ secs[4] += t.secs ();
+ usecs[4] += t.usecs ();
+ }
+
+ secs[4] = secs[4] / reps;
+ usecs[4] = usecs[4] / reps;
+ m_psmm.clear ();
+
+ printf ("MM time [parallel strassen]: %lu.%lu\n", secs[4], usecs[4]);
+
+ snprintf (buf, 128, "%lu %lu.%lu %lu.%lu %lu.%lu %lu.%lu %lu.%lu\n",
+ s,
+ secs[0], usecs[0],
+ secs[1], usecs[1],
+ secs[2], usecs[2],
+ secs[3], usecs[3],
+ secs[4], usecs[4]);
+
+ out.write (buf, strlen (buf));
+ out.flush ();
+
+ s += 137;
+ sleep (1);
+ }
+}
+
+int
+main ()
+{
+ srand (time (NULL));
+
+ simple ();
+ test_matrix_multipliers ();
+ time_full ();
+ mult_test ();
+
+ //13
+ big_test (40);
+
+ return 0;
+}
View
93 src/util/printer.hpp
@@ -0,0 +1,93 @@
+/**
+ *Copyright (C) 2011 by Michael Cvet <http://mikecvet.wordpress.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ **/
+
+#ifndef PRINTER_HPP_
+#define PRINTER_HPP_
+
+#include <string>
+#include <sstream>
+
+#include "../strassen/matrix.hpp"
+
+namespace strassen
+{
+ // template <typename T>
+ std::string
+ alg_tostring (strassen::matrix<int> &m)
+ {
+ size_t i = 0;
+ size_t rows = m.rows ();
+ size_t cols = m.cols ();
+ std::stringstream ss;
+ matrix<int>::iterator iter (m);
+
+ ss << "| ";
+
+ while (iter.ok ())
+ {
+ ss << iter.val () << " ";
+
+ if (++i == cols && (--rows))
+ {
+ i = 0;
+ ss << "|\n| ";
+ }
+
+ ++iter;
+ }
+
+ ss << "|\n";
+
+ return ss.str ();
+ }
+
+ std::string
+ alg_tostring (strassen::matrix<uint32_t> &m)
+ {
+ size_t i = 0;
+ size_t rows = m.rows ();
+ size_t cols = m.cols ();
+ std::stringstream ss;
+ matrix<uint32_t>::iterator iter (m);
+
+ ss << "| ";
+
+ while (iter.ok ())
+ {
+ ss << iter.val () << " ";
+
+ if (++i == cols && (--rows))
+ {
+ i = 0;
+ ss << "|\n| ";
+ }
+
+ ++iter;
+ }
+
+ ss << "|\n";
+
+ return ss.str ();
+ }
+}
+
+#endif /* PRINTER_HPP_ */
View
64 src/util/timer.cpp
@@ -0,0 +1,64 @@
+/**
+ *Copyright (C) 2011 by Michael Cvet <http://mikecvet.wordpress.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ **/
+
+#include "timer.hpp"
+
+using namespace strassen;
+
+timer::timer ()
+ : _secs (0),
+ _usecs (0)
+{
+}
+
+timer::~timer ()
+{
+}
+
+void
+timer::start ()
+{
+ gettimeofday (&_start, NULL);
+}
+
+void
+timer::stop ()
+{
+ gettimeofday (&_stop, NULL);
+
+ timersub (&_stop, &_start, &_diff);
+
+ _secs = _diff.tv_sec;
+ _usecs = _diff.tv_usec;
+}
+
+time_t
+timer::secs ()
+{
+ return _secs;
+}
+
+time_t
+timer::usecs ()
+{
+ return _usecs;
+}
View
53 src/util/timer.hpp
@@ -0,0 +1,53 @@
+/**
+ *Copyright (C) 2011 by Michael Cvet <http://mikecvet.wordpress.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ **/
+
+
+#ifndef TIMER_HPP_
+#define TIMER_HPP_
+
+#include <time.h>
+#include <sys/time.h>
+
+namespace strassen
+{
+ class timer
+ {
+ private:
+ time_t _secs;
+ time_t _usecs;
+
+ struct timeval _start;
+ struct timeval _stop;
+ struct timeval _diff;
+
+ public:
+ timer ();
+ ~timer ();
+
+ void start ();
+ void stop ();
+ time_t secs ();
+ time_t usecs ();
+ };
+}
+
+#endif /* TIMER_HPP_ */
Please sign in to comment.
Something went wrong with that request. Please try again.