In [6]:
%%writefile funcs.hpp

#ifndef FUNCS_HPP
#define FUNCS_HPP

// Returns the sum of i and j (the definition lives in funcs.cpp).
int add(int i, int j);

#endif // FUNCS_HPP

Overwriting funcs.hpp


In [7]:
%%writefile funcs.cpp

// Returns the sum of i and j.
int add(int i, int j) {
    return i + j;
}

Overwriting funcs.cpp


In [8]:
%%writefile wrap1.cpp
#include <pybind11/pybind11.h>
#include "funcs.hpp"

namespace py = pybind11;
using namespace pybind11::literals;

// The first macro argument must equal the name of the shared library that is
// built and imported (wrap1<extension-suffix>, `import wrap1`); otherwise
// Python cannot find the module's init function.
PYBIND11_MODULE(wrap1, m) {
    m.doc() = "pybind11 example plugin"; // optional module docstring

    // Expose add() with keyword arguments; j defaults to 2 when omitted.
    m.def("add", &add, "A function which adds two numbers",
          "i"_a, "j"_a=2);
}

Overwriting wrap1.cpp


In [35]:
%%bash
# wrap1.cpp only declares add() (via funcs.hpp); funcs.cpp must be compiled in
# as well, otherwise the module is built with an unresolved symbol.
c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) wrap1.cpp funcs.cpp -o wrap1$(python3-config --extension-suffix)

In [36]:
import wrap1

In [37]:
# Only i is passed; j falls back to the default (2) declared in the binding.
wrap1.add(i=2)

4

In [33]:
help(wrap1)

Help on module wrap1:

NAME
    wrap1 - pybind11 example plugin

FUNCTIONS
    add(...) method of builtins.PyCapsule instance
        add(i: int=1, j: int=2) -> int
        
        A function which adds two numbers

FILE
    /home/olszewskip/Desktop/git-repos/MDFS_playground/python/scheduler/example1/wrap1.cpython-35m-x86_64-linux-gnu.so




---
07.02

In [10]:
%%writefile pi.cpp
/*
<%
setup_pybind11(cfg)
%>
*/
#include <pybind11/pybind11.h>

namespace py = pybind11;

// Approximates pi by integrating 4 / (1 + x^2) over [0, 1] with the midpoint
// rule, using n sub-intervals of equal width.
double compute_pi(int n) {
    const double width = 1.0 / n;
    double sum = 0;

    int k = 0;
    while (k < n) {
        // Midpoint of the k-th sub-interval.
        const double mid = (k + 0.5) * width;
        sum += 4.0 / (1 + mid * mid);
        ++k;
    }

    sum *= width;
    return sum;
}


// Python extension module "pi": exposes compute_pi under the same name.
PYBIND11_MODULE(pi, mod) {
    mod.def("compute_pi", &compute_pi);
}

Overwriting pi.cpp


In [11]:
import cppimport

In [12]:
pi = cppimport.imp("pi")

In [13]:
%timeit pi.compute_pi(1000000)

4.37 ms ± 60.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [5]:
%%writefile openmp_pi.cpp
/*
<%
setup_pybind11(cfg)
cfg['compiler_args'] = ['-fopenmp']
cfg['linker_args'] = ['-fopenmp']
%>
*/
#include <pybind11/pybind11.h>
#include <omp.h>

namespace py = pybind11;

// Approximates pi with the midpoint rule on 4 / (1 + x^2) over [0, 1],
// splitting the n sub-intervals across two OpenMP threads and combining the
// per-thread sums with a reduction.
double compute_pi(int n) {
    const double width = 1.0 / n;
    double sum = 0;

    omp_set_num_threads(2);
    #pragma omp parallel for reduction(+:sum)
    for (int k = 0; k < n; k++) {
        // Midpoint of the k-th sub-interval; private to each iteration.
        const double mid = (k + 0.5) * width;
        sum += 4.0 / (1 + mid * mid);
    }

    return sum * width;
}


// Python extension module "openmp_pi": exposes compute_pi.
PYBIND11_MODULE(openmp_pi, m) {
    // compute_pi takes and returns plain C++ values and never touches Python
    // objects, so the GIL is released for the duration of the call; other
    // Python threads can run while the OpenMP loop is busy.
    m.def("compute_pi", &compute_pi,
          py::call_guard<py::gil_scoped_release>());
}

Overwriting openmp_pi.cpp


In [6]:
# TODO: consider releasing the GIL while the OpenMP loop runs, i.e. bind with:
#   m.def("compute_pi", &compute_pi, py::call_guard<py::gil_scoped_release>());

In [1]:
import cppimport

In [2]:
# cppimport.force_rebuild()
openmp_pi = cppimport.imp("openmp_pi")

In [3]:
openmp_pi.compute_pi(10)

3.1424259850010987

In [4]:
%timeit openmp_pi.compute_pi(1000000)

2.35 ms ± 3.95 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
%%writefile takietam.cpp
/*
<%
setup_pybind11(cfg)
%>
*/
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
namespace py = pybind11;

// Writes the greeting "hi!" through Python's print().
void sayhi() {
    const char *greeting = "hi!";
    py::print(greeting);
}
        
// Python extension module "takietam": exposes sayhi().
PYBIND11_MODULE(takietam, m) {
    m.def("sayhi", &sayhi);
}

Overwriting takietam.cpp


In [1]:
import cppimport

In [2]:
cppimport.set_quiet(True)

In [3]:
takietam = cppimport.imp("takietam")

In [4]:
takietam.sayhi()

hi!


In [1]:
%%writefile np1.cpp
/*
<%
setup_pybind11(cfg)
%>
*/
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
namespace py = pybind11;

void print_info(py::array_t<int> input1) {
    py::buffer_info buf1 = input1.request();
    py::print("ptr:", buf1.ptr);
    py::print("itemsize:", buf1.itemsize);
    py::print("format:", buf1.format);
    py::print("ndim:", buf1.ndim);
    for (int i = 0; i < buf1.shape.size(); i++) {
      py::print(i, "shape:", buf1.shape[i]);
    }
    for (int i = 0; i < buf1.strides.size(); i++) {
      py::print(i, "stride:", buf1.strides[i]);
    }
    
    int *ptr = (int *) buf1.ptr;
    int element_count = 1;
    for (auto r: buf1.shape) {
      element_count *= r;
    }
    for (int i = 0; i < element_count; i++) {
        py::print(i, "element:", *ptr++);
    }

}
        
// Python extension module "np1": exposes print_info().
PYBIND11_MODULE(np1, m) {
    m.def("print_info", &print_info);
}

Overwriting np1.cpp


In [2]:
import cppimport
np1 = cppimport.imp("np1")
import numpy as np

In [4]:
np1.print_info([[11,12,13],[14,15,16]])

ptr: <capsule object NULL at 0x7fc2a80dafc0>
itemsize: 4
format: i
ndim: 2
0 shape: 2
1 shape: 3
0 stride: 12
1 stride: 4
0 element: 11
1 element: 12
2 element: 13
3 element: 14
4 element: 15
5 element: 16


In [9]:
%%writefile np2.cpp
/*
<%
setup_pybind11(cfg)
%>
*/
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
namespace py = pybind11;

// Doubles every element of an integer array in place.
//
// The elements are visited through a flat pointer, so the array must be
// C-contiguous; a strided view is rejected with ValueError instead of having
// the wrong memory locations modified.
// NOTE(review): if the caller's array is not already an int array, pybind11
// presumably converts it into a temporary copy before this function runs, so
// the caller's object stays unchanged. Confirm, and consider binding the
// argument with noconvert().
void double_elems(py::array_t<int> input) {

    if (!(input.flags() & py::array::c_style)) {
        throw py::value_error("double_elems requires a C-contiguous array");
    }

    // Ask for a writable buffer so read-only arrays are refused up front.
    py::buffer_info buf = input.request(true);
    int *ptr = static_cast<int *>(buf.ptr);

    // buf.size is the total element count; using it avoids multiplying the
    // extents into a narrower int.
    int *const end = ptr + buf.size;
    for (int *p = ptr; p != end; ++p) {
        *p *= 2;
    }
}
        
// Python extension module "np2": exposes double_elems().
PYBIND11_MODULE(np2, m) {
    m.def("double_elems", &double_elems);
}

Overwriting np2.cpp


In [1]:
import cppimport
np2 = cppimport.imp("np2")
import numpy as np

In [2]:
# The dtype is set to int32 to match the C++ parameter type py::array_t<int>.
# NOTE(review): with any other dtype pybind11 presumably hands the function a
# converted temporary copy, so the doubling would not show up in my_array - confirm.
my_array = np.array([[11,12,13],[14,15,16]], dtype='int32') # dtype!!!
np2.double_elems(my_array)

In [3]:
my_array

array([[22, 24, 26],
       [28, 30, 32]], dtype=int32)

In [4]:
%%writefile np3.cpp
/*
<%
setup_pybind11(cfg)
cfg['compiler_args'] = ['-fopenmp']
cfg['linker_args'] = ['-fopenmp']
%>
*/
#include <pybind11/pybind11.h>
// #include <pybind11/numpy.h>
#include <omp.h>

#define NUM_THREADS 2
namespace py = pybind11;

double compute_pi(int n) {
    
    double step = 1.0 / n;
    double pi_parts[NUM_THREADS];
    omp_set_num_threads(NUM_THREADS);
    
    int nthreads;
    
    #pragma omp parallel
    {
        int id = omp_get_thread_num();
        pi_parts[id] = 0.0;
        int nthrds = omp_get_num_threads();
        if (id == 0) nthreads = nthrds;
        double x;
        for (int i=id; i<n; i=i+nthrds) {
            x = (i + 0.5) * step;
            pi_parts[id] += 4.0 / (1.0 + x*x);
        }
    }
    
    double pi = 0.0;
    for (int i=0; i<nthreads; i++){
        pi += pi_parts[i] * step;
    }
    return pi;
}


// Python extension module "np3": exposes compute_pi().
PYBIND11_MODULE(np3, mod) {
    mod.def("compute_pi", &compute_pi);
}

Overwriting np3.cpp


In [5]:
import cppimport
np3 = cppimport.imp("np3")
import numpy as np

In [6]:
np3.compute_pi(10000)

3.141592654423132

In [6]:
%%writefile np3.cpp
/*
<%
setup_pybind11(cfg)
cfg['compiler_args'] = ['-fopenmp']
cfg['linker_args'] = ['-fopenmp']
%>
*/
#include <omp.h>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

#define NUM_THREADS 2
namespace py = pybind11;

// Doubles every element of an integer array in place, distributing the
// elements cyclically over the OpenMP threads by hand.
//
// The elements are addressed through a flat pointer, so the array must be
// C-contiguous; a strided view is rejected with ValueError instead of having
// the wrong memory locations modified.
// NOTE(review): if the caller's array is not already an int array, pybind11
// presumably converts it into a temporary copy first, so the caller's object
// stays unchanged - confirm.
void double_elems(py::array_t<int> input){

    if (!(input.flags() & py::array::c_style)) {
        throw py::value_error("double_elems requires a C-contiguous array");
    }

    // Ask for a writable buffer so read-only arrays are refused up front.
    py::buffer_info buf = input.request(true);
    int *ptr = static_cast<int *>(buf.ptr);

    // buf.size is the total element count in pybind11's own index type;
    // using it avoids multiplying the extents into a narrower int.
    using index_t = decltype(buf.size);
    const index_t element_count = buf.size;

    omp_set_num_threads(NUM_THREADS);
    #pragma omp parallel
    {
        const int id = omp_get_thread_num();
        const int nthrds = omp_get_num_threads();
        // Thread `id` handles elements id, id + nthrds, id + 2*nthrds, ...
        for (index_t i = id; i < element_count; i += nthrds) {
            ptr[i] *= 2;
        }
    }

}
        
// Python extension module "np3": exposes double_elems().
PYBIND11_MODULE(np3, m) {
    m.def("double_elems", &double_elems);
}

Overwriting np3.cpp


In [1]:
import cppimport
np3 = cppimport.imp("np3")
import numpy as np

In [3]:
my_array = np.array(range(100), dtype = 'int32')
np3.double_elems(my_array)

In [4]:
my_array

array([  0,   2,   4,   6,   8,  10,  12,  14,  16,  18,  20,  22,  24,
        26,  28,  30,  32,  34,  36,  38,  40,  42,  44,  46,  48,  50,
        52,  54,  56,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
        78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102,
       104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128,
       130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154,
       156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180,
       182, 184, 186, 188, 190, 192, 194, 196, 198], dtype=int32)

In [8]:
%%writefile np4.cpp
/*
<%
setup_pybind11(cfg)
cfg['compiler_args'] = ['-fopenmp']
cfg['linker_args'] = ['-fopenmp']
%>
*/
#include <omp.h>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

#define NUM_THREADS 2
namespace py = pybind11;

// Doubles every element of an integer array in place using an OpenMP
// work-sharing loop with a static schedule of chunk size 1.
//
// The elements are addressed through a flat pointer, so the array must be
// C-contiguous; a strided view is rejected with ValueError instead of having
// the wrong memory locations modified.
// NOTE(review): if the caller's array is not already an int array, pybind11
// presumably converts it into a temporary copy first, so the caller's object
// stays unchanged - confirm.
void double_elems(py::array_t<int> input){

    if (!(input.flags() & py::array::c_style)) {
        throw py::value_error("double_elems requires a C-contiguous array");
    }

    // Ask for a writable buffer so read-only arrays are refused up front.
    py::buffer_info buf = input.request(true);
    int *ptr = static_cast<int *>(buf.ptr);

    // buf.size is the total element count in pybind11's own index type;
    // using it avoids multiplying the extents into a narrower int.
    using index_t = decltype(buf.size);
    const index_t element_count = buf.size;

    omp_set_num_threads(NUM_THREADS);
    #pragma omp parallel for schedule(static, 1)
    for (index_t i = 0; i < element_count; i++) {
        ptr[i] *= 2;
    }

}
        
// Python extension module "np4": exposes double_elems().
PYBIND11_MODULE(np4, m) {
    m.def("double_elems", &double_elems);
}

Overwriting np4.cpp


In [9]:
import cppimport
np4 = cppimport.imp("np4")
import numpy as np

In [10]:
my_array = np.array(range(10), dtype = 'int32')
# Exercise the module built and imported just above (np4), not the older np3.
np4.double_elems(my_array)
my_array

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18], dtype=int32)

In [None]:
%%writefile np5.cpp
/*
<%
setup_pybind11(cfg)
cfg['compiler_args'] = ['-fopenmp']
cfg['linker_args'] = ['-fopenmp']
%>
*/
#include <cmath>
#include <vector>
#include <omp.h>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

#define NUM_THREADS 2
namespace py = pybind11;

// Work-in-progress kernel: prints log(3) and a zero-initialised
// divisions x divisions table, then returns the dot product of the first N
// elements of input1 and input2, computed with an OpenMP reduction.
//
// The c_style flag asks pybind11 for dense, C-contiguous arrays, which the
// flat pointer indexing below relies on.
//
// Raises ValueError when divisions is negative or when N exceeds the number
// of elements of either input.
int contingency(const int divisions, const int N,
                py::array_t<int, py::array::c_style | py::array::forcecast> input1,
                py::array_t<int, py::array::c_style | py::array::forcecast> input2){

    if (divisions < 0) {
        throw py::value_error("contingency: divisions must be non-negative");
    }

    double l = std::log(3);
    py::print(l);

    // std::vector replaces the original variable-length array, which is not
    // standard C++ and cannot portably take an initializer.
    std::vector<std::vector<int>> contingency_m(divisions, std::vector<int>(divisions, 0));

    for (int i = 0; i < divisions; i++)
        for (int j = 0; j < divisions; j++)
            py::print(contingency_m[i][j]);


    py::buffer_info buf1 = input1.request();
    py::buffer_info buf2 = input2.request();

    // Reading N elements from a shorter buffer would run past its end.
    if (N > buf1.size || N > buf2.size) {
        throw py::value_error("contingency: N exceeds the length of an input array");
    }

    const int *ptr1 = static_cast<const int *>(buf1.ptr);
    const int *ptr2 = static_cast<const int *>(buf2.ptr);


    omp_set_num_threads(NUM_THREADS);

    int prod = 0;

    #pragma omp parallel for reduction(+:prod)
    for (int i = 0; i < N; i++) {
        prod += ptr1[i] * ptr2[i];
    }

    return prod;
}
        
// Python extension module "np5": exposes contingency().
PYBIND11_MODULE(np5, m) {
    m.def("contingency", &contingency);
}

In [1]:
%%writefile np6.cpp
/*
<%
setup_pybind11(cfg)
cfg['compiler_args'] = ['-fopenmp']
cfg['linker_args'] = ['-fopenmp']
%>
*/
#include <omp.h>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

#define NUM_THREADS 2
namespace py = pybind11;

int scalar_prod(py::array_t<int> input1, py::array_t<int> input2){
    
    py::buffer_info buf1 = input1.request();
    py::buffer_info buf2 = input2.request();
    int *ptr1 = (int *) buf1.ptr;
    int *ptr2 = (int *) buf2.ptr;
    
    int element_count = 1;
    for (auto r: buf1.shape) {
      element_count *= r;
    }
    
    omp_set_num_threads(NUM_THREADS);
    
    int prod = 0;
    
    #pragma omp parallel
    {
        #pragma omp for reduction(+:prod)
        for (int i = 0; i < element_count; i++) {
            prod += ptr1[i] * ptr2[i];
        }
    }
    
    return prod;
}
        
// Python extension module "np6": exposes scalar_prod().
PYBIND11_MODULE(np6, m) {
    m.def("scalar_prod", &scalar_prod);
}

Overwriting np6.cpp


In [2]:
import cppimport
np6 = cppimport.imp("np6")
import numpy as np

In [3]:
# Benchmark inputs: two length-N int32 vectors filled with 7s and 5s.
N = 1000
my_array1 = np.full(N, 7, dtype='int32')
my_array2 = np.full(N, 5, dtype='int32')

In [10]:
%%timeit
np.sum(my_array1 * my_array2)

5.51 µs ± 85.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [11]:
%%timeit
np6.scalar_prod(my_array1, my_array2) # NUM_THREADS 1

2.78 µs ± 117 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [5]:
%%timeit
np6.scalar_prod(my_array1, my_array2) # NUM_THREADS 2

3.17 µs ± 101 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [None]:
%%writefile mpi4py_openmp.py
import numpy as np
np.random.seed(123)

from mpi4py import MPI
comm = MPI.COMM_WORLD
comm.Barrier()
time0 = MPI.Wtime()
size = comm.Get_size()
rank = comm.Get_rank()