# Pointers
One of the biggest hurdles when transitioning from Python to Cython is the use of pointers in C code, which is necessary to write clean Cython programs.


In C, every variable is stored at a memory location, and every memory location has an address. The address of a variable is obtained with the `&` (address-of) operator.

In [1]:
# enable Cython in the IPython Notebook
%load_ext Cython

In [2]:
%%cython
# from libc.stdio cimport printf

# this works, but unfortunately prints to the command line and not the notebook
# https://github.com/ipython/ipython/issues/1230
# cdef int variable_1 = 10
# cdef int variable_2 = 20
# printf("Address of variable_1: %p\n", <void*>&variable_1);
# printf("Address of variable_2: %p\n", <void*>&variable_2);

# Cast the addresses to size_t rather than unsigned: a pointer is wider than
# an unsigned int on 64-bit platforms, so the narrower cast would truncate
# the address (C compilers typically warn about, or reject, such a cast).
cdef int variable_1 = 10
cdef int variable_2 = 20
print hex(<size_t>(&variable_1))
print hex(<size_t>(&variable_2))

0x3bfd2e0
0x3bfd2e4


## So what exactly are pointers?
A pointer is a variable whose value is the memory address of another variable. Writing through a pointer changes the value stored at that address, while changing the pointer itself makes it point at a different address.

To declare a pointer, use the following syntax:

In [3]:
%%cython
# a single cdef block declaring one pointer per element type
cdef:
    int* integer_pointer    # pointer to an integer
    double* double_pointer  # pointer to a double
    float* float_pointer    # pointer to a float
    char* char_pointer      # pointer to a char

While these pointers all hold memory addresses, the values stored at those addresses are of different types.

## How do I use pointers?
Pointers are generally used to read or modify the value of the variable they point to. In C, this is done with the `*` (dereference) operator, which returns the value stored at the address the pointer holds. However, the `*` operator already has a meaning in Python -- unpacking sequences (for example, `f(*args)`). As a result, Cython reads and writes the pointed-at value with an index notation similar to lists: `pointer[0]`.

In [4]:
%%cython
# the variable whose value will be changed through the pointer
cdef int to_increment = 0
# an int pointer (declared with *) initialised with the address of that variable
cdef int* incrementor = &to_increment

# index notation dereferences the pointer: add one to the value it points at
incrementor[0] = incrementor[0] + 1
print "Value of variable set to pointer: {}".format(incrementor[0])
print "Value of original variable: {}".format(to_increment)

# now add one to the original variable directly
to_increment = to_increment + 1
print "Value of variable set to pointer: {}".format(incrementor[0])
print "Value of original variable: {}".format(to_increment)

Value of variable set to pointer: 1
Value of original variable: 1
Value of variable set to pointer: 2
Value of original variable: 2


As you can see, modifying the value the pointer was pointing at (by adding one) changed the value of the original variable, and changes to the original variable propagated to the value the pointer was pointing at.

## Some more pointer examples

In [5]:
%%cython
# two integers and one integer pointer, declared together
cdef:
    int some_value
    int another_value
    int* int_pointer

# aim the pointer at some_value, then write 10 through it;
# some_value itself now holds 10
int_pointer = &some_value
int_pointer[0] = 10
print "Value pointer points at: {}".format(int_pointer[0])
print "Value of some_value: {}".format(some_value)

# the same pointer can be re-aimed at a different variable
int_pointer = &another_value
print "Value pointer points at: {}".format(int_pointer[0])
print "Value of another_value: {}".format(another_value)

# finally, a pointer may be reset to NULL so it points at nothing
int_pointer = NULL

Value pointer points at: 10
Value of some_value: 10
Value pointer points at: 0
Value of another_value: 0


## Quicksorting a C array in Cython
The following example runs quicksort in-place (thanks to pointers) on a C-level array in Cython.

In [6]:
%%cython
from libc.stdlib cimport rand, RAND_MAX
from libc.stdlib cimport calloc, free

cdef void sort(double* to_sort, size_t low, size_t high) nogil:
    """Sort the inclusive range to_sort[low..high] in place with quicksort.

    The first element of the range is used as the pivot. Both bounds are
    inclusive indices into to_sort.
    """
    cdef size_t pivot, i, j
    cdef double temp
    if low < high:
        pivot = low
        i = low
        j = high
        while i < j:
            # test the bound before reading so to_sort[high + 1] is never touched
            while i <= high and to_sort[i] <= to_sort[pivot]:
                i += 1
            # j cannot move below low: to_sort[low] is the pivot, and a value
            # is never greater than itself
            while j > low and to_sort[j] > to_sort[pivot]:
                j -= 1
            if i < j:
                temp = to_sort[i]
                to_sort[i] = to_sort[j]
                to_sort[j] = temp

        # move the pivot into its final position
        temp = to_sort[j]
        to_sort[j] = to_sort[pivot]
        to_sort[pivot] = temp

        # the indices are unsigned: when j == 0, j - 1 would wrap around to a
        # huge value and the recursion would run far outside the array
        if j > low:
            sort(to_sort, low, j - 1)
        sort(to_sort, j + 1, high)

# I put the main code in a separate setup function 
# with nogil to ensure that all operations are c-level
cdef void run_sort_example() nogil:
    """Fill a C array with random doubles, print it, sort it, print it again."""
    # create a C-level array of size 6
    cdef size_t array_size = 6
    cdef size_t i
    cdef double* array_to_sort = <double*> calloc(array_size, sizeof(double))

    # calloc returns NULL when the allocation fails; do not dereference it
    if array_to_sort == NULL:
        with gil:
            print "Could not allocate memory for the example array"
        return

    # populate the array with random doubles in [0, 1)
    with gil:
        print "Values in unsorted array:"
    for i in range(array_size):
        array_to_sort[i] = rand() / (RAND_MAX + 1.)
        with gil:
            print array_to_sort[i]
    sort(array_to_sort, 0, array_size - 1)
    # print the sorted array
    with gil:
        print "Values in array post quicksort:"
    for i in range(array_size):
        with gil:
            print array_to_sort[i]

    # memory obtained from calloc is not garbage collected; release it
    free(array_to_sort)

run_sort_example()


Values in unsorted array:
0.67929640552
0.934692895506
0.383502077311
0.519416371826
0.830965345725
0.0345721105114
Values in array post quicksort:
0.0345721105114
0.383502077311
0.519416371826
0.67929640552
0.830965345725
0.934692895506


# Using NumPy arrays with Cython
NumPy arrays are essentially wrappers around a regular C array. This means that we can easily pass a NumPy array from the Python level to the C level and manipulate a NumPy array in Cython without any data-copying -- this provides all the benefits of C-level operations quite easily.

To use a NumPy array in Cython, you can use the `.data` attribute to extract the underlying C pointer.
```
cdef dtype* X_c = <dtype*> X_ndarray.data
```

`dtype` refers to the C type matching the data type of the NumPy array. For example, to get a C pointer to the data of a NumPy array of doubles (`float64`):
```
cdef double* X_c = <double*> X_ndarray.data
```

Lastly, before extracting the C pointer, the NumPy array must be explicitly cast as an `ndarray`, like so:
```
cdef double* X_c = <double*> (<numpy.ndarray> X_ndarray).data
```

The example below demonstrates these concepts.

In [7]:
%%cython
# import both numpy and the Cython declarations for numpy
import numpy as np
cimport numpy as np

cdef void c_add_one(long* input, int array_size) nogil:
    # add one, in place, to each of the first array_size elements of input
    cdef int idx
    for idx in range(array_size):
        input[idx] += 1
    

# request C long elements explicitly ("l" is NumPy's type code for C long):
# NumPy's default integer dtype is platform dependent, and the buffer below
# is read and written through a long*
cdef X = np.array([0, 1, 2, 3, 4, 5], dtype="l")
print "original numpy array: {}".format(X)

# turn the numpy array into a C pointer
# the pointer's element type (long) matches the dtype requested above
cdef long* X_c = <long*> (<np.ndarray> X).data

# get the size of the array
cdef int array_size = X.shape[0]

# use a cython function to increment all values in the array
c_add_one(X_c, array_size)
print "numpy array after being incremented by a C function: {}".format(X)

original numpy array: [0 1 2 3 4 5]
numpy array after being incremented by a C function: [1 2 3 4 5 6]


Notice that changing the values of the C pointer representation automatically changed the NumPy array on the Python side as well; this makes using Cython in computation-heavy operations with NumPy quite useful.

The next example is a slightly more interesting application of Cython to NumPy arrays. I'll use Cython to find the weighted median of a large, unsorted, randomly generated array with weights.

In [8]:
%%cython
# import both numpy and the Cython declarations for numpy
import numpy as np
import sys
cimport numpy as np

cdef double compute_weighted_median(double* y_vals, double* weights,
                                    long array_size) nogil:
    """Calculate the weighted median of y_vals.

    y_vals and weights are each read as array_size doubles. Both arrays are
    sorted in place (ordered by value, each weight kept with its value) as a
    side effect of this call.

    Returns the weighted median, or 0.0 when array_size is not positive.
    """
    cdef long i, median_index
    cdef double sum_weights, half_weight, total_sum

    # nothing to read: avoid touching weights[0] / y_vals[0]
    if array_size <= 0:
        return 0.0

    sum_weights = 0.0
    for i in range(array_size):
        sum_weights += weights[i]
    half_weight = sum_weights / 2

    sort_values_and_weights(y_vals, weights, 0, array_size - 1)

    # total_sum is the combined weight of every element after median_index;
    # advance until no more than half of the total weight lies above it.
    # The index bound keeps the scan inside the arrays whatever the weights.
    median_index = 0
    total_sum = sum_weights - weights[0]
    while total_sum > half_weight and median_index + 1 < array_size:
        median_index += 1
        total_sum -= weights[median_index]

    # exact tie: average with the next value, provided there is one
    if total_sum == half_weight and median_index + 1 < array_size:
        return (y_vals[median_index] + y_vals[median_index + 1]) / 2
    return y_vals[median_index]
            
                
cdef void sort_values_and_weights(double* y_vals, double* weights,
                                  long low, long high) nogil:
    """Quicksort y_vals[low..high] in place, mirroring every swap in weights.

    low and high are inclusive indices. The first element of the range is
    used as the pivot, and each weight stays paired with its value.
    """
    cdef long pivot, i, j
    cdef double temp
    if low < high:
        pivot = low
        i = low
        j = high
        while i < j:
            # test the bounds before reading so that y_vals[high + 1]
            # (one past the range) is never touched
            while i <= high and y_vals[i] <= y_vals[pivot]:
                i += 1
            while j >= low and y_vals[j] > y_vals[pivot]:
                j -= 1
            if i < j:
                temp = y_vals[i]
                y_vals[i] = y_vals[j]
                y_vals[j] = temp

                temp = weights[i]
                weights[i] = weights[j]
                weights[j] = temp

        # move the pivot value, and its weight, into the final position
        temp = y_vals[j]
        y_vals[j] = y_vals[pivot]
        y_vals[pivot] = temp

        temp = weights[j]
        weights[j] = weights[pivot]
        weights[pivot] = temp

        sort_values_and_weights(y_vals, weights, low, j-1)
        sort_values_and_weights(y_vals, weights, j+1, high)

# number of (value, weight) pairs passed to the C routine
cdef long array_size = 999

# value and weight arrays for use by Cython
values_cython = np.random.uniform(0, 100, array_size)
weights_cython = np.random.uniform(0, 100, array_size)

# raw C pointers to the data buffers of the two NumPy arrays
cdef double* values_pointer = <double*> (<np.ndarray> values_cython).data
cdef double* weights_pointer = <double*> (<np.ndarray> weights_cython).data

c_weighted_median = compute_weighted_median(values_pointer, weights_pointer, array_size)
print "Weighted median as calculated by Cython: {}".format(c_weighted_median)

Weighted median as calculated by Cython: 52.5556328503
