# Arrays and references

Authors: Pierre Ablin, Mathurin Massias

Some caveats when using numpy arrays.

In [1]:
import numpy as np

In [2]:
a = np.zeros(4)  # 1-D array holding four zeros; reused by the next cells
print(a)

[0. 0. 0. 0.]


In [3]:
# The function receives a reference to the caller's array, not a copy
# (among other things, this avoids a heavy memory copy if x is large):
def f(x):
    """Set the first entry of ``x`` to 12, in place."""
    x[0] = 12
    

print(a)
f(a)
print(a)
# modifying x inside the function also changes the array outside of it

[0. 0. 0. 0.]
[12.  0.  0.  0.]


In [4]:
# To avoid side effects, work on a private copy of the argument:
def g(x):
    """Set the first entry of a copy of ``x`` to 24; the caller's array is left untouched."""
    private = x.copy()  # new array with its own data
    private[0] = 24

print(a)
g(a)  # call g, the copying version defined in this cell (not f)
print(a)
# a is not affected: g only modified its own copy of the array

[12.  0.  0.  0.]
[12.  0.  0.  0.]


In [5]:
# what happens if we do this?
def h(x):
    """Compute ``x + 6`` and bind the result to the local name ``x`` only."""
    x = x + 6

print(a)
h(a)
print(a)
# a is untouched: x = x + 6 means "compute x + 6 in a new array, and make the
# local name x refer to it"; the array passed by the caller is never written to

[12.  0.  0.  0.]
[12.  0.  0.  0.]


In [16]:
# but this will modify a:
def h_view(x):
    """Add 6 to every entry of ``x``, writing the result into ``x`` itself."""
    x[:] = x + 6

print(a)
h_view(a)
print(a)
# x[:] = x + 6 means: compute x + 6, then fill the existing array x with the result.
# It is useful in iterative code: x keeps referring to the same array, so every other
# reference to it sees the update (x + 6 itself is still computed in a temporary array).

[12.  0.  0.  0.]
[18.  6.  6.  6.]


`x[:]` is what we call a _view_ on `x`: it is a different array object (`a[:] is a` evaluates to `False`), but it points to the same data, possibly with a different way of stepping from one row/column to the next (different strides). Views are useful because they avoid copying data, which keeps memory usage low. Let's see other examples of _views_:

In [7]:
a[:] is a  # slicing creates a new array object, so the identity test fails

False

In [8]:
A = np.ones([5, 3])
B = A.T  # B is not A, but B shares A's data (the transpose is a view)
B[0, :] = 0  # set the first row of B to 0
print(A)  # the first column of A has changed

[[0. 1. 1.]
 [0. 1. 1.]
 [0. 1. 1.]
 [0. 1. 1.]
 [0. 1. 1.]]


This is useful, for example, when computing the linear regression gradient `A.T.dot(A.dot(x) - b)`: numpy does not create a new array to store `A.T`.
Other views:

In [9]:
A = np.ones((5, 3))
B = A[::2]  # basic slicing: B is a view on the even rows (0, 2, 4) of A
B[0] = 0  # writes through to the first row of A
print(A)  # A has changed!

[[0. 0. 0.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [10]:
A = np.ones((5, 3))
B = A[[0, 2, 4]]  # indexing with a list of rows returns a copy of the even rows, not a view
B[0] = 2  # only the copy is modified
print(A)  # A has not changed

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [11]:
# A little more on the previous ideas, without functions:
a = np.zeros(6)
b = np.zeros(6)
print(id(a))  # identity of a (in CPython, its address in memory)
print(id(b))  # this value is different: a and b are not the same object
print(a is b)  # 'is' tests whether the two names refer to the same object
# the identities differ, so a and b are two distinct objects

140253321109632
140253320980800
False


In [12]:
b = a  # no copy is made: b is just a second name for the same array
print(a is b)  # True
b[0] = 5
print(b)
print(a)
# a and b refer to the same object, so modifying b also modifies a:

True
[5. 0. 0. 0. 0. 0.]
[5. 0. 0. 0. 0. 0.]


In [13]:
b = b + 5  # b + 5 is computed into a new array and the name b is rebound to it (a still refers to the old array)
print(a)
print(b)
# b now shows the new values but a is unchanged:

[5. 0. 0. 0. 0. 0.]
[10.  5.  5.  5.  5.  5.]


In [14]:
b = a  # b refers to the same array as a again
b[:] = b + 5  # write b + 5 into the existing array instead of rebinding b
print(a)
print(b)
# b is modified and so is a:

[10.  5.  5.  5.  5.  5.]
[10.  5.  5.  5.  5.  5.]


In [15]:
# This can be tricky in SAG code. The following code does not do what you think it does:
n, p = 10, 100
def grad_i(i, x): 
    """Placeholder gradient: return a random vector of size p (i and x are ignored)."""
    return np.random.randn(p)

memory_gradient = np.zeros([n, p])  # row i stores the last gradient computed for index i

x = np.zeros(p)
for idx in range(50):
    i = idx % n  # cycle through the n indices
    old_g_i = memory_gradient[i, :]  # /!\ old_g_i is a view: it just points to memory_gradient[i, :]
    # so changing memory_gradient[i, :] will change old_g_i!
    new_g_i = grad_i(i, x)
    memory_gradient[i, :] = new_g_i  # this also affects old_g_i as a side effect
    # old_g_i and new_g_i now have the same values!
    # so a running mean updated with (new_g_i - old_g_i) / n would never change and the algorithm fails
    # (to keep the previous values, take a copy instead: memory_gradient[i, :].copy())
    assert (new_g_i == old_g_i).all()  # this is True, not what you expect.
    
    # other stuff of the algorithm
    # x -= step * ...
    