1 Import the numpy package under the name np

In [2]:
import numpy as np
import scipy as sp

2 Print the numpy version and the configuration

In [3]:
print(np.__version__)

1.10.1


3 Create a null vector of size 10

In [4]:
z = np.zeros(10)
print(z)

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


4 How to get the documentation of the numpy add function from the command line ?

In [5]:
np.info(np.add)

add(x1, x2[, out])

Add arguments element-wise.

Parameters
----------
x1, x2 : array_like
    The arrays to be added.  If ``x1.shape != x2.shape``, they must be
    broadcastable to a common shape (which may be the shape of one or
    the other).

Returns
-------
add : ndarray or scalar
    The sum of `x1` and `x2`, element-wise.  Returns a scalar if
    both  `x1` and `x2` are scalars.

Notes
-----
Equivalent to `x1` + `x2` in terms of array broadcasting.

Examples
--------
>>> np.add(1.0, 4.0)
5.0
>>> x1 = np.arange(9.0).reshape((3, 3))
>>> x2 = np.arange(3.0)
>>> np.add(x1, x2)
array([[  0.,   2.,   4.],
       [  3.,   5.,   7.],
       [  6.,   8.,  10.]])


5 Create a null vector of size 10 but the fifth value which is 1

In [6]:
z = np.zeros(10)
z[4] = 1
print(z)

[ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]


6 Create a vector with values ranging from 10 to 49

In [7]:
x = np.arange(10, 49)
x[0] = 1
print(x)

[ 1 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
 35 36 37 38 39 40 41 42 43 44 45 46 47 48]


7 Reverse a vector (first element becomes last)

In [8]:
x = np.arange(100)
print(x[20:1:-1])
print(x[20:1:-7])
print(x[::-1])

[20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2]
[20 13  6]
[99 98 97 96 95 94 93 92 91 90 89 88 87 86 85 84 83 82 81 80 79 78 77 76 75
 74 73 72 71 70 69 68 67 66 65 64 63 62 61 60 59 58 57 56 55 54 53 52 51 50
 49 48 47 46 45 44 43 42 41 40 39 38 37 36 35 34 33 32 31 30 29 28 27 26 25
 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0]


8 Create a 3x3 matrix with values ranging from 0 to 8

In [9]:
x = np.arange(9)
y = np.vstack([x[0:3], x[3:6], x[6:9]])
print(y)

y = np.reshape(x, newshape = (3, 3))
print(y)

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[0 1 2]
 [3 4 5]
 [6 7 8]]


9 Find indices of non-zero elements from [1,2,0,0,4,0]

In [10]:
x = [1, 2, 0, 0, 4, 0]
y = [i == 0 for i in x]
print(y)

[False, False, True, True, False, True]


10 Create a 3x3 identity matrix

In [11]:
x = np.identity(3)
print(x)

[[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]]


11 Create a 3x3x3 array with random values

In [12]:
y = np.random.normal(size = 27)
x = np.reshape(y, (3, 3, 3))
print(x)

[[[-0.30762244  1.13051479  0.65941757]
  [ 0.75746956  0.49534201 -2.10289084]
  [ 2.04393897 -1.70403894  0.23024014]]

 [[-0.37304324 -2.23816291 -0.40560343]
  [-0.4397355   0.79181846  0.5295583 ]
  [-1.03668425 -1.16708904 -1.09368245]]

 [[ 0.64914746 -1.30467822  0.3682863 ]
  [-1.04641718  0.8218299   1.08946102]
  [-1.032714   -1.87277088  0.3768051 ]]]


12 Create a 10x10 array with random values and find the minimum and maximum values

In [13]:
z = np.random.random((10, 10))
print(z.min())
print(z.max())

0.0171924058366
0.998362401507


13 Create a random vector of size 30 and find the mean value

In [14]:
z = np.random.random(30)
z.mean()

0.5776602665662558

14 Create a 5x5 matrix with values 1,2,3,4 just below the diagonal

In [15]:
M = np.zeros((5, 5))
for row in range(0, 5):
    for column in range(0, 5):
        M[row, column] = max(row - column, 0)
print(M)

[[ 0.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.]
 [ 2.  1.  0.  0.  0.]
 [ 3.  2.  1.  0.  0.]
 [ 4.  3.  2.  1.  0.]]


In [16]:
Z = np.diag(1+np.arange(4),k=-1)
print(Z)

[[0 0 0 0 0]
 [1 0 0 0 0]
 [0 2 0 0 0]
 [0 0 3 0 0]
 [0 0 0 4 0]]


15 Create a 8x8 matrix and fill it with a checkerboard pattern

In [17]:
M = np.zeros((8,8), dtype = int)
for i in range(-7, 7, 2):
    length = 8 - abs(i)
    M = M + np.diag(np.ones(length), k = i)
print(M)

[[ 0.  1.  0.  1.  0.  1.  0.  0.]
 [ 1.  0.  1.  0.  1.  0.  1.  0.]
 [ 0.  1.  0.  1.  0.  1.  0.  1.]
 [ 1.  0.  1.  0.  1.  0.  1.  0.]
 [ 0.  1.  0.  1.  0.  1.  0.  1.]
 [ 1.  0.  1.  0.  1.  0.  1.  0.]
 [ 0.  1.  0.  1.  0.  1.  0.  1.]
 [ 1.  0.  1.  0.  1.  0.  1.  0.]]


In [18]:
Z = np.zeros((8,8),dtype=int)
# Hard to understand, but can leave out unneeded values
Z[1::2,::2] = 1
Z[::2,1::2] = 1
# More clear with all values:
Y = np.zeros((8,8),dtype=int)
Y[1:8:2,0:8:2] = 1
Y[0:8:2,1:8:2] = 1
print(Z)
print(Y)

[[0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]]
[[0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]]


16 Create a checkerboard 8x8 matrix using the tile function

In [19]:
M = np.diag([1, 1])
M2 = np.tile(M, reps = (4, 4))
print(M2)

[[1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]
 [1 0 1 0 1 0 1 0]
 [0 1 0 1 0 1 0 1]]


17 Normalize a 5x5 random matrix

In [20]:
M = np.random.random((5, 5))
mu = M.mean()
sigma = M.std()
M2 = (M - mu)/sigma
print(M2.mean(), M2.std())

-6.43929354283e-17 1.0


18 Multiply a 5x3 matrix by a 3x2 matrix (real matrix product)

In [21]:
A = np.random.random((5, 3))
B = np.random.random((3, 2))
np.dot(A, B)

array([[ 0.54845277,  0.93610354],
       [ 0.5083926 ,  0.76914566],
       [ 0.74300885,  0.85493792],
       [ 0.71621441,  0.72214397],
       [ 0.3556726 ,  0.6451011 ]])

19 Create a 5x5 matrix with row values ranging from 0 to 4

In [22]:
A = np.random.binomial(size = (5,5), n = 5, p = .5)
print(A)

[[3 2 3 2 2]
 [2 3 3 4 1]
 [3 4 1 3 3]
 [1 3 1 3 3]
 [2 2 3 0 2]]


20 Create a vector of size 10 with values ranging from 0 to 1, both excluded

In [23]:
np.arange(start = 1/11, stop = 1, step = 1/11)

array([ 0.09090909,  0.18181818,  0.27272727,  0.36363636,  0.45454545,
        0.54545455,  0.63636364,  0.72727273,  0.81818182,  0.90909091])

21 Create a random vector of size 10 and sort it

In [24]:
x = np.random.randint(size = 10, low = 0, high = 100)
print(x)
print(np.sort(x))

[23 25 73 59 97 60 46 71 44 93]
[23 25 44 46 59 60 71 73 93 97]


22 Consider two random array A anb B, check if they are equal

In [25]:
A = np.random.random((3, 3))
B = np.random.random((3, 3))
(A == B).all()

False

23 Make an array immutable (read-only)

In [26]:
x = np.zeros(10)
x.flags.writeable = False
print(x)
# Throws error:
# x[0] = 1

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]


24 Consider a random 10x2 matrix representing cartesian coordinates, convert them to polar coordinates

In [27]:
coords = np.random.normal(size = (10, 2))
x = coords[:, 0]
y = coords[:, 1]
coordsPolar = np.zeros((10, 2))
coordsPolar[:, 0] = np.sqrt(x*x + y*y)
coordsPolar[:, 1] = np.arctan2(y, x)
np.hstack((coords, coordsPolar))

array([[ 0.47637095, -1.32448762,  1.40754991, -1.22553794],
       [ 0.33298117,  0.15958153,  0.36924616,  0.44691097],
       [ 1.13555754,  0.68425763,  1.32578257,  0.54231012],
       [ 0.80584706,  0.87121936,  1.18676562,  0.82435859],
       [ 0.17904783,  0.20335843,  0.27094792,  0.84888545],
       [ 0.66608109, -0.21901002,  0.70116289, -0.31766849],
       [-1.00817785,  0.23967413,  1.03627519,  2.90819514],
       [ 1.08405739, -0.83960786,  1.37117533, -0.65900074],
       [-0.7144077 , -0.78432484,  1.0609165 , -2.30957744],
       [ 1.00611102, -0.7736007 ,  1.26914043, -0.655489  ]])

25 Create random vector of size 10 and replace the maximum value by 0

In [28]:
x = np.random.rand(10)
print(x)
x[x.argmax()] = 0
print(x)

[ 0.25214142  0.58702175  0.2656311   0.13100649  0.52827158  0.41945266
  0.24437953  0.30195678  0.04819004  0.89059317]
[ 0.25214142  0.58702175  0.2656311   0.13100649  0.52827158  0.41945266
  0.24437953  0.30195678  0.04819004  0.        ]


26 Create a structured array with x and y coordinates covering the [0,1]x[0,1] area

In [29]:
ans = [(x, y) for x in np.arange(0, 1, .1) for y in np.arange(0, 1, .1)]

27 Print the minimum and maximum representable value for each numpy scalar type

In [30]:
for dtype in [np.int8, np.int32, np.int64]:
   print(np.iinfo(dtype).min)
   print(np.iinfo(dtype).max)
for dtype in [np.float32, np.float64]:
   print(np.finfo(dtype).min)
   print(np.finfo(dtype).max)
   print(np.finfo(dtype).eps)

-128
127
-2147483648
2147483647
-9223372036854775808
9223372036854775807
-3.40282e+38
3.40282e+38
1.19209e-07
-1.79769313486e+308
1.79769313486e+308
2.22044604925e-16


28 Create a structured array representing a position (x,y) and a color (r,g,b)

In [31]:
ans = np.zeros(10, [('position', [('x', float, 1),
                                  ('y', float, 1)]),
                    ('color', [('r', float, 1),
                               ('g', float, 1),
                               ('b', float, 1)])])
ans

array([((0.0, 0.0), (0.0, 0.0, 0.0)), ((0.0, 0.0), (0.0, 0.0, 0.0)),
       ((0.0, 0.0), (0.0, 0.0, 0.0)), ((0.0, 0.0), (0.0, 0.0, 0.0)),
       ((0.0, 0.0), (0.0, 0.0, 0.0)), ((0.0, 0.0), (0.0, 0.0, 0.0)),
       ((0.0, 0.0), (0.0, 0.0, 0.0)), ((0.0, 0.0), (0.0, 0.0, 0.0)),
       ((0.0, 0.0), (0.0, 0.0, 0.0)), ((0.0, 0.0), (0.0, 0.0, 0.0))], 
      dtype=[('position', [('x', '<f8'), ('y', '<f8')]), ('color', [('r', '<f8'), ('g', '<f8'), ('b', '<f8')])])

29 Consider a random vector with shape (100,2) representing coordinates, find point by point distances

In [32]:
randVec = np.random.rand(100, 2)
dist = np.zeros((100, 100))
for i in np.arange(0, 100):
    for j in np.arange(0, 100):
        dist[i, j] = (randVec[i, :] - randVec[j, :]).sum()
# Print a subset
dist[np.ix_(np.arange(0, 4), np.arange(0, 4))]

array([[ 0.        ,  0.22120261,  0.74721822, -0.04370614],
       [-0.22120261,  0.        ,  0.52601561, -0.26490875],
       [-0.74721822, -0.52601561,  0.        , -0.79092436],
       [ 0.04370614,  0.26490875,  0.79092436,  0.        ]])

In [33]:
randVec = np.random.random((5,2))
X,Y = np.atleast_2d(randVec[:,0]), np.atleast_2d(randVec[:,1])
print(X)
print(X.T)
print("")
D = np.sqrt( (X-X.T)**2 + (Y-Y.T)**2)
print(D)

# Much faster with scipy
import scipy
# Thanks Gavin Heverly-Coulson (#issue 1)
import scipy.spatial

D = scipy.spatial.distance.cdist(randVec,randVec)
print(D)

[[ 0.26050017  0.69154287  0.02605483  0.66506893  0.82715761]]
[[ 0.26050017]
 [ 0.69154287]
 [ 0.02605483]
 [ 0.66506893]
 [ 0.82715761]]

[[ 0.          0.62271157  0.66178431  0.43604963  1.06372777]
 [ 0.62271157  0.          0.68672292  0.28795664  0.47077438]
 [ 0.66178431  0.68672292  0.          0.7851418   0.84907749]
 [ 0.43604963  0.28795664  0.7851418   0.          0.75515609]
 [ 1.06372777  0.47077438  0.84907749  0.75515609  0.        ]]
[[ 0.          0.62271157  0.66178431  0.43604963  1.06372777]
 [ 0.62271157  0.          0.68672292  0.28795664  0.47077438]
 [ 0.66178431  0.68672292  0.          0.7851418   0.84907749]
 [ 0.43604963  0.28795664  0.7851418   0.          0.75515609]
 [ 1.06372777  0.47077438  0.84907749  0.75515609  0.        ]]


30 Consider the following file:

1,2,3,4,5

6,,,7,8

,,9,10,11

How can we read it (saved as sampleFile.csv)?

In [35]:
# Doesn't seem to work
import csv
with open("sampleFile.csv") as myFile:
    reader = csv.reader(file, delimiter = ",")
    for row in reader:
        print(1)
        print(", ".join(row))

NameError: name 'file' is not defined

In [None]:
x = np.genfromtxt("sampleFile.csv", delimiter = ",")
print(x)

31 Generate a generic 2D Gaussian-like array

In [None]:
data = np.random.normal(loc = 0, scale = 1, size = (10, 2))
print(data)
np.matmul(data, np.array([[2, 0], [0, 1]]))

In [None]:
X, Y = np.meshgrid(np.linspace(-1,1,10), np.linspace(-1,1,10))
D = np.sqrt(X*X+Y*Y)
sigma, mu = 1.0, 0.0
G = np.exp(-( (D-mu)**2 / ( 2.0 * sigma**2 ) ) )
print(G)

32 How to randomly place p elements in a 2D array?

In [None]:
n = 10
p = 3
Z = np.zeros((n,n))
for point in np.arange(0, 3):
    x = np.random.randint(0, 10)
    y = np.random.randint(0, 10)
    Z[x, y] = 1
print(Z)

In [None]:
np.put(Z, np.random.choice(range(n*n), p, replace=False),1)
print(Z)
np.random.choice(range(n*n), p, replace=False)

33 Subtract the mean of each row of a matrix

In [None]:
M = np.random.rand(5, 5)
print(M)
for j in np.arange(0, M.shape[1]):
    M[np.arange(0, 5), j] -= M[np.arange(0, 5), j].mean()
print(M)

In [None]:
# Recent versions of numpy
print(M.mean(axis = 1, keepdims=True))
print(M.mean(axis = 0, keepdims=True))
Y = M - M.mean(axis=1, keepdims=True)
print(Y)

34 How to I sort an array by the nth column ?

In [None]:
# np.info(np.sort)
values = [("Josh", 29), ("Maria", 30), ("Diego", 2), ("Stella", 0)]
M = np.array(values, dtype = [("name", "S10"), ("age", int)])
print(M)
print(np.sort(M, order = "age"))

In [None]:
# np.info(np.argsort)
M = np.random.randint(1, 10, (4, 4))
print(M)
# Argsort returns a sorting index, not the actual order/rank
index1 = np.argsort(M, axis = 1)
index2 = np.argsort(M, axis = 0)
print("-------------")
print(index1)
print("-------------")
# for i in np.arange(0, 4):
#     print(M[index2[:, i]])
# print("-------------")
# M2 = M
# for row in np.arange(0, 4):
#     M2[row, :] = M2[row, :][index1[3, :]]
# print(M2)
for i in np.arange(0, 4):
    # Without the copy, it works like data.table (copies reference only)
    M2 = M.copy()
    for row in np.arange(0, 4):
        M2[row, :] = M2[row, :][index1[i, :]]
    print(M2)

In [None]:
Z = np.random.randint(0,10,(3,3))
print(Z)
print(Z[Z[:,0].argsort()])

35 How to tell if a given 2D array has null columns ?

In [None]:
Z = np.random.randint(0,3,(3,10))
print((~Z.any(axis=0)).any())

36 Find the nearest value from a given value in an array

In [None]:
Z = np.random.uniform(0,1,10)
test = .3
print(Z)
np.argmin(abs(Z - test))

37 Consider a generator function that generates 10 integers and use it to build an array

In [None]:
def generator(n):
    return(np.random.randint(0, 10, n))
generator(5)

In [73]:
def generate():
    for x in np.arange(0, 10):
        yield x
z = [y for y in generate()]
print(z)
print(type(z))
z = np.array(z)
print(z)
print(type(z))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
<class 'list'>
[0 1 2 3 4 5 6 7 8 9]
<class 'numpy.ndarray'>


DETOUR: Problem 1: Write an iterator class reverse_iter, that takes a list and iterates it from the reverse direction. ::

In [None]:
class reverse_iter:
    def __init__(self, n):
        self.i = n

    def next(self):
        if self.i >= 0:
            i = self.i
            self.i -= 1
            return(i)
        else:
            raise StopIteration()

class yrange:
    def __init__(self, n):
        self.i = 0
        self.n = n

    def __iter__(self):
        return(self)

    def next(self):
        if self.i < self.n:
            i = self.i
            self.i += 1
            return i
        else:
            raise StopIteration()

# y = reverse_iter(10)
y = yrange(10)
for i in [1, 2, 3, 4, 5]:
        print(y.next())

In [36]:
sum(x*x for x in np.arange(0, 10))

285

Problem 2: Write a program that takes one or more filenames as arguments and prints all the lines which are longer than 40 characters.

In [59]:
filenames = ["sampleTextFiles/longfile.txt", "sampleTextFiles/smallFile.txt", "sampleTextFiles/pyramid.txt"]
files = ["/home/josh/Documents/Github/GithubSandbox/Python/sampleTextFiles/" + x
         for x in ("longFile.txt", "smallFile.txt", "pyramid.txt")]
def readlines(filenames):
    for f in filenames:
        for line in open(f):
            yield line

def over40(filenames):
    for line in readlines(filenames):
        if(len(line) > 40): print(line)
            
over40(files)

This file has one really, really, really, really, really, really, really, really, really, really, really, really, really, really, really, really, really, really, really, really, really long line.

Hello Hello Hello Hello Hello Hello Hello.

Hello Hello Hello Hello Hello Hello Hello Hello.

Hello Hello Hello Hello Hello Hello Hello Hello Hello.

Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello.

Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello.

Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello.

Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello.

Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello.



38 Consider a given vector, how to add 1 to each element indexed by a second vector (be careful with repeated indices) ?

In [85]:
x = np.arange(10)
print(x)
index = np.random.randint(0, 10, 5)
for i in index:
    x[i] += 1
print(x)
print(index)

[0 1 2 3 4 5 6 7 8 9]
[1 1 2 4 6 5 7 7 8 9]
[6 0 3 4 4]


In [86]:
print(index)
print(np.bincount(index, minlength=len(x)))

[6 0 3 4 4]
[1 0 0 1 2 0 1 0 0 0]


39 How to accumulate elements of a vector (X) to an array (F) based on an index list (I)?

In [103]:
X = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
print(X)
I = np.random.permutation(X)
print(I)
F = np.zeros(10)
print(F)
F += np.bincount(I, X)
print(F)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0 4 2 8 1 3 6 7 9 5]
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
[ 0.  4.  2.  5.  1.  9.  6.  7.  3.  8.]
