# Numpy

In [44]:
import numpy as np

In [45]:
np.__version__

'2.3.1'

### 1. Compare speed performance between numpy and python core

In [46]:
import time
size_vector = 100000

def time_of_python_standard():
    time_start = time.time()
    vector_a = range(size_vector)
    vector_b = range(size_vector)
    new_vector = [ vector_a[i] + vector_b[i] for i in range(size_vector) ]
    return time.time() - time_start
    
def time_of_numpy():
    time_start = time.time()
    vector_a = np.arange(size_vector)
    vector_b = np.arange(size_vector)
    new_vector = vector_a + vector_b
    return time.time() - time_start

print(f'Time of standard python: {time_of_python_standard()}')
print(f'Time of numpy: {time_of_numpy()}')

print(f'Numpy is in this example {time_of_python_standard()/time_of_numpy()} faster.')

Time of standard python: 0.010977506637573242
Time of numpy: 0.0006718635559082031
Numpy is in this example 26.780904522613067 faster.


### 2. Creating Numpy Arrays

In [76]:
# Cration with evently spaced values

a = np.arange(1, 10)
b = np.arange(1, 10, 2) # third parameter is step
c = np.arange(0.1, 0.9, 0.1, float) # fourth parameter is step
print(a)
print(b)
print(c)

[1 2 3 4 5 6 7 8 9]
[1 3 5 7 9]
[0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8]


In [77]:
# Creation of a ndarray with equally spaced samples, by default 50 samples; if retstep is True then the function will also return the vlaue of the spacing value 
a = np.linspace(1, 10, 3)
b = np.linspace(1, 10, 6)
print(a)
print(b)

[ 1.   5.5 10. ]
[ 1.   2.8  4.6  6.4  8.2 10. ]


In [78]:
# n-dimensional arrays and shape of an array

a = np.array(42)
b = np.array([1,2,3,4,5,6])
c = np.array([[1,2,3], [4,5,6]])
print(a, "\t", type(a), "\t", np.ndim(a), np.shape(a))
print(b, "\t", type(b), "\t", np.ndim(b), np.shape(b))
print(c, "\t", type(c), "\t", np.ndim(c), np.shape(c))

42 	 <class 'numpy.ndarray'> 	 0 ()
[1 2 3 4 5 6] 	 <class 'numpy.ndarray'> 	 1 (6,)
[[1 2 3]
 [4 5 6]] 	 <class 'numpy.ndarray'> 	 2 (2, 3)


In [93]:
# Indexing and Slicing
# ndarray[start:stop:step]

a = np.array([1, 1, 2, 3, 5, 8, 13, 21])
b = np.array([[1,2,3], [4,5,6]])
c = np.arange(28).reshape(4, 7)
print(a[0])
print(a[-1])
print(b[1,0])
print(b[1:2, 1])
print(b[: , 1])
print(c[::2, ::3])

1
21
4
[5]
[2 5]
[[ 0  3  6]
 [14 17 20]]


In [102]:
# Creating arrays with ones, zerso and empty

a = np.ones((2,3))
b = np.zeros((2,3))
c = np.empty((2,3))
print(a)
print(b)
print(c)

[[1. 1. 1.]
 [1. 1. 1.]]
[[0. 0. 0.]
 [0. 0. 0.]]
[[0. 0. 0.]
 [0. 0. 0.]]


In [116]:
# Copying arrays and creating identity functions

a = np.array([[1,2,3], [4,5,6]])
b = np.identity(4)
c = np.eye(3, 5, k=1)
print(a[0,0])
print(b)
print(c)

1
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
[[0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]]


### 3. Numpy Data Objects, dtype

In [119]:
# dtype 

a = np.array([1,2,3])
b = np.array([1,2,3], dtype=np.float32)
print(a.dtype)
print(b.dtype)

int64
float32


In [134]:
# Structured Arrays

atype = np.dtype([("type1", np.int32)])
a = np.array([100, 200, 300], dtype=atype)
print( a['type1'] )

b = np.dtype([
    ('column1', 'S20'), ('column2','i4')
])

c = np.array([
    ('element1', 100), ('element2', 200), ('element3', 300)
], dtype=b)
c

[100 200 300]


array([(b'element1', 100), (b'element2', 200), (b'element3', 300)],
      dtype=[('column1', 'S20'), ('column2', '<i4')])

In [140]:
# Input and output of structured arrays

np.savetxt("file1.csv", c, fmt="%s;%d", delimiter=";")
inpu = np.genfromtxt("file1.csv", dtype=b, delimiter=";")

In [150]:
# Operations

dt = np.dtype([('Columns1', 'U20'), ('Column2', 'i4')])
table1 =  np.array([
    ('element1', 200), ('element2', 310), ('element3', 420), ('element4', 530)
], dtype=dt)

table2 =  np.array([
     ('element1', 1000), ('element2', 2000), ('element3', 3000), ('element4', 4000)
], dtype=dt)

a = table1 != table2
b = table1['Column2'] - table2['Column2']
b

array([ -800, -1690, -2580, -3470], dtype=int32)

### 4. Numerical Operations on Numpy Arrays

In [161]:
# Scalars and arithmetics operations with two arrays

a = np.array([1,2,3,4,5])
print( a+2 )
print( a*2 )
print( a-2 )
print( a**2 )
print( a/2 )

b = np.array([[1,2,3], [4,5,6], [7,8,9]]) 
c = np.ones((3,3))
print( b * (c + 1) )

[3 4 5 6 7]
[ 2  4  6  8 10]
[-1  0  1  2  3]
[ 1  4  9 16 25]
[0.5 1.  1.5 2.  2.5]
[[ 2.  4.  6.]
 [ 8. 10. 12.]
 [14. 16. 18.]]


In [165]:
# Matrix Multiplication and two dimmensional arrays

print( np.dot(b, c) , "\n")
print( b * c , "\n")

[[ 6.  6.  6.]
 [15. 15. 15.]
 [24. 24. 24.]] 

[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]] 



In [175]:
# Comparison operators and Logical Operators

a = np.array([ [11, 12, 13], [21, 22, 23], [31, 32, 33] ])
b = np.array([ [11, 102, 13], [201, 22, 203], [31, 32, 303] ])
print(a == b)
print(np.array_equal(a, b))

a = np.array([ [True, True], [False, False]])
b = np.array([ [True, False], [True, False]])
print(np.logical_or(a, b))
print(np.logical_and(a, b))

[[ True False  True]
 [False  True False]
 [ True  True False]]
False
[[ True  True]
 [ True False]]
[[ True False]
 [False False]]


In [196]:
# Broadcasting

a = np.array([ [11, 12, 13], [21, 22, 23], [31, 32, 33] ])
b = np.array([1, 2, 3])

print(a * b, "\n")
print(a + b, "\n")

# how to turn a row vector into a column vector 

c = np.array([1,2,3])
print( c[:, np.newaxis], "\n" )

# Concatenate
b = np.array([1,2,3])
b = b[np.newaxis, :]
b = np.concatenate((b,b,b))
print(b, "\n")

# tile
b = np.tile(np.array([1,2,3]), (3,1))
print(b)

[[11 24 39]
 [21 44 69]
 [31 64 99]] 

[[12 14 16]
 [22 24 26]
 [32 34 36]] 

[[1]
 [2]
 [3]] 

[[1 2 3]
 [1 2 3]
 [1 2 3]] 

[[1 2 3]
 [1 2 3]
 [1 2 3]]


In [212]:
# Distance matrix
matrix = np.array( [0,  1498, 1063, 1968, 1498, 1758, 1469, 1472, 2230] )
matrix - matrix[:, np.newaxis]

array([[    0,  1498,  1063,  1968,  1498,  1758,  1469,  1472,  2230],
       [-1498,     0,  -435,   470,     0,   260,   -29,   -26,   732],
       [-1063,   435,     0,   905,   435,   695,   406,   409,  1167],
       [-1968,  -470,  -905,     0,  -470,  -210,  -499,  -496,   262],
       [-1498,     0,  -435,   470,     0,   260,   -29,   -26,   732],
       [-1758,  -260,  -695,   210,  -260,     0,  -289,  -286,   472],
       [-1469,    29,  -406,   499,    29,   289,     0,     3,   761],
       [-1472,    26,  -409,   496,    26,   286,    -3,     0,   758],
       [-2230,  -732, -1167,  -262,  -732,  -472,  -761,  -758,     0]])

### 5. Numpy Arrays: Concatenating, Flattening and Adding Dimensions

In [220]:
# flatten

a = np.array([[[ 0,  1],
               [ 2,  3],
               [ 4,  5],
               [ 6,  7]],
              [[ 8,  9],
               [10, 11],
               [12, 13],
               [14, 15]],
              [[16, 17],
               [18, 19],
               [20, 21],
               [22, 23]]])

print(a.flatten())
print(a.flatten(order="C"))
print(a.flatten(order="F"))
print(a.flatten(order="A"))

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]
[ 0  8 16  2 10 18  4 12 20  6 14 22  1  9 17  3 11 19  5 13 21  7 15 23]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23]


In [226]:
# reshape

a = np.array(range(24))
print(a.reshape((2,3,4)))

[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]


In [239]:
# Concatenating Arrays

a = np.array([11,22])
b = np.array([18,7,6])
c = np.array([1,3,5])
print(np.concatenate((a,b,c)), "\n")

a = np.array(range(24))
a = a.reshape((3,4,2))
b = np.array(range(100,124))
b = b.reshape((3,4,2))
print( "Axis = 0\n", np.concatenate((a,b)) , "\n")
print( "Axis = 1\n", np.concatenate((a,b), axis=1) )

[11 22 18  7  6  1  3  5] 

Axis = 0
 [[[  0   1]
  [  2   3]
  [  4   5]
  [  6   7]]

 [[  8   9]
  [ 10  11]
  [ 12  13]
  [ 14  15]]

 [[ 16  17]
  [ 18  19]
  [ 20  21]
  [ 22  23]]

 [[100 101]
  [102 103]
  [104 105]
  [106 107]]

 [[108 109]
  [110 111]
  [112 113]
  [114 115]]

 [[116 117]
  [118 119]
  [120 121]
  [122 123]]] 

Axis = 1
 [[[  0   1]
  [  2   3]
  [  4   5]
  [  6   7]
  [100 101]
  [102 103]
  [104 105]
  [106 107]]

 [[  8   9]
  [ 10  11]
  [ 12  13]
  [ 14  15]
  [108 109]
  [110 111]
  [112 113]
  [114 115]]

 [[ 16  17]
  [ 18  19]
  [ 20  21]
  [ 22  23]
  [116 117]
  [118 119]
  [120 121]
  [122 123]]]


In [255]:
# Adding new dimensions, stacking and tile

a = np.array([1,2,3,4,5])
a = a[:, np.newaxis]
print(a)

a = np.array([3,4,5])
b = np.array([1,9,0])
print(np.vstack((a,b)))
print(np.column_stack((a,b)))

a = np.array([ [1, 2], [3, 4]])
print( np.tile(a, (3,4)) )

[[1]
 [2]
 [3]
 [4]
 [5]]
[[3 4 5]
 [1 9 0]]
[[3 1]
 [4 9]
 [5 0]]
[[1 2 1 2 1 2 1 2]
 [3 4 3 4 3 4 3 4]
 [1 2 1 2 1 2 1 2]
 [3 4 3 4 3 4 3 4]
 [1 2 1 2 1 2 1 2]
 [3 4 3 4 3 4 3 4]]


### 6. Python, Random Numbers and Probability

In [263]:
random_number = np.random.random(10)
random_number = random_number / random_number.sum()
random_number

array([0.07600362, 0.03677143, 0.12826016, 0.09982018, 0.12709773,
       0.09370369, 0.0666201 , 0.11755792, 0.13903833, 0.11512684])

In [269]:
# Random Integer Numbers

a = np.random.randint(low=1,high=6, size=10)
a

array([4, 5, 3, 3, 2, 3, 1, 4, 3, 4])

In [281]:
# Random choices

cities = ["Berlin", "Hamburg", "Munich", "Amsterdam", "London", "Paris", "Zurich", "Heidelberg", "Strasbourg", "Augsburg", "Milan", "Rome"]
np.random.choice(cities, 4)
np.random.choice(cities, 4, replace=False)

array(['Strasbourg', 'Munich', 'Zurich', 'Augsburg'], dtype='<U10')

In [284]:
# Random samples

a = np.random.random_sample((3,4))
a

array([[0.45897933, 0.49988739, 0.13541284, 0.75184841],
       [0.66909339, 0.58304385, 0.74949698, 0.5068202 ],
       [0.84405883, 0.19660602, 0.20287334, 0.27272323]])

### 7. Weighted Probabilities

In [286]:
weights = [0.2, 0.5, 0.3]
cum_weights = [0] + list(np.cumsum(weights))
print(cum_weights)

[0, np.float64(0.2), np.float64(0.7), np.float64(1.0)]


In [305]:
# Weighted choice
professions = ["scientist", 
               "philosopher", 
               "engineer", 
               "priest", 
               "programmer"]
probabilities = [0.2, 0.05, 0.3, 0.15, 0.3]
np.random.choice(professions, p=probabilities)

np.str_('philosopher')

In [306]:
# Cartesian Choice
def cartesian_choice(*iterables):
    res = []
    for population in iterables:
        res.append(np.random.choice(population))
    return res

cartesian_choice(["The", "A"],
                 ["red", "green", "blue", "yellow", "grey"], 
                 ["car", "house", "fish", "light"],
                 ["smells", "dreams", "blinks", "shines"])

[np.str_('The'), np.str_('green'), np.str_('house'), np.str_('blinks')]

In [321]:
# Random seed

np.random.seed(10)
for i in range(10):
    print(np.random.randint(1,10), end=", ")

print("\nAnother seed")

np.random.seed(11)
for i in range(10):
    print(np.random.randint(1,10), end=", ")

print("\nAgain the first seed")

np.random.seed(11)
for i in range(10):
    print(np.random.randint(1,10), end=", ")

5, 1, 2, 1, 2, 9, 1, 9, 7, 5, 
Another seed
1, 2, 8, 2, 8, 3, 9, 1, 1, 5, 
Again the first seed
1, 2, 8, 2, 8, 3, 9, 1, 1, 5, 

### 8. Synthetical Test Data With Python

In [323]:
firstnames = ["John", "Eve", "Jane", "Paul", 
              "Frank", "Laura", "Robert", 
              "Kathrin", "Roger", "Simone",
              "Bernard", "Sarah", "Yvonne"]
surnames = ["Singer", "Miles", "Moore", 
            "Looper", "Rampman", "Chopman", 
            "Smiley", "Bychan", "Smith",
            "Baker", "Miller", "Cook"]
   
number_of_specialists = 15
    
employees = set()
while len(employees) < number_of_specialists:
    employee = cartesian_choice(firstnames, surnames)
    employees.add(" ".join(employee))

print(employees)

{'Jane Looper', 'John Miller', 'John Smith', 'Bernard Smiley', 'Eve Rampman', 'Roger Moore', 'Simone Chopman', 'John Cook', 'Laura Bychan', 'Roger Bychan', 'Sarah Moore', 'Roger Chopman', 'Roger Rampman', 'John Looper', 'Sarah Miles'}


### 9. Numpy: Boolean Indexing

In [330]:
a = np.array([4, 7, 3, 4, 2, 8])
b = np.array([[42,56,89,65],
              [99,88,42,12],
              [55,42,17,18]])

print(a == 4)
print(a < 4)
print(b >= 42)

[ True False False  True False False]
[False False  True False  True False]
[[ True  True  True  True]
 [ True  True  True False]
 [ True  True False False]]


In [368]:
a = np.array([
[12, 13, 14, 12, 16, 14, 11, 10,  9],
[11, 14, 12, 15, 15, 16, 10, 12, 11],
[10, 12, 12, 15, 14, 16, 10, 12, 12],
[ 9, 11, 16, 15, 14, 16, 15, 12, 10],
[12, 11, 16, 14, 10, 12, 16, 12, 13],
[10, 15, 16, 14, 14, 14, 16, 15, 12],
[13, 17, 14, 10, 14, 11, 14, 15, 10],
[10, 16, 12, 14, 11, 12, 14, 18, 11],
[10, 19, 12, 14, 11, 12, 14, 18, 10],
[14, 22, 17, 19, 16, 17, 18, 17, 13],
[10, 16, 12, 14, 11, 12, 14, 18, 11],
[10, 16, 12, 14, 11, 12, 14, 18, 11],
[10, 19, 12, 14, 11, 12, 14, 18, 10],
[14, 22, 12, 14, 11, 12, 14, 17, 13],
[10, 16, 12, 14, 11, 12, 14, 18, 11]])

b = a < 15
b.astype(np.int8)

array([[1, 1, 1, 1, 0, 1, 1, 1, 1],
       [1, 1, 1, 0, 0, 0, 1, 1, 1],
       [1, 1, 1, 0, 1, 0, 1, 1, 1],
       [1, 1, 0, 0, 1, 0, 0, 1, 1],
       [1, 1, 0, 1, 1, 1, 0, 1, 1],
       [1, 0, 0, 1, 1, 1, 0, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 0, 0, 0, 0, 0, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1],
       [1, 0, 1, 1, 1, 1, 1, 0, 1]], dtype=int8)

In [374]:
# Fancy indexing

a = np.random.random(10)
print( a[a<0.5] )

a = np.array([3,4,6,10,24,89,45,43,46,99,100])
print( a[ (a%3==0) & (a%5==0)] )

[0.37348765 0.03325828 0.21784499]
[45]


In [449]:
# Nonzero and where, Flatnonzero and count_nonzero

a = np.array([[0, 2, 3, 0, 1], [1, 0, 0, 7, 0], [5, 0, 0, 1, 0]])
print( np.transpose( a.nonzero() ) )
print(a[a.nonzero()])
print(np.flatnonzero(a))
print(np.count_nonzero(a))

[[0 1]
 [0 2]
 [0 4]
 [1 0]
 [1 3]
 [2 0]
 [2 3]]
[2 3 1 1 7 5 1]
[ 1  2  4  5  8 10 13]
7


### 10. Matrix Arithmetics under NumPy and Python

In [451]:
# Scalar Product / Dot Product

x = np.array([1, 2, 3])
y = np.array([-7, 8, 9])
np.dot(x, y)

np.int64(36)

In [453]:
# Cross product

x = np.array([0, 0, 1])
y = np.array([0, 1, 0])
np.cross(x, y)

array([-1,  0,  0])

### 11. Reading and Writing Data Files: ndarrays

In [456]:
# Saving textfiles with savetxt (fname, X, fmt=''%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ')

x = np.array([[1, 2, 3], 
              [4, 5, 6],
              [7, 8, 9]], np.int32)

np.savetxt("first_test.txt", x)

In [460]:
# Loading Textfiles with loadtxt

y = np.loadtxt("first_test.txt", usecols=(0,2) )
print(y)

[[1. 3.]
 [4. 6.]
 [7. 9.]]


In [462]:
# fromfile

fh = open("first_test.txt", "rb")
np.fromfile(fh, dtype=dt)

array([('\U30302e31\U30303030\U30303030\U30303030\U30303030\U30302b65\U302e3220\U30303030\U30303030\U30303030\U30303030\U302b6530\U2e332030\U30303030\U30303030\U30303030\U30303030\U2b653030\U340a3030\U3030302e', 808464432),
       ('\U30303030\U30303030\U65303030\U2030302b\U30302e35\U30303030\U30303030\U30303030\U30303030\U30302b65\U302e3620\U30303030\U30303030\U30303030\U30303030\U302b6530\U2e370a30\U30303030\U30303030\U30303030', 808464432)],
      dtype=[('Columns1', '<U20'), ('Column2', '<i4')])

In [464]:
# genfromtxt

sales = np.genfromtxt("first_test.txt", encoding='utf8', dtype=None)
sales

array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]])