# Python Data Science Handbook
## Author: VanderPlas, Jacob T

#### Creating Arrays from Lists

In [1]:
import numpy as np

# Integer array: all elements share a single integer dtype
a = np.array([1, 4, 2, 5, 3])

# Mixing floats and ints upcasts every element to floating point
b = np.array([3.14, 4, 2, 3])

# Inspect the scalar type of one element from each array
print(type(a[1]))
print(type(b[1]))

# Each inner range becomes one row of a two-dimensional array
n = np.array([range(start, start + 3) for start in (2, 4, 6)])

n

<class 'numpy.int64'>
<class 'numpy.float64'>


array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

#### Creating Arrays from Scratch

In [2]:
# Ten integer zeros
a = np.zeros(shape=10, dtype=int)

# 3x5 block of floating-point ones
b = np.ones(shape=(3, 5), dtype=float)

# Linear sequence 0, 2, ..., 18 (start included, stop excluded, step 2)
c = np.arange(0, 20, 2)

# Five evenly spaced values from 0 to 1
d = np.linspace(0, 1, num=5)

# 3x3 array of uniformly distributed random values between 0 and 1
e = np.random.random(size=(3, 3))

# 3x3 array of normally distributed random values
# (mean 0, standard deviation 1)
f = np.random.normal(loc=0, scale=1, size=(3, 3))

# 3x3 identity matrix
g = np.eye(3)

print(g)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


#### NumPy Array Attributes

In [3]:
# Fix the seed so the random arrays below are reproducible
np.random.seed(0)

# One-dimensional array of six random integers below 10
x1 = np.random.randint(10, size=6)

# Two-dimensional array (3 x 4)
x2 = np.random.randint(10, size=(3, 4))

# Three-dimensional array (3 x 4 x 5)
x3 = np.random.randint(10, size=(3, 4, 5))

print(f"x3 ndim: {x3.ndim}")    # number of dimensions
print(f"x3 shape: {x3.shape}")  # size of each dimension
print(f"x3 size: {x3.size}")    # total number of elements

# Data type shared by every element of the array
print(f"dtype: {x3.dtype}")

# Size, in bytes, of a single array element
print(f"itemsize: {x3.itemsize} bytes")

# Size, in bytes, of the whole array
print(f"nbytes: {x3.nbytes} bytes")

x3 ndim: 3
x3 shape: (3, 4, 5)
x3 size: 60
dtype: int64
itemsize: 8 bytes
nbytes: 480 bytes


#### Array Indexing

In [10]:
# A negative index counts backwards from the end of the array
print(x1[-1], x1[-2], sep="\n")

# Accessing a multidimensional array uses a comma-separated
# tuple of indices
print(x2[0, 0], x2[2, 0], sep="\n")

# The same index notation is used to modify a value
x2[0, 0] = 12
print(x2[0, 0])

9
7
3
1
12


Note: NumPy arrays and Python lists are different.

NumPy arrays have a fixed type

That means all elements of the array will be of the same data type.

#### Array Slicing

In [17]:
x = np.arange(10)
print(x)

# x[:5] -> the first five elements
print(x[:5])

# x[5:] -> everything from index 5 to the end
print(x[5:])

# x[4:7] -> a sub-array from the middle (indices 4, 5 and 6)
print(x[4:7])

# x[::2] -> every second element
print(x[::2])

# x[::-1] -> all elements in reverse order
print(x[::-1])

# --- Multidimensional sub-arrays ---
print(x2)

# First two rows, first three columns
print(x2[:2, :3])

# Reverse both the row order and the column order
print(x2[::-1, ::-1])

# Combine an index with an empty slice to pull out a single column
print(x2[:, 0])  # first column of x2

# .copy() produces an independent copy of the sub-array
x2_sub_copy = x2[:2, :2].copy()
print(x2_sub_copy)

# Writing into the copy does not change the original array
x2_sub_copy[0, 0] = 42
print(x2_sub_copy)

[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4]
[5 6 7 8 9]
[4 5 6]
[0 2 4 6 8]
[9 8 7 6 5 4 3 2 1 0]
[[12  5  2  4]
 [ 7  6  8  8]
 [ 1  6  7  7]]
[[12  5  2]
 [ 7  6  8]]
[[ 7  7  6  1]
 [ 8  8  6  7]
 [ 4  2  5 12]]
[12  7  1]
[[12  5]
 [ 7  6]]
[[42  5]
 [ 7  6]]


#### Reshaping of Arrays

In [4]:
# Lay the integers 1..9 out on a 3x3 grid
grid = np.arange(1, 10).reshape((3, 3))
print(grid)

x = np.array([1, 2, 3])

# Row vector, built by reshaping to 1x3
print(x.reshape((1, 3)))

# Same row vector, built by inserting a new leading axis
print(x[np.newaxis, :])

[[1 2 3]
 [4 5 6]
 [7 8 9]]
[[1 2 3]]
[[1 2 3]]


#### Array Concatenation and Splitting

In [5]:
from ast import YieldFrom

# --- Concatenation ---
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
print(np.concatenate([x, y]))

# More than two sequences can be joined at once; plain lists are accepted too
z = [99, 99, 99]
print(np.concatenate([x, y, z]))

# Two-dimensional array: concatenation runs along the first axis by default
grid = np.array([[1, 2, 3],
                 [4, 5, 6]])
print(np.concatenate([grid, grid]))

# Concatenate along the second axis
print(np.concatenate([grid, grid], axis=1))

# --- Stacking arrays of mixed dimensions ---
x = np.array([1, 2, 3])
y = np.array([[99],
              [99]])

grid = np.array([[9, 8, 7],
                 [6, 5, 4]])

# Vertically stack
print(np.vstack([x, grid]))

# Horizontally stack
print(np.hstack([grid, y]))

# --- Splitting ---
# Kept as an ndarray, like every other `x` in this notebook
x = np.array([1, 2, 3, 99, 99, 3, 2, 1])

# The split points [3, 5] yield three pieces: x[:3], x[3:5] and x[5:].
# The pieces get their own names so they do not overwrite the x1/x2/x3
# arrays that the indexing and slicing cells read.
x_head, x_mid, x_tail = np.split(x, [3, 5])
print(x_head, x_mid, x_tail)

grid = np.arange(16).reshape((4, 4))

# Vertical splitting: rows before index 2 / rows from index 2 on
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

# Horizontal splitting: columns before index 2 / columns from index 2 on
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[1 2 3 3 2 1]
[ 1  2  3  3  2  1 99 99 99]
[[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]
[[1 2 3 1 2 3]
 [4 5 6 4 5 6]]
[[1 2 3]
 [9 8 7]
 [6 5 4]]
[[ 9  8  7 99]
 [ 6  5  4 99]]
[1 2 3] [99 99] [3 2 1]
[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]
[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


#### UFUNCS

Used to implement vectorization in NumPy

It is much faster than iterating over the elements one by one in a Python loop.

#### Vectorization

Converts iterative statements into vector-based operations