# Introduction to Numpy and Pandas

In [1]:
import numpy as np
import pandas as pd

Numpy provides N-Dimensional Arrays

In [3]:
def show_info(a):
    """Print an array, then a one-line summary of its shape, ndim and dtype.

    Parameters
    ----------
    a : object with ``shape``, ``ndim`` and ``dtype`` attributes
        In this notebook it is always a numpy array.
    """
    print(a)
    details = f"Shape: {a.shape} NDim: {a.ndim}, DType: {a.dtype}"
    print(details)


# a flat python list becomes a one dimensional array
oned_array = np.array([1, 2, 3, 4])
show_info(oned_array)

[1 2 3 4]
Shape: (4,) NDim: 1, DType: int32


In [3]:
# each inner list becomes one row of the 2D array
nested_rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
twod_array = np.array(nested_rows)
show_info(twod_array)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
Shape: (3, 3) NDim: 2, DType: int32


#### Python lists can hold elements of different types. Each element is a reference to an object.

In [4]:
# one row of ints, one row containing a str, one row containing a float
python_2d_list = [
    [1, 2, 3],
    [4, "5", 6],
    [7, 8, 99.73],
]
print(python_2d_list)

[[1, 2, 3], [4, '5', 6], [7, 8, 99.73]]


#### numpy array elements need to have same type

In [5]:
# numpy picks one common type for every element - here that is string,
# because the list mixes int, str and float
np2d_array = np.array(python_2d_list)
show_info(np2d_array)

[['1' '2' '3']
 ['4' '5' '6']
 ['7' '8' '99.73']]
Shape: (3, 3) NDim: 2, DType: <U11


#### type converted to float64

In [6]:
# a single float among the ints makes the whole array float64
promoted_floats = np.array([1, 2, 3, 4.0])
show_info(promoted_floats)

[ 1.  2.  3.  4.]
Shape: (4,) NDim: 1, DType: float64


#### use dtype to cast to specific type

In [4]:
# dtype= overrides the inferred type: 4.0 is stored as the int32 value 4
forced_int32 = np.array([1, 2, 3, 4.0], dtype=np.int32)
show_info(forced_int32)

[1 2 3 4]
Shape: (4,) NDim: 1, DType: int32


#### compatible types converted silently

In [5]:
# the strings hold valid integer literals, so they are parsed into int32 values
parsed_int32 = np.array([1, 2, "3", "4"], dtype=np.int32)
show_info(parsed_int32)

[1 2 3 4]
Shape: (4,) NDim: 1, DType: int32


#### Fails if an element cannot be converted to the requested dtype

In [10]:
# "3.0" is not a valid integer literal, so building the int32 array raises
# ValueError. The error is the point of this cell, so catch it and print it
# instead of letting it stop a "Restart & Run All".
try:
    not_all_ints = np.array([1, 2, "3.0", "4"], dtype=np.int32)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")
else:
    show_info(not_all_ints)

ValueError: invalid literal for int() with base 10: '3.0'

#### float32 cannot represent 3.23 exactly, so the nearest representable value (3.23000002) is printed. See https://docs.python.org/3.6/tutorial/floatingpoint.html for the explanation.

In [12]:
# the strings are parsed as floats and stored with float32 precision
parsed_float32 = np.array([1, 2, "3.23", "4"], dtype=np.float32)
show_info(parsed_float32)

[ 1.          2.          3.23000002  4.        ]
Shape: (4,) NDim: 1, DType: float32


### ReShaping

In [15]:
# the same four integers, arranged in three different shapes
a1 = np.array([1, 2, 3, 4])
a2 = np.array([[1, 2, 3, 4]])
a3 = np.array([[1], [2], [3], [4]])

# print a heading and the array details for each shape in turn
for heading, shaped in (
    ("\n********** A1: One dimensional Array:", a1),
    ("\n********** A2: 2 Dimensional Array with 1 row and 4 columns ", a2),
    ("\n********** A3: 2 Dimensional Array with 4 rows and 1 column", a3),
):
    print(heading)
    show_info(shaped)


********** A1: One dimensional Array:
[1 2 3 4]
Shape: (4,) NDim: 1, DType: int32

********** A2: 2 Dimensional Array with 1 row and 4 columns 
[[1 2 3 4]]
Shape: (1, 4) NDim: 2, DType: int32

********** A3: 2 Dimensional Array with 4 rows and 1 column
[[1]
 [2]
 [3]
 [4]]
Shape: (4, 1) NDim: 2, DType: int32


#### Convert a 1D array of 4 elements to a 2D array with 1 row and 4 columns

In [16]:
# the target shape is passed as a single tuple, hence the double parentheses
# in a1.reshape((1, 4)); a1.reshape(1, 4) is accepted as well
row_shape = (1, 4)
a2 = a1.reshape(row_shape)
show_info(a2)


[[1 2 3 4]]
Shape: (1, 4) NDim: 2, DType: int32


#### Convert a 1D array of 4 elements to a 2D array with 4 rows and 1 column

In [17]:
# 4 rows, 1 column: every element ends up in its own row
column_shape = (4, 1)
a3 = a1.reshape(column_shape)
show_info(a3)

[[1]
 [2]
 [3]
 [4]]
Shape: (4, 1) NDim: 2, DType: int32


#### Convert a 1D array of 4 elements to a 2x2 2D array

In [18]:
# 2 rows, 2 columns: elements are placed row by row
square_shape = (2, 2)
a4 = a1.reshape(square_shape)
show_info(a4)

[[1 2]
 [3 4]]
Shape: (2, 2) NDim: 2, DType: int32


#### np.newaxis adds another dimension of size 1

In [19]:
a = np.array([1, 2, 3, 4])
# new axis in second position -> shape (4, 1), a single column
as_column = a[:, np.newaxis]
show_info(as_column)

[[1]
 [2]
 [3]
 [4]]
Shape: (4, 1) NDim: 2, DType: int32


In [20]:
a = np.array([1, 2, 3, 4])
# new axis in first position -> shape (1, 4), a single row
as_row = a[np.newaxis, :]
show_info(as_row)

[[1 2 3 4]]
Shape: (1, 4) NDim: 2, DType: int32


#### NumPy reuses the same memory where possible. Modifying the data through the original array or any of its reshaped versions affects all the others.

In [21]:
# one base array and three reshaped versions of it
a1 = np.array([1, 2, 3, 4])
a2, a3, a4 = (a1.reshape(shape) for shape in [(4, 1), (1, 4), (2, 2)])

# overwrite the second element, going through the base array only
a1[1] = 100

print(a1)

[  1 100   3   4]


In [28]:
# Write through the 2x2 array before displaying, so this cell reproduces its
# output on a fresh top-to-bottom run. The -1 appears in a1, a2 and a3 too,
# because a2, a3 and a4 were all made by reshaping a1 and share its data.
a4[1, 1] = -1
a1, a2, a3, a4

(array([  1, 100,   3,  -1]), array([[  1],
        [100],
        [  3],
        [ -1]]), array([[  1, 100,   3,  -1]]), array([[  1, 100],
        [  3,  -1]]))

In [25]:
# overwrite the bottom-right entry of the 2x2 reshaped array
a4[1, 1] = -1

#### Use np.copy to create copies.

In [29]:
a1 = np.array([1, 2, 3, 4])
# copying after the reshape gives each array its own data
a2 = np.copy(a1.reshape((1, 4)))
a3 = np.copy(a1.reshape((4, 1)))
a4 = np.copy(a1.reshape((2, 2)))

a1[1] = 100  # only a1 changes; the copies keep the original 2

for label, copied in (('a1:', a1), ('a2:', a2), ('a3:', a3), ('a4:', a4)):
    print(label, copied)

a1: [  1 100   3   4]
a2: [[1 2 3 4]]
a3: [[1]
 [2]
 [3]
 [4]]
a4: [[1 2]
 [3 4]]


#### Array creation

In [30]:
# np.arange() works like python's range(), but returns an array;
# start=0 is spelled out here, the stop value 6 is excluded
np.arange(0, 6, dtype=np.int16)

array([0, 1, 2, 3, 4, 5], dtype=int16)

In [32]:
zero_arrays = [
    np.zeros(5),                                   # no dtype given
    np.zeros((4, 5), dtype=np.int32),              # 2D shape given directly
    np.zeros(20, dtype=np.int32).reshape((4, 5)),  # built flat, then reshaped
]
for zero_array in zero_arrays:
    show_info(zero_array)

[ 0.  0.  0.  0.  0.]
Shape: (5,) NDim: 1, DType: float64
[[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
Shape: (4, 5) NDim: 2, DType: int32
[[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
Shape: (4, 5) NDim: 2, DType: int32


In [34]:
one_arrays = [
    np.ones(5),                       # no dtype given
    np.ones((4, 5), dtype=np.int16),  # 2D shape with an explicit dtype
]
for one_array in one_arrays:
    show_info(one_array)

[ 1.  1.  1.  1.  1.]
Shape: (5,) NDim: 1, DType: float64
[[1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]]
Shape: (4, 5) NDim: 2, DType: int16


In [43]:
# int8 holds -128..127, so 300 does not fit. Assigning the python int 300
# directly (a[0] = 300) raises OverflowError on NumPy >= 2.0 and emits a
# DeprecationWarning on NumPy 1.24+. An explicit astype() cast wraps the value
# modulo 256 instead: 300 - 256 = 44.
a = np.zeros(10, dtype=np.int8)
show_info(a)
a[0] = np.array(300).astype(np.int8)
show_info(a)

[0 0 0 0 0 0 0 0 0 0]
Shape: (10,) NDim: 1, DType: int8
[44  0  0  0  0  0  0  0  0  0]
Shape: (10,) NDim: 1, DType: int8


In [44]:
# square matrix with ones on the main diagonal and zeros elsewhere
identity_4x4 = np.identity(4, dtype=np.int32)
show_info(identity_4x4)

[[1 0 0 0]
 [0 1 0 0]
 [0 0 1 0]
 [0 0 0 1]]
Shape: (4, 4) NDim: 2, DType: int32


#### Transpose

In [45]:
# the integers 0..11, laid out as 3 rows by 4 columns
flat_values = np.array(range(12))
a = flat_values.reshape((3, 4))
show_info(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Shape: (3, 4) NDim: 2, DType: int32


In [48]:
# transposing swaps rows and columns: element [i, j] moves to [j, i]
aT = a.T
show_info(aT)

# reshaping to (4, 3) keeps the row-by-row element order, so it is not a transpose
reshaped_4x3 = a.reshape((4, 3))
show_info(reshaped_4x3)

[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]
Shape: (4, 3) NDim: 2, DType: int32
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
Shape: (4, 3) NDim: 2, DType: int32


In [49]:
# the transposed array reuses a's memory, so a write through a shows up in aT
a[1, 1] = 100
print(a)
print(aT)

[[  0   1   2   3]
 [  4 100   6   7]
 [  8   9  10  11]]
[[  0   4   8]
 [  1 100   9]
 [  2   6  10]
 [  3   7  11]]


#### Concatenate

In [51]:
# three 1D arrays of different lengths, joined end to end
a, b, c = np.array([1, 2, 3]), np.array([4, 5]), np.array([6, 7, 8])
d = np.concatenate((a, b, c))
show_info(d)

[1 2 3 4 5 6 7 8]
Shape: (8,) NDim: 1, DType: int32


In [53]:
# concatenate copied the data into d, so changing a afterwards leaves d as it was
a[0] = -1
d

array([1, 2, 3, 4, 5, 6, 7, 8])

In [55]:
# without an axis argument concatenate joins along axis 0 (row axis):
# the rows of b are appended below the rows of a
a = np.arange(12).reshape((3, 4))
b = a + 100
np.concatenate((a, b), axis=0)

array([[  0,   1,   2,   3],
       [  4,   5,   6,   7],
       [  8,   9,  10,  11],
       [100, 101, 102, 103],
       [104, 105, 106, 107],
       [108, 109, 110, 111]])

In [56]:
# axis=1 is the column axis: the columns of b are appended to the right of a
np.concatenate((a, b), axis=1)

array([[  0,   1,   2,   3, 100, 101, 102, 103],
       [  4,   5,   6,   7, 104, 105, 106, 107],
       [  8,   9,  10,  11, 108, 109, 110, 111]])