# Introduction to Numpy and Pandas

In [1]:
import numpy as np
import pandas as pd

Numpy provides N-Dimensional Arrays

In [7]:
def show_info(a):
    print(a)
    print(f"Shape: {a.shape} NDim: {a.ndim}, DType: {a.dtype}")

oned_array = np.array([1, 2, 3, 4])
show_info(oned_array)

[1 2 3 4]
Shape: (4,) NDim: 1, DType: int32


In [8]:
# use list of lists to build 2D array
twod_array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
show_info(twod_array)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
Shape: (3, 3) NDim: 2, DType: int32


#### python lists can have multiple types. Each element is reference to an object.

In [10]:
python_2d_list = [[1, 2, 3], [4, "5", 6], [7, 8, 99.73]]
print(normal_2d_list)

[[1, 2, 3], [4, '5', 6], [7, 8, 99.73]]


#### numpy array elements need to have same type

In [11]:
np2d_array = np.array(python_2d_list)
show_info(np2d_array)  # all elements converted to a common type - in this case string

[['1' '2' '3']
 ['4' '5' '6']
 ['7' '8' '99.73']]
Shape: (3, 3) NDim: 2, DType: <U11


#### type converted to float64

In [13]:
show_info(np.array([1, 2, 3, 4.0]))  

[ 1.  2.  3.  4.]
Shape: (4,) NDim: 1, DType: float64


#### use dtype to cast to specific type

In [14]:
show_info(np.array([1, 2, 3, 4.0], dtype=np.int32))  

[1 2 3 4]
Shape: (4,) NDim: 1, DType: int32


#### compatible types converted silently

In [15]:
show_info(np.array([1, 2, "3", "4"], dtype=np.int32)) 

[1 2 3 4]
Shape: (4,) NDim: 1, DType: int32


#### fails if the elements can not convert to requested dtype

In [17]:
show_info(np.array([1, 2, "3.23", "4"], dtype=np.int32)) 

ValueError: invalid literal for int() with base 10: '3.23'

#### see https://docs.python.org/3.6/tutorial/floatingpoint.html for explanation of why you do not get 3.23 in the print output

In [19]:
show_info(np.array([1, 2, "3.23", "4"], dtype=np.float32))   


[ 1.          2.          3.23000002  4.        ]
Shape: (4,) NDim: 1, DType: float32


### ReShaping

In [30]:
# following 3 arrays have 4 integers, with different shapes
a1 = np.array([1, 2, 3, 4])
a3 = np.array([[1, 2, 4, 4]])
a2 = np.array([[1], [2], [3], [4]])

print("\n********** A1: One dimensional Array:")
show_info(a1)
print("\n********** A2: 2 Dimensional Array with 1 row and 4 columns ")
show_info(a2)
print("\n********** A2: 2 Dimensional Array with 4 rows and 1 column")
show_info(a3)


********** A1: One dimensional Array:
[1 2 3 4]
Shape: (4,) NDim: 1, DType: int32

********** A2: 2 Dimensional Array with 1 row and 4 columns 
[[1]
 [2]
 [3]
 [4]]
Shape: (4, 1) NDim: 2, DType: int32

********** A2: 2 Dimensional Array with 4 rows and 1 column
[[1 2 4 4]]
Shape: (1, 4) NDim: 2, DType: int32


#### convert 1d array of 4 elements to 1row by 4 columns 2d array

In [43]:
a2 = a1.reshape((1, 4)) # note the argument is a tuple - hence need two parenthesis
show_info(a2)


[[1 2 3 4]]
Shape: (1, 4) NDim: 2, DType: int32


#### convert 1d array of 4 elements to 4rows by 1 column 2d array

In [44]:
a3 = a1.reshape((4, 1)) 
show_info(a3)

[[1]
 [2]
 [3]
 [4]]
Shape: (4, 1) NDim: 2, DType: int32


#### convert 1d array of 4 elements to 2x2 2dArray

In [45]:
a4 = a1.reshape((2, 2))
show_info(a4)

[[1 2]
 [3 4]]
Shape: (2, 2) NDim: 2, DType: int32


#### numpy re-uses same memory contents where possible.  Modifying the array with any of the  reshaped objects will affect all others.

In [46]:
# modify second element
a1[1] = 100

print(a1)

[  1 100   3   4]


In [47]:
print(a2)

[[  1 100   3   4]]


In [48]:
print(a3)

[[  1]
 [100]
 [  3]
 [  4]]


In [49]:
print(a4)

[[  1 100]
 [  3   4]]


#### Use np.copy to create copies.

In [54]:
a1 = np.array([1, 2, 3, 4])
a2 = a1.reshape((1, 4)).copy()
a3 = a1.reshape((4, 1)).copy()
a4 = a1.reshape((2, 2)).copy()

a1[1] = 100 # will not affect the copies

print('a1:', a1)
print('a2:', a2)
print('a3:', a3)
print('a4:', a4)

a1: [  1 100   3   4]
a2: [[1 2 3 4]]
a3: [[1]
 [2]
 [3]
 [4]]
a4: [[1 2]
 [3 4]]
