# NumPy

In [26]:
import numpy as np
# Show the installed NumPy version so readers can reproduce the outputs.
np.__version__

'1.19.5'

## Intro

In [47]:
# Build a one-dimensional array from a Python list and inspect its
# basic attributes: number of dimensions, shape and element type.
a = np.array([0, 1, 2, 3, 4, 5])
print(
    f'Array: {a}',
    f'Dimensions: {a.ndim}',
    f'Shape: {a.shape}',
    f'Type: {a.dtype}',
    sep='\n',
)

Array: [0 1 2 3 4 5]
Dimensions: 1
Shape: (6,)
Type: int64


## Reshape

In [28]:
# Reinterpret the six elements of `a` as a 3x2 matrix.
b = a.reshape(3, 2)
print(
    f'Array: {b}',
    f'Dimensions: {b.ndim}',
    f'Shape: {b.shape}',
    sep='\n',
)

Array: [[0 1]
 [2 3]
 [4 5]]
Dimensions: 2
Shape: (3, 2)


In [29]:
# `b` and `a` refer to the same underlying data, so a write through `b`
# is also visible through `a`.
b[1, 0] = 108
print(f'Array b: {b}', f'Array a: {a}', sep='\n\n')

Array b: [[  0   1]
 [108   3]
 [  4   5]]

Array a: [  0   1 108   3   4   5]


In [30]:
# .copy() allocates independent storage, so `c` does not share data with `a`:
# writing to `c` leaves `a` untouched.
c = a.reshape((3, 2)).copy()
c[1, 0] = 12
# Label the printed array as `c` (it was previously mislabelled as `b`).
print(f'Array c: {c}')
print()
print(f'Array a: {a}')

Array b: [[ 0  1]
 [12  3]
 [ 4  5]]

Array a: [  0   1 108   3   4   5]


## Arithmetic Operations

In [31]:
# Element-wise multiplication by a scalar; returns a new array.
a * 2

array([  0,   2, 216,   6,   8,  10])

In [32]:
# Element-wise subtraction of a scalar; returns a new array.
a - 2

array([ -2,  -1, 106,   1,   2,   3])

## Indexing

In [33]:
# Index with a list of positions to select the elements at 2, 3 and 4.
a[[2, 3, 4]]

array([108,   3,   4])

In [34]:
# Element-wise comparison yields a boolean array of the same shape.
a > 3

array([False, False,  True, False,  True,  True])

In [35]:
# Use the boolean array as a mask to keep only the elements greater than 3.
a[a > 3]

array([108,   4,   5])

In [36]:
# Masked assignment: overwrite every element greater than 3 with 4.
# This modifies `a` in place.
a[a > 3] = 4
a

array([0, 1, 4, 3, 4, 4])

In [37]:
# Limit every value to the closed interval [0, 2]; returns a new array.
a.clip(min=0, max=2)

array([0, 1, 2, 2, 2, 2])

## NaN in NumPy

In [38]:
# Build an array containing a missing value and locate it.
# Use np.nan: the upper-case alias np.NAN was removed in NumPy 2.0.
# The NaN entry forces the array to a floating-point dtype.
c = np.array([1, 2, np.nan, 3, 4])
np.isnan(c)

array([False, False,  True, False, False])

In [39]:
# Keep only the entries that are not NaN.
c[np.logical_not(np.isnan(c))]

array([1., 2., 3., 4.])

## Runtime comparison

In [40]:
import timeit

In [46]:
# Time three ways of computing the sum of squares of 0..999.
# Each statement is executed `n_runs` times and the total wall time is reported.
n_runs = 10000
np_setup = "import numpy as np; na=np.arange(1000)"

# Pure Python generator expression.
normal_py_sec = timeit.timeit('sum(x*x for x in range(1000))', number=n_runs)
# NumPy multiplication, but summed with Python's builtin sum().
naive_np_sec = timeit.timeit('sum(na*na)', setup=np_setup, number=n_runs)
# Whole computation done inside NumPy via a dot product.
good_np_sec = timeit.timeit('na.dot(na)', setup=np_setup, number=n_runs)

print(f"Normal Python: {normal_py_sec:.5} sec")
print(f"Naive NumPy: {naive_np_sec:.5} sec")
print(f"Good NumPy: {good_np_sec:.5} sec")

Normal Python: 1.0422 sec
Naive NumPy: 2.0941 sec
Good NumPy: 0.01734 sec


## Reading files

### Tab-separated

In [75]:
# Load the tab-separated file into a float array.
# genfromtxt represents missing fields as nan.
data = np.genfromtxt(fname="intro_numpy_scipy_1.tsv", delimiter="\t")
data

array([[1.000e+00, 2.272e+03],
       [2.000e+00,       nan],
       [3.000e+00, 1.386e+03],
       ...,
       [7.410e+02, 5.392e+03],
       [7.420e+02, 5.906e+03],
       [7.430e+02, 4.881e+03]])

### Comma-separated

In [76]:
# Load the comma-separated file into a float array.
# genfromtxt represents missing fields as nan.
# Note: this rebinds `data`, replacing any array previously stored under that name.
data = np.genfromtxt(fname="intro_numpy_scipy_1.csv", delimiter=",")
data

array([[1.000e+00, 2.272e+03],
       [2.000e+00,       nan],
       [3.000e+00, 1.386e+03],
       ...,
       [7.410e+02, 5.392e+03],
       [7.420e+02, 5.906e+03],
       [7.430e+02, 4.881e+03]])