# NumPy

In [1]:
import numpy as np
# Show which NumPy release this notebook was executed with.
np.__version__

'1.19.2'

## Intro

In [2]:
# Build a one-dimensional array from a Python list and report its basic attributes.
a = np.array([0, 1, 2, 3, 4, 5])
print(
    f'Array: {a}',
    f'Dimensions: {a.ndim}',
    f'Shape: {a.shape}',
    f'Type: {a.dtype}',
    sep='\n',
)

Array: [0 1 2 3 4 5]
Dimensions: 1
Shape: (6,)
Type: int32


## Reshape

In [3]:
# Arrange the six values of a as 3 rows by 2 columns.
b = a.reshape(3, 2)
print(
    f'Array: {b}',
    f'Dimensions: {b.ndim}',
    f'Shape: {b.shape}',
    sep='\n',
)

Array: [[0 1]
 [2 3]
 [4 5]]
Dimensions: 2
Shape: (3, 2)


In [4]:
# b was produced by reshaping a, and both names refer to the same underlying
# data: a write made through b is visible through a as well.
b[1, 0] = 108
print(f'Array b: {b}', '', f'Array a: {a}', sep='\n')

Array b: [[  0   1]
 [108   3]
 [  4   5]]

Array a: [  0   1 108   3   4   5]


In [5]:
# .copy() gives c its own data instead of sharing a's, so writing to c
# leaves a untouched.
c = a.reshape((3, 2)).copy()
c[1, 0] = 12
# The array printed here is c, so label it as c.
print(f'Array c: {c}')
print()
print(f'Array a: {a}')

Array b: [[ 0  1]
 [12  3]
 [ 4  5]]

Array a: [  0   1 108   3   4   5]


## Arithmetic Operations

In [6]:
# Multiplying by a scalar is applied to every element of the array.
a * 2

array([  0,   2, 216,   6,   8,  10])

In [7]:
# Subtracting a scalar is applied to every element of the array.
a - 2

array([ -2,  -1, 106,   1,   2,   3])

## Indexing

In [8]:
# Indexing with a list of positions picks out the elements at those positions.
a[[2, 3, 4]]

array([108,   3,   4])

In [9]:
# Comparing an array with a scalar yields a boolean array, one entry per element.
a > 3

array([False, False,  True, False,  True,  True])

In [10]:
# A boolean mask used as an index keeps only the elements where the mask is True.
a[a > 3]

array([108,   4,   5])

In [11]:
# Assigning through a boolean mask overwrites the selected elements of a in place.
a[a > 3] = 4
a

array([0, 1, 4, 3, 4, 4])

In [12]:
# Limit every value to the interval [0, 2].
a.clip(min=0, max=2)

array([0, 1, 2, 2, 2, 2])

## NaN in NumPy

In [13]:
# np.nan is the canonical spelling of NaN; the np.NAN alias was removed in NumPy 2.0.
# Including a NaN makes the whole array floating point.
c = np.array([1, 2, np.nan, 3, 4])
# Element-wise test for NaN: True exactly where the value is missing.
np.isnan(c)

array([False, False,  True, False, False])

In [14]:
# Invert the NaN mask (~) to keep only the entries that are not NaN.
c[~np.isnan(c)]

array([1., 2., 3., 4.])

## Runtime comparison

In [15]:
import timeit

In [16]:
# Time three ways of computing the sum of squares of 0..999.
# Every statement is executed n_runs times; the NumPy variants share one setup
# string that builds the array once, outside the timed statement.
np_setup = "import numpy as np; na=np.arange(1000)"
n_runs = 10000

# Pure Python: generator expression fed to the builtin sum.
normal_py_sec = timeit.timeit('sum(x*x for x in range(1000))', number=n_runs)
# NumPy multiplication, but summed with the Python builtin sum.
naive_np_sec = timeit.timeit('sum(na*na)', setup=np_setup, number=n_runs)
# NumPy only: the dot product of the array with itself.
good_np_sec = timeit.timeit('na.dot(na)', setup=np_setup, number=n_runs)

print(
    f"Normal Python: {normal_py_sec:.5} sec",
    f"Naive NumPy: {naive_np_sec:.5} sec",
    f"Good NumPy: {good_np_sec:.5} sec",
    sep="\n",
)

Normal Python: 2.2519 sec
Naive NumPy: 3.6451 sec
Good NumPy: 0.029518 sec


## Reading files

### Tab-separated

In [17]:
# Source of this file:
# https://github.com/luispedro/BuildingMachineLearningSystemsWithPython/blob/master/ch01/data/web_traffic.tsv
# Be careful: per the original note, loading it this way stores the file on your computer.
web_traffic_url = "https://raw.githubusercontent.com/luispedro/BuildingMachineLearningSystemsWithPython/master/ch01/data/web_traffic.tsv"
# The file is tab-separated, so split the columns on "\t".
data = np.loadtxt(web_traffic_url, delimiter="\t")
data

array([[1.000e+00, 2.272e+03],
       [2.000e+00,       nan],
       [3.000e+00, 1.386e+03],
       ...,
       [7.410e+02, 5.392e+03],
       [7.420e+02, 5.906e+03],
       [7.430e+02, 4.881e+03]])