# NumPy

NumPy (short for Numerical Python) provides an efficient interface to store and operate on dense data buffers.

In [1]:
import numpy as np

# The NumPy ndarray: A Multidimensional Array Object
- N-dimensional array object, or ndarray, which is a fast, flexible container for large data sets
- Arrays enable you to perform mathematical operations on whole blocks of data using syntax similar to the equivalent operations on scalar elements
- An ndarray is a generic multidimensional container for homogeneous data
  - all of the elements must be the **same type**,
  - **shape**, a tuple indicating the size of each dimension,
  - **dtype**, an object describing the data type of the array.

## Creating ndarrays
- the ***array*** function accepts any sequence-like object (including other arrays) and produces a new NumPy array containing the passed data


In [2]:
# The odd integers from 1 to 11, kept as a plain Python list (converted to an ndarray later).
data1 = list(range(1, 12, 2))

In [3]:
data1

[1, 3, 5, 7, 9, 11]

In [4]:
arr1 = np.array(data1)

In [5]:
arr1

array([ 1,  3,  5,  7,  9, 11])

In [6]:
arr1.ndim

1

In [7]:
arr1.shape

(6,)

In [8]:
arr1.dtype

dtype('int64')

In [9]:
arr1.itemsize # the size (in bytes) of each array element

8

In [10]:
arr1.nbytes # the total size (in bytes) of the array

48

In [11]:
data2 = [1, 3.7, 5, 7, 9, 11.4]

In [12]:
data2

[1, 3.7, 5, 7, 9, 11.4]

- data2 contains both integers and decimal numbers: can I still create a NumPy array from it?

In [13]:
arr2 = np.array(data2)

In [14]:
arr2

array([ 1. ,  3.7,  5. ,  7. ,  9. , 11.4])

In [15]:
# Iterating over a 1-D array visits its elements in order, one per iteration;
# each element is printed on its own line.
for x in arr2:
  print(x)

1.0
3.7
5.0
7.0
9.0
11.4


In [16]:
arr2[0]

np.float64(1.0)

- What about two dimensions?

In [17]:
# A 3x3 nested list: each inner list is one row of three consecutive integers.
data_2 = [list(range(first, first + 3)) for first in (1, 4, 7)]

In [18]:
type(data_2)

list

In [19]:
data_2[1][0]

4

In [20]:
data_2

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [21]:
arr_2 = np.array(data_2)

In [22]:
arr_2

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [23]:
arr_2.ndim

2

In [24]:
arr_2.shape

(3, 3)

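- Other techniques for initializing ndarrays
  - ones: array filled with 1s
  - zeros: array filled with 0s
  - empty: allocates the array without initializing it, so its contents are arbitrary
  - eye: identity matrix (1s on the main diagonal, 0s elsewhere)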

In [25]:
np.ones(16)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [26]:
np.ones((16, 2))

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]])

In [27]:
np.empty((3, 4))

array([[6.62098696e-310, 0.00000000e+000, 2.10077583e-312,
        6.79038654e-313],
       [2.22809558e-312, 2.14321575e-312, 2.35541533e-312,
        6.79038654e-313],
       [2.22809558e-312, 2.14321575e-312, 2.46151512e-312,
        2.41907520e-312]])

In [28]:
np.zeros((4, 4, 4))

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [29]:
np.eye(44)

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

- When constructing an array, you can specify the data type using a string

In [30]:
np.ones(10, dtype='float32')  # Default is numpy.float64

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)

In [31]:
# random.randint(low, high=None, size=None, dtype=int)

# Seed the legacy global generator once, before the first random draw, so that
# every np.random.* call from here on produces the same values on each
# "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)

# With a single bound, randint draws integers from [0, 10).
a1 = np.random.randint(10, size=10)  # one-dimensional array
a2 = np.random.randint(10, size=(10, 4))  # two-dimensional array
a3 = np.random.randint(10, size=(10, 3, 3))  # three-dimensional array

## Basic array manipulations

#### Indexing and slicing arrays
- Getting and setting the value of individual array elements
- Getting and setting smaller subarrays within a larger array

In [32]:
# numpy.arange([start, ]stop, [step, ]dtype=None, *, like=None)

# start defaults to 0 and step to 1, so a single stop value yields 0..9.
arr = np.arange(10)

In [33]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [34]:
arr[5]

np.int64(5)

In [35]:
arr[:]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [36]:
arr[2:5]

array([2, 3, 4])

In [37]:
arr[2:5] = 111 # data is not copied, and any modifications to the view will be reflected in the source array

In [38]:
arr

array([  0,   1, 111, 111, 111,   5,   6,   7,   8,   9])

In [39]:
# 3x3 integer matrix holding 1..9 in row-major order.
array2D = np.arange(1, 10).reshape(3, 3)

In [40]:
array2D

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [41]:
array2D[2]

array([7, 8, 9])

In [42]:
array2D[1,1]

np.int64(5)

In [43]:
array2D[1,0]

np.int64(4)

In [44]:
# Address the element with one (row, column) index rather than chaining [1][0]:
# chaining first builds a temporary view of row 1 and then indexes into it,
# while the tuple form is a single indexing operation and matches the
# array2D[1,0] lookups used just above.
array2D[1, 0] = 44

In [45]:
array2D

array([[ 1,  2,  3],
       [44,  5,  6],
       [ 7,  8,  9]])

In [46]:
array2D[:, 0:1]

array([[ 1],
       [44],
       [ 7]])

In [47]:
array2D[2:,1:]

array([[8, 9]])

In [48]:
array2D[:,1:]

array([[2, 3],
       [5, 6],
       [8, 9]])

#### Boolean indexing

In [49]:
data = np.random.randn(7,5)

In [50]:
data

array([[ 1.14837225, -0.19008395, -0.43357159, -2.37554966,  0.61669614],
       [ 0.47459655, -1.17561811, -0.48469971,  0.12946714, -1.68749917],
       [-1.38288863, -0.81274748, -0.23186602,  0.72999437, -0.55567594],
       [ 0.66753279,  0.0247566 , -0.0706621 , -0.82062777, -1.44454058],
       [ 0.93490484, -1.85856722,  1.12270412,  0.50683035, -2.15286541],
       [ 1.1968452 ,  0.31565987,  2.05786279, -1.37175595,  0.31641224],
       [-1.10604307,  0.24801555, -2.42545942,  0.2192103 ,  0.65334011]])

In [51]:
data < 0

array([[False,  True,  True,  True, False],
       [False,  True,  True, False,  True],
       [ True,  True,  True, False,  True],
       [False, False,  True,  True,  True],
       [False,  True, False, False,  True],
       [False, False, False,  True, False],
       [ True, False,  True, False, False]])

In [52]:
# The boolean mask data<0 selects every negative entry; assigning through it
# overwrites those entries in data itself (no new array is created).
data[data<0]=0

In [53]:
data

array([[1.14837225, 0.        , 0.        , 0.        , 0.61669614],
       [0.47459655, 0.        , 0.        , 0.12946714, 0.        ],
       [0.        , 0.        , 0.        , 0.72999437, 0.        ],
       [0.66753279, 0.0247566 , 0.        , 0.        , 0.        ],
       [0.93490484, 0.        , 1.12270412, 0.50683035, 0.        ],
       [1.1968452 , 0.31565987, 2.05786279, 0.        , 0.31641224],
       [0.        , 0.24801555, 0.        , 0.2192103 , 0.65334011]])

In [54]:
array2D

array([[ 1,  2,  3],
       [44,  5,  6],
       [ 7,  8,  9]])

In [55]:
array2D == 5

array([[False, False, False],
       [False,  True, False],
       [False, False, False]])

In [56]:
array2D[array2D == 5]=0

In [57]:
array2D

array([[ 1,  2,  3],
       [44,  0,  6],
       [ 7,  8,  9]])

In [58]:
(array2D == 3) | (array2D == 8)

array([[False, False,  True],
       [False, False, False],
       [False,  True, False]])

#### Fancy Indexing
- Fancy indexing is a term adopted by NumPy to describe indexing using integer arrays.
- To select out a subset of the rows in a particular order, you can simply pass a list or ndarray of integers specifying the desired order

In [59]:
arr = np.empty((10, 6))

In [60]:
# Fill row r with the value r. A column vector of row numbers broadcasts across
# the columns, so one vectorized assignment replaces the Python-level loop and
# adapts to however many rows arr has.
arr[:] = np.arange(arr.shape[0])[:, np.newaxis]

In [61]:
arr

array([[0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1.],
       [2., 2., 2., 2., 2., 2.],
       [3., 3., 3., 3., 3., 3.],
       [4., 4., 4., 4., 4., 4.],
       [5., 5., 5., 5., 5., 5.],
       [6., 6., 6., 6., 6., 6.],
       [7., 7., 7., 7., 7., 7.],
       [8., 8., 8., 8., 8., 8.],
       [9., 9., 9., 9., 9., 9.]])

In [62]:
arr[[2,1,4]]

array([[2., 2., 2., 2., 2., 2.],
       [1., 1., 1., 1., 1., 1.],
       [4., 4., 4., 4., 4., 4.]])

In [63]:
arr[np.array([1,5,4,3,6,6,7])]

array([[1., 1., 1., 1., 1., 1.],
       [5., 5., 5., 5., 5., 5.],
       [4., 4., 4., 4., 4., 4.],
       [3., 3., 3., 3., 3., 3.],
       [6., 6., 6., 6., 6., 6.],
       [6., 6., 6., 6., 6., 6.],
       [7., 7., 7., 7., 7., 7.]])

#### Reshaping of arrays
- Changing the shape of a given array


In [64]:
arr.shape

(10, 6)

In [65]:
arr

array([[0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1.],
       [2., 2., 2., 2., 2., 2.],
       [3., 3., 3., 3., 3., 3.],
       [4., 4., 4., 4., 4., 4.],
       [5., 5., 5., 5., 5., 5.],
       [6., 6., 6., 6., 6., 6.],
       [7., 7., 7., 7., 7., 7.],
       [8., 8., 8., 8., 8., 8.],
       [9., 9., 9., 9., 9., 9.]])

In [66]:
arr.reshape((5,12))

array([[0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1.],
       [2., 2., 2., 2., 2., 2., 3., 3., 3., 3., 3., 3.],
       [4., 4., 4., 4., 4., 4., 5., 5., 5., 5., 5., 5.],
       [6., 6., 6., 6., 6., 6., 7., 7., 7., 7., 7., 7.],
       [8., 8., 8., 8., 8., 8., 9., 9., 9., 9., 9., 9.]])

In [67]:
arr.reshape((-1,15))

array([[0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 2., 2., 2.],
       [2., 2., 2., 3., 3., 3., 3., 3., 3., 4., 4., 4., 4., 4., 4.],
       [5., 5., 5., 5., 5., 5., 6., 6., 6., 6., 6., 6., 7., 7., 7.],
       [7., 7., 7., 8., 8., 8., 8., 8., 8., 9., 9., 9., 9., 9., 9.]])

In [68]:
arr.reshape((3,2,10))

array([[[0., 0., 0., 0., 0., 0., 1., 1., 1., 1.],
        [1., 1., 2., 2., 2., 2., 2., 2., 3., 3.]],

       [[3., 3., 3., 3., 4., 4., 4., 4., 4., 4.],
        [5., 5., 5., 5., 5., 5., 6., 6., 6., 6.]],

       [[6., 6., 7., 7., 7., 7., 7., 7., 8., 8.],
        [8., 8., 8., 8., 9., 9., 9., 9., 9., 9.]]])

#### Change the data type of an array.

In [69]:
# 2x3 matrix stored explicitly as 32-bit integers (dtype passed by keyword).
x = np.array([[2, 4, 6], [6, 8, 10]], dtype=np.int32)

In [70]:
y= x.astype(float)

In [71]:
y

array([[ 2.,  4.,  6.],
       [ 6.,  8., 10.]])

#### Joining and splitting of arrays
- Combining multiple arrays into one, and splitting one array into many
  - np.concatenate takes a tuple or list of arrays as its first argument

In [72]:
# Two 1-D integer arrays with consecutive values: x holds 1..5, y holds 6..10.
x = np.arange(1, 6)
y = np.arange(6, 11)

In [73]:
np.concatenate([x,y])

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [74]:
# x and y are 1-D, so they have no axis 1: this call is expected to fail, and
# the failure is the point of the cell. Catch it and print the message so the
# error is still shown but "Restart & Run All" does not stop here.
try:
    # axis: the axis along which the arrays will be joined.
    # If axis is None, arrays are flattened before use. Default is 0.
    np.concatenate([x, y], axis=1)
except ValueError as err:  # NumPy's AxisError is a subclass of ValueError
    print(f"{type(err).__name__}: {err}")

AxisError: axis 1 is out of bounds for array of dimension 1

In [None]:
# Turn each 1-D array into a single-row 2-D array; -1 lets NumPy infer the
# number of columns from the array's length.
xR = np.reshape(x, (1, -1))
yR = np.reshape(y, (1, -1))

In [None]:
xR

In [None]:
xR.ndim

In [None]:
xR.shape

In [None]:
np.concatenate([xR,yR])    # default axis=0

In [None]:
np.concatenate([xR,yR], axis=1)

- For working with arrays of mixed dimensions, it can be clearer to use the np.vstack (vertical stack) and np.hstack (horizontal stack) functions


In [76]:
# A 1-D array of three values and a 2x3 matrix with matching row length,
# used below to demonstrate stacking arrays of different dimensions.
x = np.arange(1, 4)
y = np.arange(4, 10).reshape(2, 3)

In [77]:
x

array([1, 2, 3])

In [78]:
y

array([[4, 5, 6],
       [7, 8, 9]])

In [79]:
np.vstack([x,y])

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [80]:
np.hstack([y,y])

array([[4, 5, 6, 4, 5, 6],
       [7, 8, 9, 7, 8, 9]])

- The opposite of concatenation is splitting, which is implemented by the function np.split (and its variants np.hsplit and np.vsplit)


In [None]:
# Cut y once, before index 1 along axis 0 (the default axis):
# z receives the rows before the cut, k the rows from the cut onwards.
z, k = np.split(y, indices_or_sections=[1], axis=0)

In [None]:
z

In [None]:
k

## Computation on NumPy Arrays



- Any arithmetic operation between **equal-size arrays** is applied **element-wise**
- Arithmetic operations with a scalar propagate the scalar to each element

In [None]:
# Two 10x10 matrices of random integers drawn from [0, 10), generated one after
# the other. NOTE: this rebinds arr1 and arr2, which until now were the 1-D
# arrays built from data1 and data2; every later cell sees these matrices.
arr1, arr2 = (np.random.randint(10, size=(10, 10)) for _ in range(2))

In [None]:
arr1

In [None]:
arr1 * 125

In [None]:
arr1 + arr2    # arr1 and arr2 have the same shape

In [None]:
arr1 * arr2 - arr1 / ( arr2 + 1)

- Transposing arrays and inner matrix product

In [82]:
arr1

array([ 1,  3,  5,  7,  9, 11])

In [81]:
arr1.T

array([ 1,  3,  5,  7,  9, 11])

In [83]:
# NOTE(review): on a top-to-bottom run, arr1 and arr2 are the 10x10 random
# integer matrices assigned earlier in this section, so this computes a matrix
# product. The stored scalar output below (292.5) instead matches the inner
# product of the 1-D arr1/arr2 built from data1/data2 at the top of the
# notebook, i.e. it came from out-of-order execution; re-run to refresh it.
np.dot(arr1,arr2)

np.float64(292.5)

### Mathematical and Statistical Methods

- A set of mathematical functions which compute statistics about an entire array or about the data along an axis are accessible as array methods.



Algebraic operations:

```
+	np.add	Addition (e.g., 1 + 1 = 2)
-	np.subtract	Subtraction (e.g., 3 - 2 = 1)
-	np.negative	Unary negation (e.g., -2)
*	np.multiply	Multiplication (e.g., 2 * 3 = 6)
/	np.divide	Division (e.g., 3 / 2 = 1.5)
//	np.floor_divide	Floor division (e.g., 3 // 2 = 1)
**	np.power	Exponentiation (e.g., 2 ** 3 = 8)
%	np.mod	Modulus/remainder (e.g., 9 % 4 = 1)
```

Trigonometric functions:

```
sin, cos, tan	compute sine, cosine and tangent of angles
arcsin, arccos, arctan	calculate inverse sine, cosine and tangent
hypot	calculate hypotenuse of given right triangle
sinh, cosh, tanh	compute hyperbolic sine, cosine and tangent
arcsinh, arccosh, arctanh	compute inverse hyperbolic sine, cosine and tangent
deg2rad	convert degrees into radians
rad2deg	convert radians into degrees
```

Statistical functions:

```
amin, amax	returns minimum or maximum of an array or along an axis
ptp	returns range of values (maximum-minimum) of an array or along an axis
percentile(a, p, axis)	calculate pth percentile of array or along specified axis
median	compute median of data along specified axis
mean	compute mean of data along specified axis
std	compute standard deviation of data along specified axis
var	compute variance of data along specified axis
average	compute average of data along specified axis
```

In [None]:
arr1

In [None]:
np.median(arr1)

In [None]:
np.add(arr1,arr2)    # subtract, multiply, divide

In [None]:
arr1+arr2

- **np.where** returns elements depending on condition

In [84]:
a = np.arange(1,11)

In [85]:
a

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [86]:
np.where(a>5,0,10)

array([10, 10, 10, 10, 10,  0,  0,  0,  0,  0])

In [None]:
b = np.arange(10,20)

In [None]:
b

In [None]:
np.where(a%2==1,a,b)

## Boolean Arrays

- Boolean values are coerced to 1 (True) and 0 (False) in the above methods.
- Sum is often used as a means of counting True values in a boolean array.


In [None]:
bools = np.array([True,True,False,False,True])

In [None]:
bools.sum()

In [None]:
bools.any()

In [None]:
bools.all()

In [None]:
bools[:2].all()

## Sorting
- Like Python's built-in list type, NumPy arrays can be sorted in-place using the sort method

In [88]:
arrsort = np.random.randn(4,3)

In [89]:
arrsort

array([[-0.36241331, -2.40147292, -0.44161685],
       [ 0.08291757,  1.39162051, -0.81747857],
       [-1.00695865,  1.00535086,  0.62165588],
       [-0.35740841, -0.07467656,  0.07140298]])

In [90]:
arrsort.sort(0)    # in-place sort along axis 0 (down the rows): each column ends up sorted

In [91]:
arrsort

array([[-1.00695865, -2.40147292, -0.81747857],
       [-0.36241331, -0.07467656, -0.44161685],
       [-0.35740841,  1.00535086,  0.07140298],
       [ 0.08291757,  1.39162051,  0.62165588]])

In [None]:
arrsort.sort(1)    # in-place sort along axis 1 (across the columns): each row ends up sorted

In [None]:
arrsort

## Unique and Other Set Logic

- NumPy has some basic set operations for one-dimensional ndarrays.

In [None]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

In [None]:
np.unique(names)