### Numpy Primer
----------------------

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
%matplotlib inline
import random

### Numpy data manipulation

In [2]:
# Multiplying a plain Python list by an integer repeats the list;
# it does not scale the individual values.
l = [0.0, 0.5, 1.5, 3.0, 5.0]
l * 2

[0.0, 0.5, 1.5, 3.0, 5.0, 0.0, 0.5, 1.5, 3.0, 5.0]

In [3]:
# The same values as a NumPy array: arithmetic is applied element by element.
# Note that `l` is rebound from a list to an ndarray here.
l = np.array(l)
print(l * 2)   # every element doubled
print(l ** 2)  # every element squared
print (l ** 2)

[ 0.  1.  3.  6. 10.]
[ 0.    0.25  2.25  9.   25.  ]


In [4]:
# np.linspace returns `num` evenly spaced samples; both the start (5) and the
# stop (10) are part of the 15 values, as the printed array shows.
g = np.linspace(start=5, stop=10, num=15)
print(g)
print('Parameters: Start, Stop, Number of data points between start and stop point')

[ 5.          5.35714286  5.71428571  6.07142857  6.42857143  6.78571429
  7.14285714  7.5         7.85714286  8.21428571  8.57142857  8.92857143
  9.28571429  9.64285714 10.        ]
Parameters: Start, Stop, Number of data points between start and stop point


### Vectorization of Code

Two dummy data sets.

In [5]:
# Two independent 4x3 samples drawn from the standard normal distribution.
r = np.random.standard_normal(size=(4, 3))
s = np.random.standard_normal(size=(4, 3))
print(r, '-' * 40, s, sep='\n')

[[ 0.5979909   0.83204476  0.46153519]
 [-0.90833755 -2.15974164 -0.24700226]
 [-0.48750506 -2.7975926   0.76752032]
 [-1.2314901  -1.04516582 -1.23548797]]
----------------------------------------
[[ 0.2568726   0.25959953  1.15572724]
 [-0.18204032  0.47165585 -1.77063832]
 [ 0.20752485  0.13689968  0.65910189]
 [-0.81238372 -0.08919235  0.21489045]]


Element-wise addition.

In [6]:
# Arrays of the same shape are added element by element.
r + s

array([[ 0.85486349,  1.09164428,  1.61726243],
       [-1.09037787, -1.68808579, -2.01764059],
       [-0.27998021, -2.66069293,  1.42662221],
       [-2.04387382, -1.13435816, -1.02059752]])

Broadcasting

In [7]:
# The scalars 2 and 3 are broadcast across every element of r.
2 * r + 3

array([[ 4.1959818 ,  4.66408951,  3.92307037],
       [ 1.1833249 , -1.31948327,  2.50599547],
       [ 2.02498988, -2.59518521,  4.53504064],
       [ 0.53701981,  0.90966837,  0.52902407]])

In [8]:
# Two 1-D vectors of three standard-normal samples each.
t = np.random.standard_normal(size=3)
w = np.random.standard_normal(size=3)

In [9]:
# Transposing a 1-D array leaves it unchanged, so this is a plain element-wise sum.
t.T + w

array([-0.08350541,  2.10609778, -0.30864684])

In [10]:
# The transposed 1-D array still has a single axis of length 3.
t.T.shape

(3,)

### Basic Algebra

In [11]:
# A small float vector used by the reduction examples below.
a = np.array([0.0, 0.5, 1.0, 1.5, 2.0])
type(a)

numpy.ndarray

In [12]:
# Sum of all elements.
a.sum()

5.0

In [13]:
# Population standard deviation (divides by N, i.e. the default ddof=0).
a.std()

0.7071067811865476

In [14]:
# NOTE(review): this repeats the previous cell exactly; possibly a.var() or
# a.std(ddof=1) was intended here - confirm with the author.
a.std()

0.7071067811865476

In [15]:
# Running total: element i is the sum of a[0] through a[i].
a.cumsum()

array([0. , 0.5, 1.5, 3. , 5. ])

In [16]:
# Element-wise square root.
np.sqrt(a)

array([0.        , 0.70710678, 1.        , 1.22474487, 1.41421356])

In [17]:
# Two small integer vectors for the product examples (`a` is rebound here).
a, b = np.array([1, 2, 3]), np.array([2, 3, 4])

In [18]:
a * b # element-wise product: [1*2, 2*3, 3*4]

array([ 2,  6, 12])

In [19]:
np.dot(a,b) # for two 1-D vectors this is the dot (inner) product: 1*2 + 2*3 + 3*4

20

In [20]:
np.inner(a,b) # inner product; for 1-D vectors it gives the same result as np.dot

20

In [21]:
# Outer product: o[i, j] = a[i] * b[j], so two length-3 vectors give a 3x3 matrix.
o = np.outer(a,b)
o

array([[ 2,  3,  4],
       [ 4,  6,  8],
       [ 6,  9, 12]])

In [22]:
# Matrix power: the matrix product of o with itself (o @ o).
# This is not the same as `o ** 2`, which squares each element.
np.linalg.matrix_power(o, 2)

array([[ 40,  60,  80],
       [ 80, 120, 160],
       [120, 180, 240]])

In [23]:
# Eigenvalues only. o is an outer product (rank one), so a single eigenvalue
# is non-zero (20, equal to the dot product of a and b); the others are zero
# up to floating-point rounding.
np.linalg.eigvals(o)

array([0.00000000e+00, 2.00000000e+01, 3.33066907e-16])

In [24]:
# Returns (eigenvalues, eigenvectors); the right eigenvectors are the COLUMNS
# of the second array, column k belonging to eigenvalue k.
np.linalg.eig(o)

(array([0.00000000e+00, 2.00000000e+01, 3.33066907e-16]),
 array([[-0.92827912,  0.26726124, -0.56069064],
        [ 0.20628425,  0.53452248, -0.50309305],
        [ 0.30942637,  0.80178373,  0.65766511]]))

In [25]:
# For a 2-D array ord=1 is the induced 1-norm: the largest absolute column sum
# (here 4 + 8 + 12 = 24).
np.linalg.norm(o, ord=1)

24.0

In [26]:
# For a 2-D array ord=2 is the spectral norm (largest singular value).
# It is NOT the default: without `ord`, a matrix gets the Frobenius norm.
# The two coincide here only because o has rank one.
np.linalg.norm(o, ord=2)

20.149441679609886

In [27]:
# axis=0: Euclidean norm of each column (one value per column).
np.linalg.norm(o, axis=0)

array([ 7.48331477, 11.22497216, 14.96662955])

In [28]:
# axis=1: Euclidean norm of each row (one value per row).
np.linalg.norm(o, axis=1)

array([ 5.38516481, 10.77032961, 16.15549442])

In [29]:
# 5x4 matrix of standard-normal samples used by the aggregation examples.
arr11 = np.random.standard_normal(size=(5, 4))
arr11

array([[-2.43416813e-01,  4.53331821e-02,  2.05487377e+00,
         8.33511241e-01],
       [-8.14522156e-02,  6.01056823e-01,  7.69512171e-01,
         1.44283972e+00],
       [ 1.62831652e+00,  1.48721328e+00,  5.05180762e-01,
        -1.66563997e-01],
       [ 1.40810211e+00,  1.32007259e+00, -5.71858473e-01,
        -1.27861303e-01],
       [-1.38204903e-03, -6.04300113e-01, -8.72467848e-01,
        -5.90855896e-01]])

In [30]:
# Mean over all 20 elements, computed two equivalent ways.
print(arr11.mean())    # option 1: array method
print(np.mean(arr11))  # option 2: module-level function

0.44179267214231854
0.44179267214231854


In [31]:
# Sum over all elements, computed two equivalent ways.
print(arr11.sum())    # array method
print(np.sum(arr11))  # module-level function

8.835853442846371
8.835853442846371


In [32]:
# Mean of each row: axis=1 averages across the columns, giving one value per row.
arr11.mean(axis = 1)

array([ 0.67257534,  0.68298912,  0.86353664,  0.50711373, -0.51725148])

In [33]:
# Mean of each column: axis=0 averages down the rows, giving one value per column.
arr11.mean(axis = 0)

array([0.54203351, 0.56987515, 0.37704808, 0.27821395])

In [34]:
# 3x3 integer matrix holding 1..9 in row-major order.
arr12 = np.arange(1, 10).reshape(3, 3)
arr12

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [35]:
# axis=0 accumulates down each column; axis=1 accumulates across each row.
print('Cumulative Sum by column')
print(arr12.cumsum(axis=0))
print('Cumulative Sum by row')
print(arr12.cumsum(axis=1))

Cumulative Sum by column
[[ 1  2  3]
 [ 5  7  9]
 [12 15 18]]
Cumulative Sum by row
[[ 1  3  6]
 [ 4  9 15]
 [ 7 15 24]]


In [36]:
# Running product: axis=0 multiplies down each column, axis=1 across each row.
print('Cumulative Product by column')
print(arr12.cumprod(axis=0))
print('Cumulative Product by row')
print(arr12.cumprod(axis=1))

Cumulative Product by column
[[  1   2   3]
 [  4  10  18]
 [ 28  80 162]]
Cumulative Product by row
[[  1   2   6]
 [  4  20 120]
 [  7  56 504]]


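### Most common methods:
sum,
mean,
std, var,
min, max,
argmin, argmax,
cumsum,
cumprod

Element-wise functions such as exp and sqrt are NumPy functions rather than array methods: call them as `np.exp(arr)` and `np.sqrt(arr)`.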

### Array comparison

In [37]:
# Two vectors of eight standard-normal samples to compare element by element.
arrA = np.random.standard_normal(8)
arrB = np.random.standard_normal(8)

In [38]:
# Element-wise maximum: position i holds the larger of arrA[i] and arrB[i].
np.maximum(arrA, arrB)

array([0.62460706, 0.1510896 , 0.08343006, 2.67569416, 0.74730451,
       1.56915544, 0.26059201, 2.15137725])

In [39]:
# Display arrA so its values can be compared with the element-wise maximum.
arrA

array([ 0.62460706, -0.9144302 , -1.13929151,  2.67569416,  0.74730451,
        0.00724531,  0.26059201,  1.61982831])

In [40]:
# Fractional and integral components of each number

np.modf(arrA) # returns a tuple of two arrays: (fractional parts, integral parts); both keep the sign of the input

(array([ 0.62460706, -0.9144302 , -0.13929151,  0.67569416,  0.74730451,
         0.00724531,  0.26059201,  0.61982831]),
 array([ 0., -0., -1.,  2.,  0.,  0.,  0.,  1.]))

### How to create vectors and meshgrid

In [41]:
points = np.arange(-1, 1, 0.01) # 1-D vector from -1 in steps of 0.01; the stop value 1 is excluded

In [42]:
# Build two 2-D coordinate grids from the 1-D vector:
# xs[i, j] = points[j] (same values in every row),
# ys[i, j] = points[i] (same values in every column).
xs,ys = np.meshgrid(points, points)


In [43]:
# x-coordinates: every row is a copy of `points`.
xs

array([[-1.  , -0.99, -0.98, ...,  0.97,  0.98,  0.99],
       [-1.  , -0.99, -0.98, ...,  0.97,  0.98,  0.99],
       [-1.  , -0.99, -0.98, ...,  0.97,  0.98,  0.99],
       ...,
       [-1.  , -0.99, -0.98, ...,  0.97,  0.98,  0.99],
       [-1.  , -0.99, -0.98, ...,  0.97,  0.98,  0.99],
       [-1.  , -0.99, -0.98, ...,  0.97,  0.98,  0.99]])

In [44]:
# y-coordinates: every column is a copy of `points`.
ys

array([[-1.  , -1.  , -1.  , ..., -1.  , -1.  , -1.  ],
       [-0.99, -0.99, -0.99, ..., -0.99, -0.99, -0.99],
       [-0.98, -0.98, -0.98, ..., -0.98, -0.98, -0.98],
       ...,
       [ 0.97,  0.97,  0.97, ...,  0.97,  0.97,  0.97],
       [ 0.98,  0.98,  0.98, ...,  0.98,  0.98,  0.98],
       [ 0.99,  0.99,  0.99, ...,  0.99,  0.99,  0.99]])

In [45]:
arr13 = np.random.randn(100)
# The comparison gives a boolean array; summing it counts the True entries,
# i.e. HOW MANY values are greater than 0 (the values themselves are not added).
(arr13 > 0).sum()

50

In [46]:
# Boolean array with exactly one True entry, used for the any()/all() examples.
bools = np.array([False, False, True, False])

In [47]:
bools.any() # True if at least one value in the array is True

True

In [48]:
bools.all() # True only if every value in the array is True

False

In [49]:
# Eight uniform samples from [0, 1).
arr14 = np.random.rand(8)
# ndarray.sort() sorts in place (ascending) and returns None,
# so the array is displayed on its own line afterwards.
arr14.sort()
arr14

array([0.04541872, 0.15608938, 0.32671533, 0.40351621, 0.41158445,
       0.59683129, 0.63699628, 0.80360575])

In [50]:
# 5x3 matrix of standard-normal samples for the axis-wise sorting example.
arr15 = np.random.standard_normal(size=(5, 3))

In [51]:
# ndarray.sort works in place, so the second sort operates on the data that
# was already sorted along the rows.
print('Sort by rows')
arr15.sort(axis=1)  # order the values inside each row
print(arr15)
print('Sort by columns')
arr15.sort(axis=0)  # order each column independently; original rows are not kept together
print(arr15)

Sort by rows
[[-0.10615321  0.18483633  2.07404745]
 [-0.67865569 -0.46895644  1.76326969]
 [-1.0835082  -0.31881785  1.28343443]
 [-2.6386929  -1.29582922 -1.04588391]
 [ 0.00525276  0.59364935  1.16251656]]
Sort by columns
[[-2.6386929  -1.29582922 -1.04588391]
 [-1.0835082  -0.46895644  1.16251656]
 [-0.67865569 -0.31881785  1.28343443]
 [-0.10615321  0.18483633  1.76326969]
 [ 0.00525276  0.59364935  2.07404745]]


In [52]:
# Quick way to approximate the 5th percentile: sort ascending (in place), then
# read the element located 5% of the way into the array.
# np.percentile(large_arr, 5) interpolates between neighbours, so its result
# may differ slightly from this lookup.
large_arr = np.random.randn(1000)
large_arr.sort()
print(large_arr[int(0.05 * large_arr.size)])
print(large_arr[50])  # same element: 5% of 1000 samples is index 50

-1.5018008559303302
-1.5018008559303302
