# Why is it called Python?

When he began implementing [Python](https://docs.python.org/3/faq/general.html), Guido van Rossum was also reading the published scripts from “Monty Python’s Flying Circus”, a BBC comedy series from the 1970s. Van Rossum thought he needed a name that was short, unique, and slightly mysterious, so he decided to call the language Python.

![guido van rossum](https://gvanrossum.github.io/images/guido-headshot-2019.jpg)

# NumPy

Numerical Python
NumPy is the Swiss Army knife of Python. It is the preferred way to work with numbers. NumPy is an optimized library whose array operations typically run much faster than the equivalent built-in Python methods.

Most Python analysis packages prefer NumPy arrays.


In [1]:
import numpy as np

In [2]:
# Build the integers 0..14, then view them as 3 rows by 5 columns.
a = np.reshape(np.arange(15), (3, 5))
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

# Difference between range and arange?

The built-in Python function range() returns a lazy `range` object: it produces its values on demand when you iterate over it. It is not an actual array holding the numbers 0 to 14.

The NumPy function arange() returns an actual array.

In [3]:
b = range(15)

In [4]:
b

range(0, 15)

In [5]:
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [6]:
a.shape

(3, 5)

In [7]:
a.shape[1]

5

In [59]:
a.ndim

2

In [8]:
len(a)

3

# Converting regular Python lists to NumPy arrays

In [9]:
# A plain built-in list: no NumPy involved yet.
c = [3, 4, 7, 8, 10]
print(type(c))

<class 'list'>


In [10]:
d = np.array(c)
print(type(d))

<class 'numpy.ndarray'>


In [11]:
s = np.array(['hello'])
np.char.isalpha(s)

array([ True])

In [12]:
s = np.array(['hello7'])
np.char.isalpha(s)

array([False])

# Special NumPy array generators

In [13]:
# Pre-allocated arrays: the first argument is the shape, `dtype` fixes the element type.
zeros = np.zeros(shape=(3, 3), dtype=np.int32)    # 3x3 array filled with 0
ones = np.ones(shape=(2, 2), dtype=np.float64)    # 2x2 array filled with 1.0
empty = np.empty(shape=(3, 3))                    # 3x3 array left uninitialized (arbitrary values)

print(zeros, zeros.dtype.name)
print("\n")
print(ones, ones.dtype.name)
print("\n")
print(empty, empty.dtype.name)

[[0 0 0]
 [0 0 0]
 [0 0 0]] int32


[[1. 1.]
 [1. 1.]] float64


[[0.00000000e+000 0.00000000e+000 0.00000000e+000]
 [0.00000000e+000 0.00000000e+000 0.00000000e+000]
 [6.09249078e+247 4.95264277e+223 3.95252517e-322]] float64


In [14]:
# Every 4th integer starting at 5 and stopping before 30 (the stop value is excluded).
a = np.arange(start=5, stop=30, step=4)
print(a)

[ 5  9 13 17 21 25 29]


In [15]:
# 100 evenly spaced samples from 0 to 1, with both endpoints included.
b = np.linspace(start=0, stop=1, num=100)
print(b)

[0.         0.01010101 0.02020202 0.03030303 0.04040404 0.05050505
 0.06060606 0.07070707 0.08080808 0.09090909 0.1010101  0.11111111
 0.12121212 0.13131313 0.14141414 0.15151515 0.16161616 0.17171717
 0.18181818 0.19191919 0.2020202  0.21212121 0.22222222 0.23232323
 0.24242424 0.25252525 0.26262626 0.27272727 0.28282828 0.29292929
 0.3030303  0.31313131 0.32323232 0.33333333 0.34343434 0.35353535
 0.36363636 0.37373737 0.38383838 0.39393939 0.4040404  0.41414141
 0.42424242 0.43434343 0.44444444 0.45454545 0.46464646 0.47474747
 0.48484848 0.49494949 0.50505051 0.51515152 0.52525253 0.53535354
 0.54545455 0.55555556 0.56565657 0.57575758 0.58585859 0.5959596
 0.60606061 0.61616162 0.62626263 0.63636364 0.64646465 0.65656566
 0.66666667 0.67676768 0.68686869 0.6969697  0.70707071 0.71717172
 0.72727273 0.73737374 0.74747475 0.75757576 0.76767677 0.77777778
 0.78787879 0.7979798  0.80808081 0.81818182 0.82828283 0.83838384
 0.84848485 0.85858586 0.86868687 0.87878788 0.88888889 0.89898

In [16]:
print(np.pi)  # Pi

3.141592653589793


In [17]:
print(np.nan)

nan


In [18]:
# Positive infinity. Use `np.inf`: the `np.Infinity` alias was removed in NumPy 2.0
# and raises AttributeError there, while `np.inf` works on every NumPy version.
print(np.inf)

inf


In [19]:
print(np.e)

2.718281828459045


In [20]:
np.datetime64('today') # today's date

numpy.datetime64('2021-09-07')

In [21]:
np.datetime64('now') # timestamp right now 

numpy.datetime64('2021-09-07T21:39:37')

In [22]:
# Dates from 2021-09-01 up to (but not including) 2021-10-05, stepping 3 days at a time.
np.arange('2021-09', '2021-10-05', 3, dtype='datetime64[D]')

array(['2021-09-01', '2021-09-04', '2021-09-07', '2021-09-10',
       '2021-09-13', '2021-09-16', '2021-09-19', '2021-09-22',
       '2021-09-25', '2021-09-28', '2021-10-01', '2021-10-04'],
      dtype='datetime64[D]')

# Boolean searches

In [23]:
d = np.array([1.3, 12.5, -7, 15, 0.789, -9.2, 1.3])
# `d <= -2` builds a boolean mask; indexing with it keeps only the elements where the mask is True.
d[d <= -2]

array([-7. , -9.2])

In [24]:
d[d == 1.3]

array([1.3, 1.3])

In [25]:
# A plain Python list; the NumPy reductions below accept it directly.
r = [4, 5, -2, 56.6, 23, -300.45, 50.1, -3.9, -901.7]
# max/min return the extreme values; argmax/argmin return their 0-based positions.
print(f'The largest value of {np.max(r)} is found at position {np.argmax(r)} in the array.')
print(f'The smallest value of {np.min(r)} is found at position {np.argmin(r)} in the array.')

The largest value of 56.6 is found at position 3 in the array.
The smallest value of -901.7 is found at position 8 in the array.


In [65]:
y1 = np.array([5, -2, 3, 4, 4, 5, 10])
y2 = np.array([1, -2, 4, 3, 4, 5, 99])

In [66]:
y1 == y2

array([False,  True, False, False,  True,  True, False])

In [67]:
y1 < y2

array([False, False,  True, False, False, False,  True])

In [68]:
np.array_equal(y1, y2)

False

# Simple Statistics

https://numpy.org/doc/stable/reference/routines.statistics.html

In [26]:
np.mean(r)

-118.81666666666668

In [27]:
np.std(r)

294.87836852204373

In [28]:
np.var(r)

86953.25222222222

In [29]:
m = np.array([10.5, -100, 5.6, 16.24])

# np.mean(m) is the function form of the m.mean() method; `:.4f` shows four decimal places.
print(f'Mean = {np.mean(m):.4f}')

Mean = -16.9150


In [30]:
# Same four values as `m`, plus two NaN placeholders for missing data.
m2 = np.array([np.nan, 10.5, -100, np.nan, 5.6, 16.24])

# nanmean skips the NaN entries, so the result equals the mean of the four real values.
print(f'Nan Mean = {np.nanmean(m2):.4f}')


Nan Mean = -16.9150


In [31]:
np.median(m)

8.05

In [32]:
m3 = np.array([[3,5,4], [1,2,-3], [0.4, 12.3, 1], [102.4, 3, -9], [-44, -0.7, 22.6]])

In [33]:
m3.shape

(5, 3)

In [34]:
m3.shape[1]

3

In [35]:
np.mean(m3)

6.666666666666669

In [36]:
np.mean(m3, axis=0)

array([12.56,  4.32,  3.12])

In [37]:
np.mean(m3, axis=1)

array([ 4.        ,  0.        ,  4.56666667, 32.13333333, -7.36666667])

In [38]:
a = np.array([1, 2, 3, 4, 6, 7, 8, 9])
b = np.array([2, 4, 6, 8, 10, 12, 13, 15])

# Pearson correlation matrix: the off-diagonal entries are the correlation between a and b.
np.corrcoef(a, b)

array([[1.        , 0.99535001],
       [0.99535001, 1.        ]])

In [39]:
a = np.array([10.2459123, -3.5344, 16, 240.47678, -35.999])

In [40]:
np.floor(a)

array([ 10.,  -4.,  16., 240., -36.])

In [41]:
np.round(a, decimals=1)

array([ 10.2,  -3.5,  16. , 240.5, -36. ])

In [42]:
np.round(a, decimals=2)

array([ 10.25,  -3.53,  16.  , 240.48, -36.  ])

In [43]:
np.ceil(a)

array([ 11.,  -3.,  16., 241., -35.])

In [70]:
# Natural logarithm of each element's absolute value.
# NOTE(review): `a` was last assigned as a 1-D, 5-element array (In [39]), but the output
# recorded below is a 3x2 array, so it appears to be stale -- re-run the cell to refresh it.
np.log(np.abs(a))

array([[0.        , 1.38629436],
       [1.09861229, 0.        ],
       [3.09104245, 1.09861229]])

In [76]:
# 4x3 array holding the integers 1..12 in row-major order.
a = np.arange(1, 13).reshape(4, 3)
print(a)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]


In [74]:
np.cumsum(a, axis=0)

array([[ 1,  2,  3],
       [ 5,  7,  9],
       [12, 15, 18],
       [22, 26, 30]])

In [75]:
np.cumsum(a, axis=1)

array([[ 1,  3,  6],
       [ 4,  9, 15],
       [ 7, 15, 24],
       [10, 21, 33]])

# Broadcasting

https://numpy.org/doc/stable/user/basics.broadcasting.html

The term broadcasting describes how numpy treats arrays with different shapes during arithmetic operations. Subject to certain constraints, the smaller array is “broadcast” across the larger array so that they have compatible shapes.

In [44]:
# The scalars are broadcast across every element of the length-10 arrays.
a = 5 * np.ones(10, dtype=int)               # integer array of 5s
b = 10.2 * np.ones(10, dtype=float) - 3.0    # float array of (approximately) 7.2s

# Mixing int and float operands upcasts the result to the float type.
a_plus_b = a + b

print(a_plus_b.dtype.name)
print(a_plus_b)

float64
[12.2 12.2 12.2 12.2 12.2 12.2 12.2 12.2 12.2 12.2]


In [45]:
# 10x12 array of uniform random floats in [0, 1).
# NOTE(review): no seed has been set at this point in the notebook, so the values
# differ from run to run.
a = np.random.random(size=(10, 12))
print(a)
print(a.shape)

[[0.3181365  0.25897361 0.48817566 0.61730491 0.60366161 0.50927346
  0.86240971 0.02623363 0.65680232 0.40369365 0.92281138 0.18079477]
 [0.39633916 0.51159906 0.88418159 0.84861312 0.56905267 0.11232138
  0.82960124 0.36844422 0.48378195 0.61671809 0.36971289 0.77166507]
 [0.85768044 0.64387285 0.27663112 0.87074771 0.74584461 0.44366242
  0.00679506 0.26167699 0.21139409 0.56173701 0.6747003  0.553765  ]
 [0.0958048  0.15895575 0.08829054 0.7315853  0.43848588 0.06985858
  0.98274366 0.50426382 0.00318783 0.3539469  0.74831183 0.49150417]
 [0.60634057 0.53395338 0.13434176 0.72940801 0.20375859 0.75231769
  0.54517708 0.03286044 0.68685742 0.57191038 0.34258984 0.43932806]
 [0.96129204 0.97555105 0.35678569 0.86145499 0.55170982 0.3928231
  0.89417668 0.43322079 0.40460562 0.90356574 0.0025588  0.80455721]
 [0.72403612 0.78200358 0.53608153 0.99023349 0.22163061 0.39799351
  0.02637775 0.74462453 0.52678773 0.49195768 0.98924836 0.94672265]
 [0.86130287 0.13122721 0.7799709  0.02464

# Slicing (basic indexing)


In [46]:
a[4:, 5:-3]

array([[0.75231769, 0.54517708, 0.03286044, 0.68685742],
       [0.3928231 , 0.89417668, 0.43322079, 0.40460562],
       [0.39799351, 0.02637775, 0.74462453, 0.52678773],
       [0.05430312, 0.14566205, 0.97727625, 0.51947956],
       [0.87770999, 0.20832529, 0.81512431, 0.73710572],
       [0.60392874, 0.38230198, 0.8750012 , 0.51031219]])

# Random numbers

In [47]:
# Fill a 3x5 array by sampling, with replacement, from the four candidate values.
a = np.random.choice([3, 4.5, -13, 155], size=(3,5), replace=True)
print(a)
print(a.shape)

[[155. -13. 155.   3.   3.]
 [155. 155. -13. 155. 155.]
 [  3. -13. 155. 155.   3.]]
(3, 5)


In [48]:
# The candidates can be strings too: a 1x8 array sampled with replacement.
a = np.random.choice(['yes', 'no', 'maybe', 'never'], size=(1,8), replace=True)
print(a)
print(a.shape)

[['yes' 'never' 'no' 'never' 'never' 'maybe' 'never' 'yes']]
(1, 8)


In [49]:
np.random.seed(816)  # Sets the random number generator starting point
# Guarantees repeatability for the np.random calls that run after this line.
# NOTE(review): the np.random.random / np.random.choice cells earlier in the notebook
# run before this seed is set, so their recorded values are not reproducible.

# Permutations/Shuffles/Sorts

In [50]:
a = np.arange(10)
print(a)
# shuffle reorders `a` in place; its return value is not used here.
np.random.shuffle(a)
print(a)

[0 1 2 3 4 5 6 7 8 9]
[1 7 0 6 2 8 5 3 9 4]


In [51]:
# The integers 0..8 laid out as a 3x3 grid.
a = np.reshape(np.arange(9), (3, 3))
print(a)

[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [52]:
b = np.random.permutation(a)
print(b)

[[3 4 5]
 [6 7 8]
 [0 1 2]]


In [53]:
np.arange(0, 10)**3

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [54]:
s = np.array([-4, 5.6, 0, 0.1, 100.4, -3, 10])

In [55]:
print(np.sort(s))

[-4.000e+00 -3.000e+00  0.000e+00  1.000e-01  5.600e+00  1.000e+01
  1.004e+02]


In [56]:
a = np.array([[1, 4], [3, 1], [22, -3]])
# axis=0 sorts down each column independently and returns a new array.
np.sort(a, axis=0)

array([[ 1, -3],
       [ 3,  1],
       [22,  4]])

In [57]:
np.sort(a, axis=1) 

array([[ 1,  4],
       [ 1,  3],
       [-3, 22]])