Similar notebook: https://www.kaggle.com/code/madhavigavade/numerical-computing-with-numpy

In [1]:
w1, w2, w3 = 0.3, 0.2, 0.5

In [2]:
kanto_temp = 73
kanto_rainfall = 67
kanto_humidity = 43

In [4]:
kanto_yield_apples = kanto_temp * w1 + kanto_rainfall * w2 + kanto_humidity * w3
kanto_yield_apples

56.8

In [5]:
print('The expected yield of apples in Kanto region is {} tons per hectare'.format(kanto_yield_apples))

The expected yield of apples in Kanto region is 56.8 tons per hectare


In [6]:
kanto = [73, 67, 43]
johto = [91, 88, 64]
hoenn = [87, 134, 58]
sinnoh = [102, 43, 37]
unova = [69, 96, 70]

In [7]:
weights = [w1, w2, w3]

In [13]:
kanto

[73, 67, 43]

In [14]:
weights

[0.3, 0.2, 0.5]

In [18]:
for item in zip(kanto, weights):
  print(item)

(73, 0.3)
(67, 0.2)
(43, 0.5)


In [8]:
def crop_yield(region, weights):
  """Return the weighted sum of one region's climate measurements.

  Args:
    region: Iterable of numeric measurements for a single region.
    weights: Iterable of numeric weights, one per measurement.

  Returns:
    The sum of ``x * w`` over the paired elements (0 for empty inputs).

  Raises:
    ValueError: If `region` and `weights` do not have the same length.
  """
  # Materialize both inputs so their lengths can be compared even when a
  # generator is passed in.
  region = list(region)
  weights = list(weights)
  if len(region) != len(weights):
    # zip() would silently drop the unmatched tail and return a wrong total.
    raise ValueError(
        'region and weights must have the same length, got {} and {}'.format(
            len(region), len(weights)))
  result = 0
  for x, w in zip(region, weights):
    result += x * w
  return result

In [19]:
crop_yield(kanto, weights)

56.8

In [20]:
crop_yield(johto, weights)

76.9

In [21]:
crop_yield(unova, weights)

74.9

In [22]:
import numpy as np

In [23]:
kanto = np.array([73, 67, 43])

In [24]:
kanto

array([73, 67, 43])

In [25]:
weights = np.array([w1, w2, w3])

In [26]:
weights

array([0.3, 0.2, 0.5])

In [27]:
type(kanto)

numpy.ndarray

In [28]:
type(weights)

numpy.ndarray

In [29]:
weights[0]

np.float64(0.3)

In [30]:
kanto[2]

np.int64(43)

In [35]:
help(np.dot)

Help on _ArrayFunctionDispatcher in module numpy:

dot(...)
    dot(a, b, out=None)
    
    Dot product of two arrays. Specifically,
    
    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
      (without complex conjugation).
    
    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
      but using :func:`matmul` or ``a @ b`` is preferred.
    
    - If either `a` or `b` is 0-D (scalar), it is equivalent to
      :func:`multiply` and using ``numpy.multiply(a, b)`` or ``a * b`` is
      preferred.
    
    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
      the last axis of `a` and `b`.
    
    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
      sum product over the last axis of `a` and the second-to-last axis of
      `b`::
    
        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])
    
    It uses an optimized BLAS library when possible (see `numpy.linalg`).
    
    Parameters
    -----

In [31]:
np.dot(kanto, weights)

np.float64(56.8)

In [36]:
kanto

array([73, 67, 43])

In [37]:
weights

array([0.3, 0.2, 0.5])

In [38]:
kanto * weights

array([21.9, 13.4, 21.5])

In [32]:
(kanto * weights).sum()

np.float64(56.8)

In [33]:
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

In [34]:
arr1 * arr2

array([ 4, 10, 18])

In [39]:
arr2.sum()

np.int64(15)

In [41]:
# Python lists
arr1 = list(range(1000000))
arr2 = list(range(1000000, 2000000))

# Numpy arrays
arr1_np = np.array(arr1)
arr2_np = np.array(arr2)

In [43]:
%%time
# Baseline: dot product of the two Python lists computed with an explicit
# loop. The %%time cell magic reports how long this takes, for comparison
# with the NumPy np.dot call on the equivalent arrays.
result = 0
for x1, x2 in zip(arr1, arr2):
  result += x1*x2
result

CPU times: user 368 ms, sys: 4.56 ms, total: 373 ms
Wall time: 401 ms


833332333333500000

In [44]:
%%time
np.dot(arr1_np, arr2_np)

CPU times: user 2.61 ms, sys: 0 ns, total: 2.61 ms
Wall time: 2.03 ms


np.int64(833332333333500000)

In [45]:
climate_data = np.array([[73, 67, 43],
                         [91, 88, 64],
                         [87, 134, 58],
                         [102, 43, 37],
                         [69, 96, 70]])

In [46]:
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [47]:
# 2D array (matrix)
climate_data.shape

(5, 3)

In [48]:
weights

array([0.3, 0.2, 0.5])

In [49]:
# 1D array (vector)
weights.shape

(3,)

In [50]:
# 3D array
arr3 = np.array([
    [[11, 12, 13],
     [13, 14, 15]],
    [[15, 16, 17],
     [17, 18, 19.5]]
])

In [51]:
arr3.shape

(2, 2, 3)

In [52]:
weights.dtype

dtype('float64')

In [53]:
climate_data.dtype

dtype('int64')

In [54]:
arr3.dtype

dtype('float64')

In [55]:
arr3

array([[[11. , 12. , 13. ],
        [13. , 14. , 15. ]],

       [[15. , 16. , 17. ],
        [17. , 18. , 19.5]]])

In [56]:
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [58]:
weights

array([0.3, 0.2, 0.5])

In [59]:
np.matmul(climate_data, weights)

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [60]:
climate_data @ weights

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [67]:
climate_data = np.genfromtxt('climate.txt', delimiter=',', skip_header=1)

In [68]:
climate_data

array([[25., 76., 99.],
       [39., 65., 70.],
       [59., 45., 77.],
       ...,
       [99., 62., 58.],
       [70., 71., 91.],
       [92., 39., 76.]])

In [69]:
climate_data.shape

(10000, 3)

In [70]:
weights = np.array([0.3, 0.2, 0.5])

In [71]:
yields = climate_data @ weights

In [72]:
yields

array([72.2, 59.7, 65.2, ..., 71.1, 80.7, 73.4])

In [73]:
yields.shape

(10000,)

In [74]:
# Append the yields as a fourth column. reshape(-1, 1) turns the 1-D vector
# into a single column and lets NumPy infer the row count, so this keeps
# working when the input file has a different number of rows.
climate_results = np.concatenate((climate_data, yields.reshape(-1, 1)), axis=1)

In [75]:
climate_results

array([[25. , 76. , 99. , 72.2],
       [39. , 65. , 70. , 59.7],
       [59. , 45. , 77. , 65.2],
       ...,
       [99. , 62. , 58. , 71.1],
       [70. , 71. , 91. , 80.7],
       [92. , 39. , 76. , 73.4]])

In [76]:
climate_results

array([[25. , 76. , 99. , 72.2],
       [39. , 65. , 70. , 59.7],
       [59. , 45. , 77. , 65.2],
       ...,
       [99. , 62. , 58. , 71.1],
       [70. , 71. , 91. , 80.7],
       [92. , 39. , 76. , 73.4]])

In [77]:
# Write the results as CSV. The delimiter must be ',' to match the
# comma-separated header (np.savetxt separates columns with a space by
# default). comments='' stops NumPy from prefixing the header with '# '.
np.savetxt('climate_results.txt',
           climate_results,
           fmt='%.2f',
           delimiter=',',
           header='temperature,rainfall,humidity,yield_apples',
           comments='')

In [78]:
arr2 = np.array([[1, 2, 3, 4],
                 [5, 6, 7, 8],
                 [9, 1, 2, 3]])

In [79]:
arr3 = np.array([[11, 12, 13, 14],
                 [15, 16, 17, 18],
                 [19, 11, 12, 13]])

In [80]:
arr2 + arr3

array([[12, 14, 16, 18],
       [20, 22, 24, 26],
       [28, 12, 14, 16]])

In [82]:
# Adding a scalar
arr2 + 3

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12,  4,  5,  6]])

In [83]:
# Element-wise subtraction
arr3 - arr2

array([[10, 10, 10, 10],
       [10, 10, 10, 10],
       [10, 10, 10, 10]])

In [84]:
# Division by scalar
arr2 / 2

array([[0.5, 1. , 1.5, 2. ],
       [2.5, 3. , 3.5, 4. ],
       [4.5, 0.5, 1. , 1.5]])

In [85]:
# Element-wise multiplication
arr2 * arr3

array([[ 11,  24,  39,  56],
       [ 75,  96, 119, 144],
       [171,  11,  24,  39]])

In [86]:
# Modulus with scalar
arr2 % 4

array([[1, 2, 3, 0],
       [1, 2, 3, 0],
       [1, 1, 2, 3]])

In [87]:
arr2 = np.array([[1, 2, 3, 4],
                 [5, 6, 7, 8],
                 [9, 1, 2, 3]])

In [88]:
arr2.shape

(3, 4)

In [89]:
arr4 = np.array([4, 5, 6, 7])

In [90]:
arr4.shape

(4,)

In [91]:
arr2 + arr4

array([[ 5,  7,  9, 11],
       [ 9, 11, 13, 15],
       [13,  6,  8, 10]])

In [92]:
arr5 = np.array([7, 8])

In [93]:
arr5.shape

(2,)

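`arr2` has shape `(3, 4)` and `arr5` has shape `(2,)`. Their trailing dimensions (4 and 2) differ and neither is 1, so the two arrays cannot be broadcast together, unlike `arr2 + arr4` above. See the NumPy broadcasting rules: https://numpy.org/doc/stable/user/basics.broadcasting.html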

In [95]:
arr1 = np.array([[1, 2, 3], [3, 4, 5]])
arr2 = np.array([[2, 2, 3], [1, 2, 5]])

In [96]:
arr1 == arr2

array([[False,  True,  True],
       [False, False,  True]])

In [97]:
arr1 != arr2

array([[ True, False, False],
       [ True,  True, False]])

In [98]:
arr1 >= arr2

array([[False,  True,  True],
       [ True,  True,  True]])

In [99]:
arr1 < arr2

array([[ True, False, False],
       [False, False, False]])

In [100]:
(arr1 == arr2).sum()

np.int64(3)

In [101]:
arr3 = np.array([
    [[11, 12, 13, 14],
     [13, 14, 15, 19]],

    [[15, 16, 17, 21],
     [63, 92, 36, 18]],

    [[98, 32, 81, 23],
     [17, 18, 19.5, 43]]
])

In [102]:
arr3.shape

(3, 2, 4)

In [103]:
# Single element
arr3[1, 1, 2]

np.float64(36.0)

In [104]:
# Subarray using ranges
arr3[1:, 0:1, :2]

array([[[15., 16.]],

       [[98., 32.]]])

In [105]:
# Mixing indices and ranges
arr3[1:, 1, 3]

array([18., 43.])

In [106]:
# Mixing indices and ranges
arr3[1:, 1, :3]

array([[63. , 92. , 36. ],
       [17. , 18. , 19.5]])

In [107]:
# Using fewer indices
arr3[1]

array([[15., 16., 17., 21.],
       [63., 92., 36., 18.]])

In [108]:
# Using fewer indices
arr3[1]

array([[15., 16., 17., 21.],
       [63., 92., 36., 18.]])

In [109]:
# Using fewer indices
arr3[:2, 1]

array([[13., 14., 15., 19.],
       [63., 92., 36., 18.]])

In [111]:
# All zeros
np.zeros((3, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [112]:
# All ones
np.ones([2, 2, 3])

array([[[1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.]]])

In [113]:
# Identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [114]:
# Random vector
np.random.rand(5)

array([0.00336891, 0.29226706, 0.80574545, 0.43139482, 0.65019354])

In [115]:
# Random matrix
np.random.randn(2, 3) # randn samples the standard normal distribution (values can be negative); rand samples uniformly from [0, 1)

array([[ 0.62690623,  0.75311851,  1.18058814],
       [-0.11789437,  0.66378086,  1.4886434 ]])

In [116]:
# Fixed value
np.full([2, 3], 42)

array([[42, 42, 42],
       [42, 42, 42]])

In [117]:
# Range with start, end and step
np.arange(10, 90, 3)

array([10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49, 52, 55, 58,
       61, 64, 67, 70, 73, 76, 79, 82, 85, 88])

In [119]:
# Equally spaced numbers in a range
np.linspace(3, 27, 9)

array([ 3.,  6.,  9., 12., 15., 18., 21., 24., 27.])