## Numpy - Numerical computation in Python

In [264]:
# Computation in python
def crop_yield(region, weights):
  result=0

  for x, w in zip(region, weights):
    result+=x*w

  return result

kanto=[73, 67, 43]
weights=[0.3, 0.2, 0.5]
total_yield=crop_yield(kanto, weights)

print(f"The total yield of apples in kanto region is {total_yield}")

The total yield of apples in kanto region is 56.8


In [265]:
import numpy as np

In [266]:
kanto=[73, 67, 43]
np_kanto=np.array(kanto)
type(np_kanto)

numpy.ndarray

In [267]:
weights=[0.3, 0.2, 0.5]
np_weights=np.array(weights)
type(np_weights)

numpy.ndarray

In [268]:
# Dot product
total_yield=np.dot(np_kanto, np_weights)
print(total_yield)

56.8


In [269]:
# sum() function
total_yield=(np_kanto*np_weights).sum()
print(total_yield)

56.8


In [270]:
# Python lists
arr1=list(range(1000000))
arr2=list(range(1000000, 2000000))

# Numpy arrays
np_arr1=np.array(arr1)
np_arr2=np.array(arr2)

In [271]:
%%time
result=0
for x1, x2 in zip(arr1, arr2):
  result+=x1*x2
print(result)

833332333333500000
CPU times: total: 31.2 ms
Wall time: 91.1 ms


In [272]:
%%time
print(np.dot(np_arr1, np_arr2))

833332333333500000
CPU times: total: 0 ns
Wall time: 1 ms


### Multi-dimensional numpy arrays

In [273]:
# 2D array
np_climate=np.array([[73, 67, 43],
                     [91, 88, 64],
                     [87, 134, 58],
                     [102, 43, 37],
                     [69, 96, 70]])
np_climate

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [274]:
# Dimensions of the numpy array
print(np_climate.shape)
print(np_weights.shape)

(5, 3)
(3,)


In [275]:
# 3D array
np_coordinates=np.array([
  [[11, 12, 13],
   [13, 14, 15]],
  [[15, 16, 17],
   [17, 18, 19.5]]
])
np_coordinates

array([[[11. , 12. , 13. ],
        [13. , 14. , 15. ]],

       [[15. , 16. , 17. ],
        [17. , 18. , 19.5]]])

In [276]:
print(np_coordinates.shape)

(2, 2, 3)


In [277]:
# Data type of numpy array
print(np_weights.dtype)
print(np_climate.dtype)
print(np_coordinates.dtype)

float64
int64
float64


In [278]:
# Matrix multiplication
matmul_array=np.matmul(np_climate, np_weights)
matmul_array

array([56.8, 76.9, 81.9, 57.7, 74.9])

### Working with CSV files

In [279]:
# urllib.request.urlretreive - to copy from a url
# shutil - to copy from local system

import shutil
shutil.copy('C:\\Users\\Kushal\\Downloads\\weather_data.csv', 'climate.txt')

'climate.txt'

In [280]:
climate_data=np.genfromtxt('climate.txt', delimiter=',', skip_header=1)

In [281]:
climate_data

array([[ 10.59970654, 112.09224554,  75.69984799],
       [ 42.28928685,  99.87362887,  26.6060796 ],
       [ 30.2596668 ,  52.84617375,  41.19757249],
       ...,
       [ 42.06893533,  91.10954074,  11.75102765],
       [ 11.86183958, 132.99600194,  46.09043582],
       [  1.94272222,  51.67944431,  33.16581812]])

In [282]:
climate_data.shape

(10000, 3)

In [283]:
tot_yield=np.matmul(climate_data, np_weights)
print(tot_yield)
print(tot_yield.shape)

[63.44828506 45.96455163 40.24592104 ... 36.71810257 53.20297018
 27.50161459]
(10000,)


In [284]:
# Concatenation
climate_results=np.concatenate((climate_data, tot_yield.reshape(10000, 1)), axis=1)

In [285]:
climate_results

array([[ 10.59970654, 112.09224554,  75.69984799,  63.44828506],
       [ 42.28928685,  99.87362887,  26.6060796 ,  45.96455163],
       [ 30.2596668 ,  52.84617375,  41.19757249,  40.24592104],
       ...,
       [ 42.06893533,  91.10954074,  11.75102765,  36.71810257],
       [ 11.86183958, 132.99600194,  46.09043582,  53.20297018],
       [  1.94272222,  51.67944431,  33.16581812,  27.50161459]])

In [286]:
np.savetxt('climate.txt',
           climate_results,
           fmt='%.2f',
           header='temperature,rainfall,humidity,yield_apples',
           comments='')

### Arithmetic operations and broadcasting

In [287]:
arr1=np.array([[1, 2, 3, 4],
               [5, 6, 7, 8],
               [9, 1, 2, 3]])

In [288]:
arr2=np.array([[11, 12, 13, 14],
               [15, 16, 17, 18],
               [19, 11, 12, 13]])

In [289]:
arr1+arr2

array([[12, 14, 16, 18],
       [20, 22, 24, 26],
       [28, 12, 14, 16]])

In [290]:
# Adding a scalar
arr1+3

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12,  4,  5,  6]])

In [291]:
arr2-arr1

array([[10, 10, 10, 10],
       [10, 10, 10, 10],
       [10, 10, 10, 10]])

In [292]:
arr2/2

array([[5.5, 6. , 6.5, 7. ],
       [7.5, 8. , 8.5, 9. ],
       [9.5, 5.5, 6. , 6.5]])

In [293]:
arr1*arr2

array([[ 11,  24,  39,  56],
       [ 75,  96, 119, 144],
       [171,  11,  24,  39]])

In [294]:
arr2

array([[11, 12, 13, 14],
       [15, 16, 17, 18],
       [19, 11, 12, 13]])

In [295]:
arr2.shape

(3, 4)

In [296]:
arr3=np.array([4, 5, 6, 7])

In [297]:
arr3.shape

(4,)

In [298]:
'''Broadcasting - the 1D array replicates itself to number 
of rows of 2D array and then performs the operations. We have
to make sure the columns in both the arrays are equal'''
arr2+arr3

array([[15, 17, 19, 21],
       [19, 21, 23, 25],
       [23, 16, 18, 20]])

In [299]:
a1=np.array([[1, 2, 3],
             [3, 4, 5]])

In [300]:
a1

array([[1, 2, 3],
       [3, 4, 5]])

In [301]:
a2=np.array([[2, 2, 3],
             [1, 2, 5]])

In [302]:
a2

array([[2, 2, 3],
       [1, 2, 5]])

In [303]:
a1==a2

array([[False,  True,  True],
       [False, False,  True]])

In [304]:
a1!=a2

array([[ True, False, False],
       [ True,  True, False]])

In [305]:
a1>=a2

array([[False,  True,  True],
       [ True,  True,  True]])

In [306]:
a1<=a2

array([[ True,  True,  True],
       [False, False,  True]])

In [307]:
(a1==a2).sum()

np.int64(3)

### Array slicing and indexing

In [308]:
arr=np.array([
  [[11, 12, 13, 14],
   [13, 14, 15, 19]],
  [[16, 16, 17, 21],
   [63, 92, 36, 18]],
  [[98, 32, 81, 23],
   [17, 18, 19.5, 43]]
])

In [309]:
arr.shape

(3, 2, 4)

In [310]:
arr[1]

array([[16., 16., 17., 21.],
       [63., 92., 36., 18.]])

In [311]:
arr[1][1]

array([63., 92., 36., 18.])

In [312]:
# Single element
arr[1][1][2]

np.float64(36.0)

In [313]:
# Indexing using ranges
arr[1:, 0:1, :2]

array([[[16., 16.]],

       [[98., 32.]]])

In [314]:
# Mixing indices and ranges
arr[1:, 1, 3]

array([18., 43.])

In [315]:
arr[1:, 1, :3]

array([[63. , 92. , 36. ],
       [17. , 18. , 19.5]])

In [316]:
arr[:2, 1]

array([[13., 14., 15., 19.],
       [63., 92., 36., 18.]])

### Other ways of creating Numpy arrays

In [317]:
# All zeroes
np.zeros((3, 2, 2))

array([[[0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.]]])

In [318]:
# All ones
np.ones([2, 2, 3])

array([[[1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.]]])

In [319]:
# Identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [320]:
# Random vector
np.random.rand(5)

array([0.1767278 , 0.88800302, 0.58565691, 0.89110541, 0.59594611])

In [321]:
# Random matrix
np.random.randn(2, 3)

array([[ 0.65400844, -0.84896736, -1.200179  ],
       [-0.19092776,  0.68411364, -0.21592302]])

##### np.random.rand generates samples from a uniform distribution between 0 and 1. np.random.randn generates samples from a standard normal distribution with mean 0 and std 1

In [322]:
# Fixed value
np.full([2, 3], 42)

array([[42, 42, 42],
       [42, 42, 42]])

In [323]:
# Range with start, end, step
np.arange(10, 90, 3)

array([10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49, 52, 55, 58,
       61, 64, 67, 70, 73, 76, 79, 82, 85, 88])

In [324]:
# Equally spaced numbers in a range
np.linspace(3, 27, 9)

array([ 3.,  6.,  9., 12., 15., 18., 21., 24., 27.])

In [325]:
np.arange(10, 90, 3).shape

(27,)

In [326]:
np.arange(10, 90, 3).reshape(3, 3, 3)

array([[[10, 13, 16],
        [19, 22, 25],
        [28, 31, 34]],

       [[37, 40, 43],
        [46, 49, 52],
        [55, 58, 61]],

       [[64, 67, 70],
        [73, 76, 79],
        [82, 85, 88]]])

### Important Numpy functions

##### Function 1 - np.concatenate

In [327]:
arr1=[[1, 2],
      [3, 4.]]

arr2=[[5, 6, 7],
      [8, 9, 10]]

np.concatenate((arr1, arr2), axis=1)

'''With axis=0, all the input array dimensions except for the 
concatenation axis must match exactly, but along dimension 1, 
the array at index 0 has size 2 and the array at index 1 has size 3'''

'With axis=0, all the input array dimensions except for the \nconcatenation axis must match exactly, but along dimension 1, \nthe array at index 0 has size 2 and the array at index 1 has size 3'

##### Function 2 - np.max

In [328]:
np.max([1, 2, 3, 4, 5])

np.int64(5)

##### Function 3 - np.min

In [329]:
np.min([1, 2, 3, 4, 5])

np.int64(1)

##### Function 4 - np.mean

In [330]:
np.mean([1, 2, 3, 4, 5])

np.float64(3.0)

##### Function 5 - np.median

In [331]:
np.median([1, 2, 3, 4, 5])

np.float64(3.0)

### File Handling with os library

In [332]:
import os

In [333]:
# Checking the present working directory
os.getcwd()

'd:\\python-for-ai\\Numpy'

In [334]:
# To get the names of files in the directory - relative path
os.listdir('.')

['climate.txt', 'data', 'numpy.ipynb']

In [335]:
# To get the names of files in the directory - absolute path
os.listdir(r'D:\\python-for-ai\\Numpy')

['climate.txt', 'data', 'numpy.ipynb']

In [336]:
# Create a new directory
os.makedirs('./data', exist_ok=True)

In [337]:
'data' in os.listdir('.')

True

In [338]:
os.listdir('./data')

['climate.txt']

In [339]:
# Reading from a file
file1=open('./data/climate.txt', mode='r')

In [340]:
file1_contents=file1.read()

In [341]:
print(file1_contents)

temperature,rainfall,humidity,yield_apples
10.60 112.09 75.70 63.45
42.29 99.87 26.61 45.96
30.26 52.85 41.20 40.25
22.93 182.18 69.70 78.16
-1.42 142.99 53.39 54.87
-1.42 259.71 76.47 89.75
-6.81 9.63 96.51 48.14
37.64 193.16 20.49 60.17
23.06 228.88 73.86 89.63
28.94 227.85 30.73 69.62
-8.87 265.82 47.30 74.16
43.35 218.71 12.96 63.22
35.78 278.34 22.23 77.52
1.68 99.80 38.78 39.85
0.00 150.96 40.78 50.58
0.09 4.22 90.96 46.35
6.73 2.09 76.76 40.82
18.86 72.04 97.50 68.81
13.76 30.24 63.92 42.13
6.02 78.06 31.75 33.30
23.65 53.11 39.49 37.46
-2.33 8.56 38.79 20.41
6.07 272.79 39.38 76.07
10.15 2.47 70.57 38.82
15.08 220.82 57.77 77.58
33.18 45.64 44.69 41.43
0.98 273.67 23.42 66.74
18.28 267.84 11.99 65.05
22.58 196.17 29.70 60.86
-7.45 201.67 19.67 47.94
23.41 1.60 80.69 47.69
-0.62 295.46 20.12 68.97
-6.42 263.35 75.18 88.33
42.19 268.53 44.71 88.72
43.11 197.77 16.60 60.79
34.46 103.44 83.15 72.60
6.75 268.09 62.17 86.73
-4.63 87.46 31.22 31.71
27.63 168.81 90.52 87.31
14.21 29.92

In [342]:
file1.close()

In [343]:
'''with function - Once the statements within the with block are 
executed, the .close method on file is automatically invoked
'''

with open('./data/climate.txt', 'r') as file2:
  file2_contents=file2.read()
  print(file2_contents)

temperature,rainfall,humidity,yield_apples
10.60 112.09 75.70 63.45
42.29 99.87 26.61 45.96
30.26 52.85 41.20 40.25
22.93 182.18 69.70 78.16
-1.42 142.99 53.39 54.87
-1.42 259.71 76.47 89.75
-6.81 9.63 96.51 48.14
37.64 193.16 20.49 60.17
23.06 228.88 73.86 89.63
28.94 227.85 30.73 69.62
-8.87 265.82 47.30 74.16
43.35 218.71 12.96 63.22
35.78 278.34 22.23 77.52
1.68 99.80 38.78 39.85
0.00 150.96 40.78 50.58
0.09 4.22 90.96 46.35
6.73 2.09 76.76 40.82
18.86 72.04 97.50 68.81
13.76 30.24 63.92 42.13
6.02 78.06 31.75 33.30
23.65 53.11 39.49 37.46
-2.33 8.56 38.79 20.41
6.07 272.79 39.38 76.07
10.15 2.47 70.57 38.82
15.08 220.82 57.77 77.58
33.18 45.64 44.69 41.43
0.98 273.67 23.42 66.74
18.28 267.84 11.99 65.05
22.58 196.17 29.70 60.86
-7.45 201.67 19.67 47.94
23.41 1.60 80.69 47.69
-0.62 295.46 20.12 68.97
-6.42 263.35 75.18 88.33
42.19 268.53 44.71 88.72
43.11 197.77 16.60 60.79
34.46 103.44 83.15 72.60
6.75 268.09 62.17 86.73
-4.63 87.46 31.22 31.71
27.63 168.81 90.52 87.31
14.21 29.92

In [344]:
# Reading individual lines
with open('./data/climate.txt', 'r') as file3:
  file3_contents=file3.readlines()
  print(file3_contents)

['temperature,rainfall,humidity,yield_apples\n', '10.60 112.09 75.70 63.45\n', '42.29 99.87 26.61 45.96\n', '30.26 52.85 41.20 40.25\n', '22.93 182.18 69.70 78.16\n', '-1.42 142.99 53.39 54.87\n', '-1.42 259.71 76.47 89.75\n', '-6.81 9.63 96.51 48.14\n', '37.64 193.16 20.49 60.17\n', '23.06 228.88 73.86 89.63\n', '28.94 227.85 30.73 69.62\n', '-8.87 265.82 47.30 74.16\n', '43.35 218.71 12.96 63.22\n', '35.78 278.34 22.23 77.52\n', '1.68 99.80 38.78 39.85\n', '0.00 150.96 40.78 50.58\n', '0.09 4.22 90.96 46.35\n', '6.73 2.09 76.76 40.82\n', '18.86 72.04 97.50 68.81\n', '13.76 30.24 63.92 42.13\n', '6.02 78.06 31.75 33.30\n', '23.65 53.11 39.49 37.46\n', '-2.33 8.56 38.79 20.41\n', '6.07 272.79 39.38 76.07\n', '10.15 2.47 70.57 38.82\n', '15.08 220.82 57.77 77.58\n', '33.18 45.64 44.69 41.43\n', '0.98 273.67 23.42 66.74\n', '18.28 267.84 11.99 65.05\n', '22.58 196.17 29.70 60.86\n', '-7.45 201.67 19.67 47.94\n', '23.41 1.60 80.69 47.69\n', '-0.62 295.46 20.12 68.97\n', '-6.42 263.35 75.1

### Processing data from files

In [345]:
def parse_headers(header_line):
  return header_line.strip().split(',')

In [346]:
file4=open('./data/climate.txt', mode='r')
file4_contents=file4.readlines()
print(file4_contents)

['temperature,rainfall,humidity,yield_apples\n', '10.60 112.09 75.70 63.45\n', '42.29 99.87 26.61 45.96\n', '30.26 52.85 41.20 40.25\n', '22.93 182.18 69.70 78.16\n', '-1.42 142.99 53.39 54.87\n', '-1.42 259.71 76.47 89.75\n', '-6.81 9.63 96.51 48.14\n', '37.64 193.16 20.49 60.17\n', '23.06 228.88 73.86 89.63\n', '28.94 227.85 30.73 69.62\n', '-8.87 265.82 47.30 74.16\n', '43.35 218.71 12.96 63.22\n', '35.78 278.34 22.23 77.52\n', '1.68 99.80 38.78 39.85\n', '0.00 150.96 40.78 50.58\n', '0.09 4.22 90.96 46.35\n', '6.73 2.09 76.76 40.82\n', '18.86 72.04 97.50 68.81\n', '13.76 30.24 63.92 42.13\n', '6.02 78.06 31.75 33.30\n', '23.65 53.11 39.49 37.46\n', '-2.33 8.56 38.79 20.41\n', '6.07 272.79 39.38 76.07\n', '10.15 2.47 70.57 38.82\n', '15.08 220.82 57.77 77.58\n', '33.18 45.64 44.69 41.43\n', '0.98 273.67 23.42 66.74\n', '18.28 267.84 11.99 65.05\n', '22.58 196.17 29.70 60.86\n', '-7.45 201.67 19.67 47.94\n', '23.41 1.60 80.69 47.69\n', '-0.62 295.46 20.12 68.97\n', '-6.42 263.35 75.1

In [347]:
headers=file4_contents[0]
headers

'temperature,rainfall,humidity,yield_apples\n'

In [348]:
parsed_headers=parse_headers(headers)
parsed_headers

['temperature', 'rainfall', 'humidity', 'yield_apples']

In [349]:
file4_contents[2]

'42.29 99.87 26.61 45.96\n'

In [350]:
def parse_values(data_line):
  data_values=[]
  for item in data_line.strip().split(' '):
    data_values.append(float(item))
  return data_values

In [351]:
data_values=parse_values(file4_contents[1])
data_values

[10.6, 112.09, 75.7, 63.45]

In [352]:
def create_item_dict(headers, values):
  result={}
  for header, value in zip(headers, values):
    result[header]=value
  return result

In [353]:
create_item_dict(parsed_headers, data_values)

{'temperature': 10.6,
 'rainfall': 112.09,
 'humidity': 75.7,
 'yield_apples': 63.45}

In [354]:
def process_file(path):
  result=[]

  with open(path, 'r') as f:
    lines=f.readlines()

    headers=parse_headers(lines[0])

    for data_line in lines[1:]:
      values=parse_values(data_line)
      item_dict=create_item_dict(headers, values)
      result.append(item_dict)

  return result

In [355]:
process_file('./data/climate.txt')

[{'temperature': 10.6,
  'rainfall': 112.09,
  'humidity': 75.7,
  'yield_apples': 63.45},
 {'temperature': 42.29,
  'rainfall': 99.87,
  'humidity': 26.61,
  'yield_apples': 45.96},
 {'temperature': 30.26,
  'rainfall': 52.85,
  'humidity': 41.2,
  'yield_apples': 40.25},
 {'temperature': 22.93,
  'rainfall': 182.18,
  'humidity': 69.7,
  'yield_apples': 78.16},
 {'temperature': -1.42,
  'rainfall': 142.99,
  'humidity': 53.39,
  'yield_apples': 54.87},
 {'temperature': -1.42,
  'rainfall': 259.71,
  'humidity': 76.47,
  'yield_apples': 89.75},
 {'temperature': -6.81,
  'rainfall': 9.63,
  'humidity': 96.51,
  'yield_apples': 48.14},
 {'temperature': 37.64,
  'rainfall': 193.16,
  'humidity': 20.49,
  'yield_apples': 60.17},
 {'temperature': 23.06,
  'rainfall': 228.88,
  'humidity': 73.86,
  'yield_apples': 89.63},
 {'temperature': 28.94,
  'rainfall': 227.85,
  'humidity': 30.73,
  'yield_apples': 69.62},
 {'temperature': -8.87,
  'rainfall': 265.82,
  'humidity': 47.3,
  'yield_app

In [356]:
# Calculating EMI
import math

def loan_emi(amount, duration, rate, down_payment=0):
  loan_amount=amount-down_payment
  try:
    emi=loan_amount*rate*((1+rate)**duration)/(((1+rate)**duration)-1)
  except ZeroDivisionError:
    emi=loan_amount/duration

  emi=math.ceil(emi)
  return emi

In [360]:
loan_emi(10000, 3, 0.06, 8)

3739

In [363]:
os.makedirs('./temp', exist_ok=True)

In [364]:
with open('./temp/climate.txt', 'w') as f:
  f.write('{},{},{},{}\n'.format(
    4.32,
    1.29,
    34.5,
    67.3
  ))

In [365]:
with open('./temp/climate.txt', 'r') as f:
  print(f.readlines())

['4.32,1.29,34.5,67.3\n']
