# **Introduction to CS and Python concepts for ML**
For course EE769 Introduction to Machine Learning @IITB by Amit Sethi

Note: The best way to learn is to type the code while watching this video in parts
- Constants, variables, naming
- Conditions
- Functions
- Numpy arrays and linear algebra
- While loop
- For loop
- Vectorization
- Big-O for time complexity (linear search in O(n), binary search in O(log n))


# Constants, variables, and naming

In [None]:
# Bad example of hard-coding

print('Area of the circle is ' + str(3.1416 * 4 * 4)) 
# + sign concatenates strings in the line above

Area of the circle is 50.2656


In [None]:
# Better example of using variables: each number is stored once and then reused

a, b = 3.1416, 4

print('Area of the circle is ' + str(b * b * a))

Area of the circle is 50.2656


In [None]:
# Even better example: the names themselves explain what each value means
# Note: Python has no true constants; writing a name in CAPS is only a
# convention that tells the reader "do not change this value"

PI = 3.1416

radius = 4.0 # the decimal point makes this a float rather than an int
area = PI * radius ** 2

print(f'Area of the circle is {area}')

Area of the circle is 50.2656


# Conditions

In [None]:
# Lazy programming:
# Pros - fast,
# Cons - hard to understand after a week, inflexible, error-prone
# (anything that float() cannot convert stops the program with a ValueError)

x = float(input('Enter a whole number to check if it is divisble by ' + str(7) + ' : '))
if x / 7 != round(x / 7, 0): # Not divisible
  print('The number is NOT divisble by ' + str(7))
else:
  print('The number is divisble by ' + str(7))

Enter a whole number to check if it is divisble by 7 : qw


ValueError: ignored

In [None]:
# Notice good programming fundamentals
#   Different types of inputs are assumed
#   Errors are caught before they crash your program
#   Variables are used instead of hardcoding
#   Variable names are self-explanatory
#   Copious use of comments
# Notice how nested if's work. Also read about elif.

div_by = 12 # Using a variable makes it easy to change the program for div by 12

x = input('Enter a whole number to check if it is divisble by '+ str(div_by) +' : ')
# User input; input() always hands the typed text back as a string

try:
  x = float(x)
  # This line can fail if no number is entered.
  # We catch that error using try-except.
  # Only the conversion sits inside try, so no other mistake gets hidden.
except ValueError: # Did not enter a number
  print('You did NOT enter a number') # Result
else: # Runs only when the conversion above succeeded
  if x.is_integer(): # Checking for whole number (False for inf and nan too)
    # Checking for divisibility with the remainder operator %.
    # Comparing round(x/div_by) with x/div_by goes wrong for very large x,
    # because the quotient itself gets rounded to a whole number.
    if x % div_by == 0:
      print('The number is divisble by '+ str(div_by)) # Result
    else: # Not divisible
      print('The number is NOT divisble by '+ str(div_by)) # Result
  else: # Did not enter a whole number
    print('The number is NOT a whole number') # Result

Enter a whole number to check if it is divisble by 12 : 48.0
The number is divisble by 12


# Functions

In [None]:
# You can outsource some repetitive jobs from the main program to functions
# It also makes the program more readable


# FUNCTION
def check_divisibility(numerator, denominator):
  """Print and return whether a whole number is divisible by `denominator`.

  numerator:   the value to test; a string (as returned by input()) or a
               number. It is converted with float().
  denominator: the number to divide by. It is used as given, so a zero
               denominator raises ZeroDivisionError.

  Returns True only when `numerator` is a whole number that leaves no
  remainder when divided by `denominator`. Returns False (after printing the
  reason) when it is not divisible, is not a whole number, or is text that
  float() cannot convert.
  """
  try:
    numerator = float(numerator)
  except ValueError: # Text that is not a number
    print('You did NOT enter a number') # Result
    return False

  # is_integer() is False for fractions and also for inf and nan, which the
  # int() calls below would not be able to convert
  if not numerator.is_integer(): # Did not enter a whole number
    print(str(numerator) + ' number is NOT a whole number') # Result
    return False

  # Checking for divisibility with the remainder operator %. Comparing
  # round(numerator/denominator) with numerator/denominator reports very large
  # numbers as divisible, because the quotient itself gets rounded.
  if numerator % denominator == 0:
    print(str(int(numerator)) + ' is divisble by '+ str(denominator)) # Result
    return True

  # Not divisible
  print(str(int(numerator)) + ' is NOT divisble by '+ str(denominator)) # Result
  return False




# MAIN PROGRAM

div_by = 7 # Keeping the divisor in a variable makes it easy to change later

x = input(f'Enter a whole number to check if it is divisble by {div_by} : ')
_ = check_divisibility(x, div_by)
# The underscore is a dummy variable that simply receives the returned value.
# Without it, the returned value (in this case, True or False) will be printed

Enter a whole number to check if it is divisble by 7 : 6
6 is NOT divisble by 7


# Numpy arrays

In [None]:
import numpy as np # convention is to import numpy as np

# Generate an array of random numbers with 3 rows and 2 columns, then show it
x = np.random.random(size=(3, 2))
print(x)

[[0.8293671  0.6617965 ]
 [0.91009785 0.99309904]
 [0.21181765 0.84958217]]


In [None]:
np.transpose(x) # Transpose: rows become columns, so the 3x2 array x is shown as 2x3

array([[0.8293671 , 0.91009785, 0.21181765],
       [0.6617965 , 0.99309904, 0.84958217]])

In [None]:
x.T.dot(x) # .T is also transpose, .dot is matrix multiplication; (2x3).dot(3x2) gives a 2x2 result

array([[1.56099461, 1.63264604],
       [1.63264604, 2.14601018]])

In [None]:
x * x # Asterisk is point-wise multiplication: each element is multiplied by itself, so the result is still 3x2

array([[0.68784979, 0.43797461],
       [0.8282781 , 0.98624571],
       [0.04486672, 0.72178987]])

In [None]:
np.linalg.pinv(x).dot(x) 
# linalg is a linear algebra module, pinv is pseudo inverse function in it
# Here the pseudo inverse times x comes out as the 2x2 identity matrix, apart
# from tiny round-off values (about 1e-16) in the off-diagonal entries

array([[1.00000000e+00, 1.90249264e-17],
       [1.37724092e-16, 1.00000000e+00]])

# While loop

In [None]:
import random

# random.randint includes BOTH end points, so (1, 10) gives a number from 1 to 10
hidden = random.randint(1, 10)

prompt = 'This is a number guessing game. Enter the number between 1 and 10: '
guess = None # No guess yet, so the loop below runs at least once

while guess != hidden:
  try:
    guess = int(input(prompt))
  except ValueError:
    # Only the "not a whole number" error is caught. A bare except would also
    # swallow the interrupt signal, and the loop could never be stopped.
    print('Bad input. Whole number expected.')
  prompt = 'Try again: ' # Every prompt after the first one is shorter

print('You got it!')

This is a number guessing game. Enter the number between 1 and 10: 3
Try again: 5
Try again: 7
Try again: 9
Try again: 1
Try again: 2
Try again: 4
Try again: 7
Try again: 8
Try again: 3
Try again: 10
You got it!


# For loop and vectorization

In [None]:
# Count the number of elements divisible by 7 using nested for loops

import numpy as np
import time

# 1000 x 100 array of random whole numbers (stored as floats) from 0 to 100
x = np.round(100 * np.random.rand(1000, 100))


tic = time.perf_counter() # Start of the timed section

count = 0

# np.shape(x) is (number of rows, number of columns); visit the elements one by one
for row in range(np.shape(x)[0]):
  for col in range(np.shape(x)[1]):
    # Divisible by 7 if rounding the quotient and multiplying back changes nothing
    if x[row, col] == 7.0 * np.round(x[row, col] / 7.0):
      count = count + 1

toc = time.perf_counter() # End of the timed section

print("Found %d elements divisible by 7 in %d microseconds"%(count, (toc-tic)*1000000))

Found 14613 elements divisible by 7 in 583927 microseconds


In [None]:
# Pre-assigning nRows and nCols, instead of calling np.shape in the loop headers, reduces time somewhat (about 10% in the runs shown here)

import numpy as np
import time

# 1000 x 100 array of random whole numbers (stored as floats) from 0 to 100
x = np.round(100 * np.random.rand(1000, 100))

# Look up the array size once, before the timed section starts
nRows, nCols = np.shape(x)

tic = time.perf_counter() # Start of the timed section

count = 0

for row in range(nRows):
  for col in range(nCols):
    # Divisible by 7 if rounding the quotient and multiplying back changes nothing
    if x[row, col] == 7.0 * np.round(x[row, col] / 7.0):
      count = count + 1

toc = time.perf_counter() # End of the timed section

print("Found %d elements divisible by 7 in %d microseconds"%(count, (toc-tic)*1000000))

Found 14495 elements divisible by 7 in 517565 microseconds


In [None]:
# Vectorizing to avoid for loops is the best

import numpy as np
import time

# 1000 x 100 array of random whole numbers (stored as floats) from 0 to 100
x = np.round(100 * np.random.rand(1000, 100))


tic = time.perf_counter() # Start of the timed section

# The comparison is done on the whole array at once and gives an array of
# True/False values; summing it counts the True entries
count = (np.round(x / 7.0) * 7.0 == x).sum()

toc = time.perf_counter() # End of the timed section

print("Found %d elements divisible by 7 in %d microseconds"%(count, (toc-tic)*1000000))

Found 14441 elements divisible by 7 in 5225 microseconds


# Time complexity

In [None]:
# Find a number in a sorted array: O(n) using brute force, where n is array size
# Pronounced "Order n".

import numpy as np
import time

# Make a random sorted array of size n
n = 100000
x = np.sort(np.random.rand(n))

# Pick one element of the array; this is the value we will search for
y = x[np.random.randint(0, n)]

tic = time.perf_counter() # Start of the timed section

# Brute force: look at the elements one after another, from the start,
# and stop at the first one that equals y
for i, candidate in enumerate(x):
  if candidate == y:
    break

toc = time.perf_counter() # End of the timed section

print("Found %f at position %d in %d microseconds"%(y, i, (toc-tic)*1000000))

Found 0.191713 at position 19100 in 5196 microseconds


In [None]:
# Binary search has O(log(n)) complexity, where n is array size:
# Pronounced "Order log n".

import numpy as np
import time

# Make a random sorted array of size n
n = 1000000
x = np.random.rand(n)
x = np.sort(x)

# Choose an element in the array that we will try to find
y = x[np.random.randint(0,n)]

tic = time.perf_counter()

# If y is in the array, its position is always one of lower .. upper-1
lower, upper = 0, n
found = False

while lower < upper: # An empty range means y is absent, so the loop always ends
  i = (lower + upper) // 2 # Middle position, using whole-number division
  if x[i] == y:
    found = True
    break
  if x[i]>y:
    upper = i # y can only be to the left of position i
  else:
    lower = i + 1 # y can only be to the right of position i

toc = time.perf_counter()

if found:
  print("Found %f at position %d in %d microseconds"%(y, i, 1000000*(toc-tic)))
else:
  # Cannot happen while y is picked from x, but keeps the cell safe to edit
  print("%f is not in the array (searched for %d microseconds)"%(y, 1000000*(toc-tic)))

Found 0.264301 at position 264512 in 195 microseconds
