# VECTORIZATION  

- Vectorization is the process of converting an algorithm from operating on a single value at a time to operating on a set of values (vector) at one time.

In [72]:
### NATIVE PYTHON APPROACH
import random
def random_walk(n):
    position = 0
    walk = [position]
    for i in range(n):
        position += 2*random.randint(0, 1)-1
        walk.append(position)
    return walk

%timeit walk = random_walk(1000)

2.22 ms ± 753 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [80]:
import numpy as np
def random_walk_fastest(n=1000):
    steps = np.random.choice([-1,+1], n)
    return np.cumsum(steps)

%timeit walk = random_walk_fastest(1000)

48.5 µs ± 11.6 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [95]:
Z = np.ones(4*1000000, np.float32)
Z[...] = 0

In [148]:
Z = np.arange(9).reshape(3,3).astype(np.int64)

In [154]:
Z

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]], dtype=int64)

In [157]:
Z[::1]

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]], dtype=int64)

In [151]:
Z.nbytes

72

In [152]:
(Z.shape[1]*Z.itemsize, Z.itemsize)

(24, 8)

# MEMOIZATION METHOD 

In [38]:
from collections import defaultdict

def memoize(func):
    cache = dict()
    def memoized_func(*args):
        if args in cache.keys():
            return cache[args]
        else:
            result = func(*args)
            cache[args] = result
            return result

    return memoized_func

import functools
import numpy as np

@functools.lru_cache()
def distance_calculator(data_point_1,data_point_2):
    result = np.sqrt(np.square(data_point_2[0] - data_point_1[0]) + np.square(data_point_2[1] - data_point_1[0]))
    time.sleep(3)
    return result

In [39]:
import time
start_time = time.time()
distance_calculator((3,2),(5,2))
time_taken = time.time() - start_time
time_taken

3.000471353530884

In [40]:
start_time = time.time()
distance_calculator((3,2),(5,2))
time_taken = time.time() - start_time
time_taken

0.0

In [41]:
distance_calculator.cache_info() ### To Get The Cache information

distance_calculator.cache_clear()  ### To clear the cache memory

In [259]:
##### WHEN NOT TO USE MEMOIZATION

### SHOULDN'T USE IN INDETERMINISTIC FUNCTION 

from datetime import datetime

def nondeterministic_adder(x, y):
    # Check to see if today is Monday (weekday 0)
    if datetime.now().weekday() == 0:
        return x + y + x
    return x + y

# GENERATOR (SAVES THE MEMORY CONSUMPTION) 

In [166]:
import numpy as np
import pandas as pd
series1 = pd.Series(np.random.randn(660)).astype(int)

In [160]:
import sys 
print(f'{round(sys.getsizeof(series1)/1048576,2)} MB is occupied for string type')

0.05 MB is occupied for string type


In [167]:
def square(x):
    result = x*x
# time.sleep(3)
    yield result

In [170]:
temp = map(square,series1)

In [172]:
for i in temp:
    print(list(i))