# Profiling and optimizing Python code

When to optimize?

- Do you need optimization?
  - If speed is not a problem, then there is no reason to optimize
- If yes: Which parts of your code should be optimized?
  - Use a profiler, such as `cProfile`
  - Usually, almost all execution time occurs within a small part of your code
  - Optimize that code, and leave the rest alone
- If you need even better performance
  - Redesign the code completely
  - But this takes effort!

![](image.jpg)

## Example: Find duplicate movie titles

- Read 5000 movie titles
- Return a list of movie titles that occur twice
- Search is case insensitive

In [None]:
def read_movies(src):
    
    with open(src) as fd:
        return fd.read().splitlines()

In [None]:
# Slow solution
def is_duplicate(needle, haystack):
    for movie in haystack:
        if needle.lower() == movie.lower():
            return True
    return False

@profile
def find_duplicate_movies(src='movies.txt'):
    movies = read_movies(src)
    duplicates = []
    while movies:
        movie = movie.pop()
        if is_duplicate(movie, movies):
            duplicates.append(movie)
    return duplicates

In [None]:
# Better solution
@profile
def find_duplicate_movies(src='movies.txt'):
    movies = read_movies(src)
    movies = [movie.lower() for movie in movies]
    duplicates = []
    while movies:
        movie = movie.pop()
        if movie in movies:
            duplicates.append(movie)
    return duplicates

In [None]:
  
# Optimized solution
@profile
def find_duplicate_movies(src='movies.txt'):
    
    movies = read_movies(src)
    movies = [movie.lower() for movie in movies]
    movies.sort()
    duplicates = [movie1 for movie1, movie2 in zip(movies[:-1], movies[1:]) if movie1 == movie2]
    return duplicates


find_duplicate_movies()

         5015 function calls in 0.007 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.007    0.007 <ipython-input-7-84db1b5425ee>:7(find_duplicate_movies)
        1    0.003    0.003    0.003    0.003 {method 'sort' of 'list' objects}
        1    0.001    0.001    0.003    0.003 <ipython-input-7-84db1b5425ee>:11(<listcomp>)
     5000    0.001    0.000    0.001    0.000 {method 'lower' of 'str' objects}
        1    0.000    0.000    0.001    0.001 <ipython-input-7-84db1b5425ee>:1(read_movies)
        1    0.001    0.001    0.001    0.001 {method 'splitlines' of 'str' objects}
        1    0.000    0.000    0.000    0.000 <ipython-input-7-84db1b5425ee>:13(<listcomp>)
        1    0.000    0.000    0.000    0.000 {method 'read' of '_io.TextIOWrapper' objects}
        1    0.000    0.000    0.000    0.000 {built-in method io.open}
        1    0.000    0.000    0.000    0.000 /usr/lib/python

['4: rise of the silver surfer',
 'a foreign affair',
 'a tale of two cities',
 'alien³',
 "all the king's men",
 'angus, thongs and perfect snogging',
 'appaloosa',
 'assault on precinct 13',
 'babe',
 'bangkok dangerous',
 'beautiful boxer',
 'black robe',
 'blueberry',
 'boogeyman',
 'broken english',
 'buddy',
 'bug',
 'caché',
 'cape fear',
 'cape fear',
 'cat people',
 'chaos',
 "charlotte's web",
 'children of men',
 'crossroads',
 'da wan',
 'darkness falls',
 'dawn of the dead',
 'deep impact',
 'doa: dead or alive',
 'dracula',
 'dressed to kill',
 'eight below',
 'elizabeth: the golden age',
 'enigma',
 'evan almighty',
 'f/x2',
 'face/off',
 'fail-safe',
 'father of the bride',
 'fire down below',
 'five easy pieces',
 'flawless',
 'flipper',
 'flirting with disaster',
 'freaky friday',
 'frozen river',
 'frozen river',
 'funny games u.s.',
 'galaxy quest',
 'gangs of new york',
 'grease',
 "grey's anatomy",
 'halloween',
 'hamlet',
 'hamlet',
 'hamlet',
 'hard target',
 'h

## A profiling decorator

- Apply to a function with `@profile`
- Profiles the function using `cProfile`, and prints out a report
- Adapted from the Python 3.6 docs:
  - https://docs.python.org/3/library/profile.html#profile.Profile

In [2]:
import cProfile, pstats, io



def profile(fnc):
    
    """A decorator that uses cProfile to profile a function"""
    
    def inner(*args, **kwargs):
        
        pr = cProfile.Profile()
        pr.enable()
        retval = fnc(*args, **kwargs)
        pr.disable()
        s = io.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
        return retval

    return inner