# Python Code Profilng
- Code / Example taken from Sebastian Mathot: 
    - https://www.youtube.com/watch?v=8qEnExGLZfY
- Adapted / annotated / slightly edited for live demo as part of Code Profiling lesson.

## Goal: Find all duplicate movies from a text file of 10,000 movie titles.

In [1]:
def read_movies(src):
    """Read movies from a text file, return the movie titles as a list"""
    
    with open(src) as f:
        movie_list = f.read().splitlines() 
        return movie_list

In [2]:
def is_duplicate(item:str, collection:list) -> bool:
    
    """Determine (True or False) whether a given item (i.e. movie)
       is in a collection of other movie titles (i.e. list).
       
       If you've exhausted the list of movies and found no matches, return False."""
    
    for movie in collection:
        if movie.lower() == item.lower():
            return True
        
    return False

In [3]:
def find_duplicate_movies(src='movies.txt') -> list:
    
    """Return all movies that appear twice (i.e. duplicates) in the text file.
       Search through the list of movies systematically, collecting duplicates as you go."""
    
    movie_list = read_movies(src)
    duplicates = []
    
    while movie_list: 
        
        movie = movie_list.pop()
        
        if is_duplicate(movie, movie_list): 
            
            duplicates.append(movie)
            
    
    return duplicates

In [4]:
find_duplicate_movies()

['Zookeeper (2011)',
 'Miracle on 34th Street (1994)',
 'Babylon 5: Thirdspace (1998)',
 'Police Academy 6: City Under Siege (1989)',
 'War of the Worlds (2005)',
 'Chaplin (1992)',
 'Twelfth Night (1996)',
 'Memento (2000)',
 'Fire and Ice (2008)',
 'Stan Helsing (2009)',
 'Intimate Strangers (Confidences trop intimes) (2004)',
 'Anything for Her (Pour elle) (2008)',
 'Simpatico (1999)',
 'High School Musical 2 (2007)',
 'Big Blue, The (Grand bleu, Le) (1988)',
 'Bedazzled (1967)',
 'Remember Me (Ricordati di me) (2003)',
 'Saturn 3 (1980)',
 '11:14 (2003)',
 "Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)",
 'Thousand Words, A (2012)',
 'Carnosaur (1993)',
 'Cold Fish (Tsumetai nettaigyo) (2010)',
 'Very Potter Sequel, A (2010)',
 'Antichrist (2009)',
 'Captain Horatio Hornblower R.N. (1951)',
 'Postman Always Rings Twice, The (1981)',
 'Red Violin, The (Violon rouge, Le) (1998)',
 'Sorority House Massacre II (1990)',
 'Just Jim (2015)'

---

---

---

### cProfile decorator:

In [5]:
import cProfile, pstats, io


def profile(fnc):
    
    """A decorator that uses cProfile to profile a function. 
       Starts the profile before executing a function, then exeuctes the function,
       then stops the profile, then prints out a diagnostics report.
       
       Lots of boilerplate code from the Python 3 documentation:
       https://docs.python.org/3/library/profile.html#profile.Profile
       """
    
    def inner(*args, **kwargs):
        
        pr = cProfile.Profile()
        pr.enable()  
        retval = fnc(*args, **kwargs)       
        pr.disable() 
        s = io.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())

        return retval

    return inner

In [6]:
@profile
def find_duplicate_movies(src='movies.txt') -> list:
    
    """Return all movies that appear twice (i.e. duplicates) in the text file.
       Search through the list of movies systematically, collecting duplicates as you go."""
    
    movie_list = read_movies(src)
    duplicates = []
    
    while movie_list: 
        
        movie = movie_list.pop()
        
        if is_duplicate(movie, movie_list): 
            
            duplicates.append(movie)
            
    
    return duplicates

In [7]:
duplicates = find_duplicate_movies()

         98214041 function calls in 20.592 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.028    0.028   20.592   20.592 <ipython-input-6-9f548ec218b7>:1(find_duplicate_movies)
    10000   11.556    0.001   20.559    0.002 <ipython-input-2-8bd31298e24e>:1(is_duplicate)
 98193766    9.002    0.000    9.002    0.000 {method 'lower' of 'str' objects}
    10000    0.002    0.000    0.002    0.000 {method 'pop' of 'list' objects}
        1    0.000    0.000    0.002    0.002 <ipython-input-1-27c53f1fe273>:1(read_movies)
        1    0.001    0.001    0.001    0.001 {method 'splitlines' of 'str' objects}
        1    0.000    0.000    0.001    0.001 {method 'read' of '_io.TextIOWrapper' objects}
        1    0.000    0.000    0.000    0.000 /home/mmuratardag/anaconda3/lib/python3.8/codecs.py:319(decode)
        1    0.000    0.000    0.000    0.000 {built-in method _codecs.utf_8_decode}
        1    0.000    0.0

In [None]:
@profile
def find_duplicate_movies(src='movies.txt') -> list:
    
    """Return all movies that appear twice (i.e. duplicates) in the text file.
       Search through the list of movies systematically, collecting duplicates as you go."""
    
    movie_list = read_movies(src)
    duplicates = []
    
    while movie_list: 
        
        movie = movie_list.pop()
        
        if is_duplicate(movie, movie_list): 
            
            duplicates.append(movie)
            
    
    return duplicates