Code Challenge 13: Highest Rated Movie Directors
- use template provided by try to implement some of the improvements suggested in discussion
- Try to use itertools groupby and statistics.mean
- Consider using a second named Tuple



First get the column names that `csv.DictReader` is going to use as dictionary keys for each row

In [None]:
! head -n 1 movie_metadata.csv

In [None]:
import csv
from collections import defaultdict, namedtuple
from itertools import groupby
import statistics

MOVIE_DATA = 'movie_metadata.csv'
NUM_TOP_DIRECTORS = 20
MIN_MOVIES = 4
MIN_YEAR = 1960

Movie = namedtuple('Movie', 'title year score')
Films = namedtuple('Films', 'director films average_score')

def get_movies_by_director(data = MOVIE_DATA):
    '''Extracts all movies from csv and stores them in a dictionary
    where keys are directors, and values is a list of movies (named tuples)'''
    movies = defaultdict(list)
    with open(data, encoding = 'utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        movies = sorted([(row['director_name'], Movie(title = row['movie_title'].strip(), year  = row['title_year'], 
                                                score = float(row['imdb_score']))) for row in reader], key = lambda k:k[0]) 
    return {director : [movie[1] for movie in g] for director, g in groupby(movies, lambda k:k[0])}

def get_top_directors_films(data):
    '''Get the films for the top directors, 
       in data k is director v is movie list'''
    films = sorted([Films(director = k, films = v, average_score = _calc_mean(v)) 
                    for k, v in data.items() if len(v) >= MIN_MOVIES], reverse=True, key = lambda k:k.average_score)
    return films[:20]


def _calc_mean(movies):
    '''Helper method to calculate mean of list of Movie namedtuples'''
    return statistics.mean([movie.score for movie in movies])


def print_results(films):
    '''Print directors ordered by highest average rating. For each director
    print his/her movies also ordered by highest rated movie.
    See http://pybit.es/codechallenge13.html for example output'''
    for film in films:
        print(f'{film.director}')
        for movie in film.films:
            print(f'\t\t{movie}')
        print(f'')
    return
    #fmt_director_entry = '{counter}. {director:<52} {avg}'
    #fmt_movie_entry = '{year}] {title:<50} {score}'
    #sep_line = '-' * 60


#def main():
    '''This is a template, feel free to structure your code differently.
    We wrote some tests based on our solution: test_directors.py'''
    directors = get_movies_by_director()
    films = get_average_scores(directors)
    print_results(directors)


#if __name__ == '__main__':
#    main()

In [8]:
directors = get_movies_by_director()
films = get_top_directors_films(directors)
for film in films:
        print(f'{film.director}')
        for movie in film.films:
            print(f'\tTitle: {movie.title},\tYear: {movie.year},\tScore: {movie.score}')
        print(f'')
#print_results(films)

Sergio Leone
	Title: Once Upon a Time in America,		Year: 1984,	Score: 8.4
	Title: Once Upon a Time in the West,		Year: 1968,	Score: 8.6
	Title: The Good, the Bad and the Ugly,		Year: 1966,	Score: 8.9
	Title: A Fistful of Dollars,		Year: 1964,	Score: 8.0

Christopher Nolan
	Title: The Dark Knight Rises,		Year: 2012,	Score: 8.5
	Title: The Dark Knight,		Year: 2008,	Score: 9.0
	Title: Interstellar,		Year: 2014,	Score: 8.6
	Title: Inception,		Year: 2010,	Score: 8.8
	Title: Batman Begins,		Year: 2005,	Score: 8.3
	Title: Insomnia,		Year: 2002,	Score: 7.2
	Title: The Prestige,		Year: 2006,	Score: 8.5
	Title: Memento,		Year: 2000,	Score: 8.5

Hayao Miyazaki
	Title: Ponyo,		Year: 2008,	Score: 7.7
	Title: Howl's Moving Castle,		Year: 2004,	Score: 8.2
	Title: Princess Mononoke,		Year: 1997,	Score: 8.4
	Title: Spirited Away,		Year: 2001,	Score: 8.6

Quentin Tarantino
	Title: Django Unchained,		Year: 2012,	Score: 8.5
	Title: Inglourious Basterds,		Year: 2009,	Score: 8.3
	Title: The Hateful Eight,		

TODO
- This works in a notebook.  For Day 7 or 6 b whatever I decide to call it lets get it to work in a separate file using the .main stuff
- And also for day 7 lets try to figure out the print results fx in the template
- Also rewrite the tests to work with my functions 
- try to fix the printing

In [9]:
import pytest
import ipytest

# enable IPython magics for test execution
import ipytest.magics

# enable pytest's assertions
ipytest.config.rewrite_asserts = True

# set the filename
__file__ = 'Day_006.ipynb'

In [10]:
# from directors import get_movies_by_director, get_average_scores, _calc_mean


def test():
    directors = get_movies_by_director()

    assert 'Sergio Leone' in directors
    assert 'Andrew Stanton' in directors  # has 3 movies, but not yet filtered
    assert len(directors['Sergio Leone']) == 4
    assert len(directors['Peter Jackson']) == 12

    movies_sergio = directors['Sergio Leone']
    movies_nolan = directors['Christopher Nolan']
    assert _calc_mean(movies_sergio) == 8.5
    assert _calc_mean(movies_nolan) == 8.4

    directors = get_average_scores(directors)
    assert 'Andrew Stanton' not in directors  # director 3 movies now filtered out

    expected_directors = ['Sergio Leone', 'Christopher Nolan', 'Quentin Tarantino',
                          'Hayao Miyazaki', 'Frank Darabont', 'Stanley Kubrick']
    expected_avg_scores = [8.5, 8.4, 8.2, 8.2, 8.0, 8.0]
    expected_num_movies = [4, 8, 8, 4, 4, 7]
    report = sorted(directors.items(), key=lambda x: float(x[0][1]), reverse=True)
    for counter, (i, j, k) in enumerate(
                            zip(expected_directors,
                                expected_avg_scores, expected_num_movies)):
        assert report[counter][0] == (i, j)
        assert len(report[counter][1]) == k
        assert  _calc_mean(report[counter][1]) == j

    return "tests pass"

    
ipytest.run()

platform darwin -- Python 3.7.0, pytest-3.8.2, py-1.7.0, pluggy-0.7.1
rootdir: /Users/NewEarth/Documents/Projects/ANewTraining/Python/HundredDaysCode/My100Days, inifile:
collected 1 item

Day_006.py F                                                             [100%]

_____________________________________ test _____________________________________

    def test():
        directors = get_movies_by_director()
    
        assert 'Sergio Leone' in directors
        assert 'Andrew Stanton' in directors  # has 3 movies, but not yet filtered
        assert len(directors['Sergio Leone']) == 4
        assert len(directors['Peter Jackson']) == 12
    
        movies_sergio = directors['Sergio Leone']
        movies_nolan = directors['Christopher Nolan']
>       assert _calc_mean(movies_sergio) == 8.5
E       AssertionError: assert 8.475 == 8.5
E        +  where 8.475 = _calc_mean([Movie(title='Once Upon a Time in America', year='1984', score=8.4), Movie(title='Once Upon a Time in the West', ye