Code Challenge 13: Highest Rated Movie Directors
- use template provided by try to implement some of the improvements suggested in discussion
- Try to use itertools groupby and statistics.mean
- Consider using a second named Tuple
- look and learn from solution of atakume and refactor 



First get the column names that `csv.DictReader` is going to use as dictionary keys for each row

In [None]:
! head -n 1 movie_metadata.csv

In [24]:
import csv
from collections import defaultdict, namedtuple
from itertools import groupby
import statistics

MOVIE_DATA = 'movie_metadata.csv'
NUM_TOP_DIRECTORS = 20
MIN_MOVIES = 4
MIN_YEAR = 1960

Movie = namedtuple('Movie', 'title year score')
Films = namedtuple('Films', 'director films average_score')

def get_movies_by_director(data = MOVIE_DATA):
    '''Extracts all movies from csv and stores them in a dictionary
    where keys are directors, and values is a list of movies (named tuples)'''
    movies = defaultdict(list)
    with open(data, encoding = 'utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        movies = sorted([(row['director_name'], Movie(title = row['movie_title'].strip(), year  = row['title_year'], 
                                                score = float(row['imdb_score']))) for row in reader], key = lambda k:k[0]) 
    return {director : [movie[1] for movie in g] for director, g in groupby(movies, lambda k:k[0])}

def get_top_directors_films(data):
    '''Get the films for the top directors, 
       in data k is director v is movie list'''
    films = sorted([Films(director      = k, 
                          films         = sorted(filter(lambda x: _by_prod_year(x.year) , v), key = lambda k: k.year, reverse = True),
                          average_score = _calc_mean(v)) for k, v in data.items()])
    return sorted(filter(lambda x: len(x.films)>=MIN_MOVIES, films), key = lambda k: k.average_score, reverse =True)[:NUM_TOP_DIRECTORS]

def _by_prod_year(year):
    return year.isdigit() and int(year) >= MIN_YEAR

def _calc_mean(movies):
    '''Helper method to calculate mean of list of Movie namedtuples'''
    return statistics.mean([movie.score for movie in movies])


def print_results(films):
    '''Print directors ordered by highest average rating. For each director
    print his/her movies also ordered by highest rated movie.
    See http://pybit.es/codechallenge13.html for example output'''
    fmt_director_entry = '{counter}. {director:<52} {avg}'
    fmt_movie_entry = '{year}] {title:<50} {score}'
    sep_line = '-' * 60
    for films, i in zip(films, range(1, len(films)+1)):
        print(fmt_director_entry.format(counter=i, director=films.director, avg=films.average_score))
        print(sep_line)
        for movie in films.films:
            print(fmt_movie_entry.format(year=movie.year, title=movie.title, score=movie.score))
        print()

#def main():
#     '''This is a template, feel free to structure your code differently.
#     We wrote some tests based on our solution: test_directors.py'''
#     directors = get_movies_by_director()
#     films = get_average_scores(directors)
#     print_results(directors)


#if __name__ == '__main__':
#    main()

In [25]:
directors = get_movies_by_director()
films = get_top_directors_films(directors)
print_results(films)

1. Sergio Leone                                         8.475
------------------------------------------------------------
1984] Once Upon a Time in America                        8.4
1968] Once Upon a Time in the West                       8.6
1966] The Good, the Bad and the Ugly                     8.9
1964] A Fistful of Dollars                               8.0

2. Christopher Nolan                                    8.425
------------------------------------------------------------
2014] Interstellar                                       8.6
2012] The Dark Knight Rises                              8.5
2010] Inception                                          8.8
2008] The Dark Knight                                    9.0
2006] The Prestige                                       8.5
2005] Batman Begins                                      8.3
2002] Insomnia                                           7.2
2000] Memento                                            8.5

3. Hayao Miyazaki   

Still TODO
- This works in a notebook. Get it to work in a separate file using the .main stuff
- Also rewrite the tests to work with my functions 


In [9]:
import pytest
import ipytest

# enable IPython magics for test execution
import ipytest.magics

# enable pytest's assertions
ipytest.config.rewrite_asserts = True

# set the filename
__file__ = 'Day_006.ipynb'

In [10]:
# from directors import get_movies_by_director, get_average_scores, _calc_mean


def test():
    directors = get_movies_by_director()

    assert 'Sergio Leone' in directors
    assert 'Andrew Stanton' in directors  # has 3 movies, but not yet filtered
    assert len(directors['Sergio Leone']) == 4
    assert len(directors['Peter Jackson']) == 12

    movies_sergio = directors['Sergio Leone']
    movies_nolan = directors['Christopher Nolan']
    assert _calc_mean(movies_sergio) == 8.5
    assert _calc_mean(movies_nolan) == 8.4

    directors = get_average_scores(directors)
    assert 'Andrew Stanton' not in directors  # director 3 movies now filtered out

    expected_directors = ['Sergio Leone', 'Christopher Nolan', 'Quentin Tarantino',
                          'Hayao Miyazaki', 'Frank Darabont', 'Stanley Kubrick']
    expected_avg_scores = [8.5, 8.4, 8.2, 8.2, 8.0, 8.0]
    expected_num_movies = [4, 8, 8, 4, 4, 7]
    report = sorted(directors.items(), key=lambda x: float(x[0][1]), reverse=True)
    for counter, (i, j, k) in enumerate(
                            zip(expected_directors,
                                expected_avg_scores, expected_num_movies)):
        assert report[counter][0] == (i, j)
        assert len(report[counter][1]) == k
        assert  _calc_mean(report[counter][1]) == j

    return "tests pass"

    
ipytest.run()

platform darwin -- Python 3.7.0, pytest-3.8.2, py-1.7.0, pluggy-0.7.1
rootdir: /Users/NewEarth/Documents/Projects/ANewTraining/Python/HundredDaysCode/My100Days, inifile:
collected 1 item

Day_006.py F                                                             [100%]

_____________________________________ test _____________________________________

    def test():
        directors = get_movies_by_director()
    
        assert 'Sergio Leone' in directors
        assert 'Andrew Stanton' in directors  # has 3 movies, but not yet filtered
        assert len(directors['Sergio Leone']) == 4
        assert len(directors['Peter Jackson']) == 12
    
        movies_sergio = directors['Sergio Leone']
        movies_nolan = directors['Christopher Nolan']
>       assert _calc_mean(movies_sergio) == 8.5
E       AssertionError: assert 8.475 == 8.5
E        +  where 8.475 = _calc_mean([Movie(title='Once Upon a Time in America', year='1984', score=8.4), Movie(title='Once Upon a Time in the West', ye