Item 37 Compose Classes Instead of Nesting Many Levels of Built-in Types 

Things to Remember
- Avoid making dictionaries with values that are dictionaries, long tuples, or complex nestings of other built-in types
- Use namedtuple for lightweight, immutable data containers before you need the flexibility of a full class
- Move your bookkeeping code to multiple classes when your internal state dictionaries get complicated

In [None]:
# - record the grades of a set of students whose
#   names aren't known in advance

class SimpleGradebook:
    """Keep a flat list of scores for each student, keyed by name."""

    def __init__(self):
        # A dict suits this bookkeeping because the set of student
        # names is dynamic and not known ahead of time.
        self._grades = dict()

    def add_student(self, name):
        """Start an empty score list for ``name``.

        Re-adding an existing name replaces its list with a new empty one.
        """
        self._grades[name] = list()

    def report_grade(self, name, score):
        """Append ``score`` to the list of a previously added student."""
        scores = self._grades[name]
        scores.append(score)

    def average_grade(self, name):
        """Return the arithmetic mean of the scores recorded for ``name``."""
        scores = self._grades[name]
        total = sum(scores)
        return total / len(scores)

# Demo: register one student, report three scores, print the mean.
book = SimpleGradebook()
book.add_student('Isaac Newton')
for score in (90, 95, 85):
    book.report_grade('Isaac Newton', score)

print(book.average_grade('Isaac Newton'))

In [None]:
# now you want to track the scores per subject

from collections import defaultdict

class BySubjectGradebook:
    """Track each student's scores grouped by subject.

    Layout: ``{student name: {subject: [score, ...]}}``.
    """

    def __init__(self):
        self._grades = dict()  # outer dict: student name -> per-subject dict

    def add_student(self, name):
        """Register ``name`` with no subjects yet.

        Re-adding an existing name replaces everything recorded for it.
        """
        # inner dict: a subject's score list is created on first access
        self._grades[name] = defaultdict(list)

    def report_grade(self, name, subject, grade):
        """Append ``grade`` to ``subject`` for a previously added student."""
        self._grades[name][subject].append(grade)

    def average_grade(self, name):
        """Return the mean of all of ``name``'s scores, pooled across subjects."""
        subject_scores = self._grades[name]
        running_total = 0
        running_count = 0
        for scores in subject_scores.values():
            running_total += sum(scores)
            running_count += len(scores)
        return running_total / running_count

# Demo: one student, two subjects, two scores each; print the pooled mean.
book = BySubjectGradebook()
book.add_student('Albert Einstein')
for subject, grade in (
    ('Math', 75),
    ('Math', 65),
    ('Gym', 90),
    ('Gym', 95),
):
    book.report_grade('Albert Einstein', subject, grade)
print(book.average_grade('Albert Einstein'))

defaultdict
- defaultdict(list): 
  When a key is accessed for the first time, an entry is automatically created by calling the default_factory function, which in this case returns an empty list.

In [None]:
# - now you want to track the weight of each score toward the overall
#   grade in the class so that midterm and final exams are more 
#   important than pop quizzes
from collections import defaultdict

class WeightedGradebook:
    """Gradebook whose scores each carry a weight toward the subject grade.

    Layout: ``{student name: {subject: [(score, weight), ...]}}``.
    """

    def __init__(self):
        self._grades = {}  # outer dict: student name -> per-subject dict

    def add_student(self, name):
        """Register ``name`` with no subjects yet.

        Adding a name that already exists replaces its recorded grades.
        """
        self._grades[name] = defaultdict(list)  # inner dict: subject -> grades

    def report_grade(self, name, subject, score, weight):
        """Record a ``(score, weight)`` tuple for ``name`` under ``subject``.

        Raises:
            KeyError: if ``name`` was never passed to ``add_student``.
        """
        self._grades[name][subject].append((score, weight))

    def average_grade(self, name):
        """Return the unweighted mean of the per-subject weighted averages.

        Raises:
            KeyError: if ``name`` was never passed to ``add_student``.
            ZeroDivisionError: if the student has no subjects, or the
                weights of a subject sum to zero.
        """
        by_subject = self._grades[name]

        score_sum, score_count = 0, 0
        # Only the grade lists are needed here, so iterate values()
        # instead of items() with an unused key.
        for scores in by_subject.values():
            weighted_total, total_weight = 0, 0
            for score, weight in scores:
                weighted_total += score * weight
                total_weight += weight
            # The weights are not assumed to sum to 1, so the weighted
            # total is normalized by the total weight of the subject.
            score_sum += weighted_total / total_weight
            score_count += 1
        return score_sum / score_count

# Demo: weighted scores for one student across two subjects.
book = WeightedGradebook()
book.add_student('Albert Einstein')
for subject, score, weight in (
    ('Math', 75, 0.05),
    ('Math', 65, 0.15),
    ('Math', 70, 0.80),
    ('Gym', 100, 0.40),
    ('Gym', 85, 0.60),
):
    book.report_grade('Albert Einstein', subject, score, weight)
print(book.average_grade('Albert Einstein'))

- It's time to make the leap from built-in types like dictionaries, tuples, sets, and lists to a hierarchy of classes as the code becomes increasingly complex and harder to read
- Python's built-in dictionary and tuple types made it easy to keep going, adding layer after layer to the internal bookkeeping, but we should stop at one level of nesting as more levels will make the code hard to read and difficult to maintain
- When your bookkeeping is getting complicated, break it all out into classes to provide well-defined interfaces that better encapsulate your data


Refactoring to Classes
- Start at the bottom of the dependency tree: a single grade
- Write a Subject class to represent a single subject that contains a set of grades
- Write a Student class to represent a set of subjects that are being studied by a single student
- Write a Gradebook class that serves as a container for all the students

In [None]:
# single grade - the tuple approach
# each grade is a positional (score, weight) pair
grades = [
    (95, 0.45),
    (85, 0.55),
]
# _ is Python convention for unused variables
total_weight = sum(weight for _, weight in grades)
total = sum(score * weight for score, weight in grades)
average_grade = total / total_weight
print(average_grade)

What is the problem?
- Tuple instances are positional.
- This becomes a problem when you want to add more information to a grade: every usage of the two-tuple must be rewritten to account for the extra items.


In [None]:
# single grade - the tuple approach
# adding notes: each grade is now a (score, weight, note) triple
grades = [
    (95, 0.45, 'Great job'),
    (85, 0.55, 'Better next time'),
]
# every unpacking site needs one more _ for the unused note
# (_ is Python convention for unused variables)
total_weight = sum(weight for _, weight, _ in grades)
total = sum(score * weight for score, weight, _ in grades)
average_grade = total / total_weight
print(average_grade)

In [None]:
# single grade - the namedtuple approach
from collections import namedtuple
# Grade: a lightweight, immutable record with two named fields
Grade = namedtuple('Grade', ['score', 'weight'])
theGrade = Grade(score=100, weight=1)  # use it like a data class
print(theGrade.score)

Limitations of namedtuple
- Default values are awkward: namedtuple only accepts them through the `defaults` argument (Python 3.7+), which applies to the rightmost fields
- You can still access the attribute values of namedtuple instances using numerical indexes and iteration, which can lead to unintentional usage that makes it harder to move to a real class later

In [None]:
# - a namedtuple is still a tuple, so you can't really
#   prevent the following usages of it
# Iterating yields the field values in declaration order.
for x in theGrade: # access attribute values via iteration
    print(x)

# access the attribute values using numerical indexes
# (index 0 is the first declared field)
print(theGrade[0]) 

In [None]:
# - a single subject that contains 
#   a set of grades
class Subject:
    """A single subject holding a list of weighted ``Grade`` records."""

    def __init__(self):
        self._grades = list()

    def report_grade(self, score, weight):
        """Store a new ``Grade(score, weight)`` record for this subject."""
        entry = Grade(score, weight)
        self._grades.append(entry)

    def average_grade(self):
        """Return the weighted average: sum(score * weight) / sum(weight)."""
        weighted_total = 0
        weight_total = 0
        for entry in self._grades:
            weighted_total += entry.score * entry.weight
            weight_total += entry.weight
        return weighted_total / weight_total

In [None]:
from collections import defaultdict
class Student:
    """The set of subjects studied by one student, keyed by subject name."""

    def __init__(self):
        # A missing subject name yields a fresh Subject on first lookup.
        self._subjects = defaultdict(Subject)

    def get_subject(self, name):
        """Return the ``Subject`` stored under ``name``."""
        subject = self._subjects[name]
        return subject

    def average_grade(self):
        """Return the unweighted mean of the subjects' average grades."""
        running_total = 0
        for subject in self._subjects.values():
            running_total += subject.average_grade()
        return running_total / len(self._subjects)
    

In [None]:

class Gradebook:
    """Top-level container mapping student names to ``Student`` records."""

    def __init__(self):
        # An unknown name yields a fresh Student on first lookup.
        self._students = defaultdict(Student)

    def get_student(self, name):
        """Return the ``Student`` stored under ``name``."""
        student = self._students[name]
        return student

In [None]:
# Demo: the class-based gradebook, one student with two subjects.
book = Gradebook()
albert = book.get_student('Albert Einstein')

math = albert.get_subject('Math')
for score, weight in ((75, 0.05), (65, 0.15), (70, 0.80)):
    math.report_grade(score, weight)

gym = albert.get_subject('Gym')
for score, weight in ((100, 0.40), (85, 0.60)):
    gym.report_grade(score, weight)

print(albert.average_grade())