# Uma classe simples com Student e Grade

In [1]:
class SimpleGradebook:
    """Gradebook that keeps a flat list of scores for each student name."""

    def __init__(self):
        # Maps student name -> list of scores reported for that student.
        self._grades = {}

    def add_student(self, name):
        """Register ``name`` with an empty list of scores.

        Adding a student who is already registered is a no-op, so scores that
        were reported earlier are never thrown away by a repeated call.
        """
        self._grades.setdefault(name, [])

    def report_grade(self, name, score):
        """Append ``score`` to the scores of ``name``.

        Raises:
            KeyError: if ``name`` was never added with ``add_student``.
        """
        self._grades[name].append(score)

    def average_grade(self, name):
        """Return the arithmetic mean of the scores reported for ``name``.

        Raises:
            KeyError: if ``name`` was never added with ``add_student``.
            ZeroDivisionError: if no score has been reported for ``name`` yet.
        """
        grades = self._grades[name]
        return sum(grades) / len(grades)


# Demo: register one student, report a single score and print the average.
book = SimpleGradebook()
book.add_student("Isaac Newton")  # add student in the gradebook (dictionary)
book.report_grade("Isaac Newton", 90)  # report grade for the student

print(book.average_grade("Isaac Newton"))  # 90.0

90.0


# Gradebook, Student and Subject as separate classes

In [2]:
import collections

# Immutable record pairing a score with the weight used when averaging.
Grade = collections.namedtuple("Grade", "score weight")

class Subject:
    """Weighted grades of a single subject, stored by grade name."""

    def __init__(self):
        # Maps grade name -> Grade(score, weight).
        self._grades = {}

    def report_grade(self, name, score, weight):
        """Store the grade ``name``; a later report under the same name replaces it."""
        entry = Grade(score, weight)
        self._grades[name] = entry

    def average_grade(self):
        """Return the weighted mean: sum(score * weight) / sum(weight).

        Raises ZeroDivisionError when the weights add up to zero, which
        includes the case where nothing has been reported yet.
        """
        weighted_sum = 0
        weight_sum = 0
        # Accumulate in a plain loop, one stored grade at a time.
        for entry in self._grades.values():
            weight_sum += entry.weight
            weighted_sum += entry.score * entry.weight
        return weighted_sum / weight_sum
    
class Student:
    """A student's subjects, created lazily the first time they are requested."""

    def __init__(self):
        # Maps subject name -> Subject instance.
        self._subjects = {}

    def subject(self, name):
        """Return the Subject stored under ``name``, creating it if missing."""
        try:
            return self._subjects[name]
        except KeyError:
            created = Subject()
            self._subjects[name] = created
            return created

    def average_grade(self):
        """Return the unweighted mean of every subject's average grade.

        Raises ZeroDivisionError when the student has no subjects.
        """
        averages = [entry.average_grade() for entry in self._subjects.values()]
        running = 0
        for value in averages:
            running += value
        return running / len(averages)
    
class Gradebook:
    """Top-level container that hands out one Student object per name."""

    def __init__(self):
        # Maps student name -> Student instance.
        self._students = {}

    def student(self, name):
        """Return the Student stored under ``name``, creating it if missing."""
        try:
            return self._students[name]
        except KeyError:
            created = Student()
            self._students[name] = created
            return created
    
# Demo: the student and the subject are created on first access, then one
# weighted grade (name "calculus", score 80, weight 0.1) is reported.
book = Gradebook()
albert = book.student("Albert Einstein")
math = albert.subject("Math")
math.report_grade("calculus", 80, 0.1)

print(albert.average_grade())  # 80.0

80.0


# Whenever an instance of the class is called, its __call__ method runs

In [None]:
class BetterCountMissing:
    """Callable object that counts how many times it has been invoked."""

    def __init__(self):
        # Number of calls made to this instance so far.
        self.added = 0

    def __call__(self):
        """Record one more call and return 0."""
        self.added += 1
        return 0
    
# Demo: calling the instance runs __call__, which bumps the counter.
counter = BetterCountMissing()
counter()  # counter.added = 1
assert callable(counter)  # True -> callable() checks whether the object can be called

1


# Herança

In [6]:
class InputData:
    """Common interface for input sources; subclasses provide ``read``."""

    def read(self):
        """Return the input's data. The base class always raises NotImplementedError."""
        raise NotImplementedError()
    
class PathInputData(InputData):
    """Input source whose data is the content of the file at ``path``."""

    def __init__(self, path):
        super().__init__()  # run the parent class initializer
        self.path = path

    # Concrete override of read(), which InputData leaves unimplemented.
    def read(self):
        """Return the entire content of the file at ``self.path`` as text."""
        # The context manager closes the file as soon as it has been read,
        # instead of leaving the handle open until garbage collection.
        with open(self.path) as handle:
            return handle.read()
    
class Worker:
    """Base class for a map/reduce worker bound to one input object."""

    def __init__(self, input_data):
        self.input_data = input_data
        # Filled in by map(); None until then.
        self.result = None

    def map(self):
        """Process ``self.input_data``; subclasses must override this."""
        raise NotImplementedError()

    def reduce(self, other):
        """Combine another worker's result into this one; subclasses must override this."""
        raise NotImplementedError()
    
class LineCountWorker(Worker):
    """Worker that counts newline characters in its input."""

    def map(self):
        """Read the input and store its number of newline characters in ``result``."""
        text = self.input_data.read()
        newline_count = text.count("\n")
        self.result = newline_count

    def reduce(self, other):
        """Add ``other``'s line count to this worker's ``result``."""
        self.result += other.result
        
import os

def generate_inputs(data_dir):
    """Lazily yield one PathInputData for every entry listed in ``data_dir``.

    Nothing is read from disk until the generator is first advanced.
    """
    yield from (
        PathInputData(os.path.join(data_dir, entry))
        for entry in os.listdir(data_dir)
    )
        
def create_workers(input_list):
    """Return a list holding one LineCountWorker per item of ``input_list``."""
    return [LineCountWorker(item) for item in input_list]

from threading import Thread # Thread -> executa várias tarefas ao mesmo tempo

def execute(workers):
    """Run each worker's ``map`` on its own thread, then fold the results.

    After every thread has finished, each remaining worker is reduced into the
    first one, whose ``result`` is returned. ``workers`` must support indexing
    and slicing; an empty list raises IndexError.
    """
    # Build every thread before starting any of them.
    pool = []
    for worker in workers:
        pool.append(Thread(target=worker.map))

    for running in pool:
        running.start()
    # Wait for all map steps to complete before reducing.
    for running in pool:
        running.join()

    head = workers[0]
    for other in workers[1:]:
        head.reduce(other)  # accumulate into the first worker
    return head.result

def mapreduce(data_dir):
    """Build inputs for ``data_dir``, wrap them in workers and return the combined result."""
    return execute(create_workers(generate_inputs(data_dir)))

import random

def write_test_files(tmpdir):
    """Create 100 files named "0".."99" in ``tmpdir``, each holding 0-100 newlines."""
    for index in range(100):
        target = os.path.join(tmpdir, str(index))
        with open(target, "w") as handle:
            # Random number of lines per file (inclusive bounds).
            line_count = random.randint(0, 100)
            handle.write("\n" * line_count)
            
import tempfile

# Demo: fill a temporary directory with test files and count their lines.
with tempfile.TemporaryDirectory() as tmpdir:  # temporary directory that holds the generated files
    write_test_files(tmpdir)  # write the test files
    result = mapreduce(tmpdir)  # run the map/reduce pipeline

print(f"There are {result} lines")


There are 5344 lines


# Simplifying with cls -> a classmethod uses cls to create instances of its own class

In [None]:
class GenericInputData:
    """Interface for input sources that can also enumerate themselves from a config."""

    def read(self):
        """Return the input's data; subclasses must override this."""
        raise NotImplementedError()

    @classmethod  # class method -> callable on the class itself, without an instance
    def generate_inputs(cls, config):  # cls -> the class the method was called on
        """Produce input objects described by ``config``; subclasses must override this."""
        raise NotImplementedError()
    
class PathInputData(GenericInputData):
    """Input source whose data is the content of the file at ``path``."""

    def __init__(self, path):
        super().__init__()
        self.path = path

    def read(self):
        """Return the entire content of the file at ``self.path`` as text."""
        # The context manager closes the file as soon as it has been read,
        # instead of leaving the handle open until garbage collection.
        with open(self.path) as handle:
            return handle.read()

    @classmethod
    def generate_inputs(cls, config):
        """Lazily yield one instance of ``cls`` per entry in ``config["data_dir"]``.

        Args:
            config: mapping that must contain the key ``"data_dir"``, the
                directory whose entries become inputs.
        """
        data_dir = config["data_dir"]
        for name in os.listdir(data_dir):
            # cls (not a hard-coded class name) so subclasses yield their own type.
            yield cls(os.path.join(data_dir, name))
            
class GenericWorker:
    """Base map/reduce worker that can also build workers from an input class."""

    def __init__(self, input_data):
        self.input_data = input_data
        # Filled in by map(); None until then.
        self.result = None

    def map(self):
        """Process ``self.input_data``; subclasses must override this."""
        raise NotImplementedError()

    def reduce(self, other):
        """Combine another worker's result into this one; subclasses must override this."""
        raise NotImplementedError()

    @classmethod
    def create_workers(cls, input_class, config):
        """Return a list with one ``cls`` worker per input from ``input_class.generate_inputs(config)``."""
        return [cls(item) for item in input_class.generate_inputs(config)]
    
class LineCountWorker(GenericWorker):
    """Worker that counts newline characters in its input."""

    def map(self):
        """Read the input and store its number of newline characters in ``result``."""
        text = self.input_data.read()
        newline_count = text.count("\n")
        self.result = newline_count

    def reduce(self, other):
        """Add ``other``'s line count to this worker's ``result``."""
        self.result += other.result
        
def mapreduce(worker_class, input_class, config):
    """Create workers via ``worker_class.create_workers`` and return the executed result."""
    return execute(worker_class.create_workers(input_class, config))

from tempfile import TemporaryDirectory

# Demo: same line count as before, but the worker and input classes are
# passed in and the data directory travels inside a config dictionary.
with TemporaryDirectory() as tmpdir:
    write_test_files(tmpdir)
    config = {"data_dir": tmpdir}
    result = mapreduce(LineCountWorker, PathInputData, config)

print(f"There are {result} lines")

There are 4978 lines
