In [None]:
def get_stats(numbers):
  minimum = min(numbers)
  maximum = max(numbers)
  return minimum, maximum

In [None]:
lengths = [63, 73, 72, 60, 67, 66, 71, 61, 72, 70]

minimum, maximum = get_stats(lengths)

print(f'Min: {minimum}, Max: {maximum}')

# 演示unpacking 语句和返回多个值的函数

In [None]:
first, second = 1, 2
assert first == 1
assert second == 2

def my_function():
    return 1, 2

first, second = my_function()
assert first == 1
assert second == 2

用*返回没有被捕获到的值

In [None]:
def get_avg_ratio(numbers):
  average = sum(numbers) / len(numbers)
  scaled = [x / average for x in numbers]
  scaled.sort(reverse=True)
  return scaled

longest, *middle, shortest = get_avg_ratio(lengths)

print(f'Longest: {longest:>4.0%}')
print(f'Shortest: {shortest:>4.0%}')

## 在闭包中使用外围变量

In [None]:
def sort_priority(values, group):
    found = False
    def helper(x):
        nonlocal found # Must declare nonlocal to modify
        if x in group:
            found = True # Seems simple
            return (0, x)
        return (1, x)
    values.sort(key=helper)
    return found

In [None]:
values = [3, 5, 1, 2, 4]
group = [1, 2, 3]
expected = True
result = sort_priority(values, group)
assert result == expected, f"Expected {expected}, but got {result}"

## 用None 和docstring描述默认值会变的参数

### 只会执行一次

In [None]:
from time import sleep
from datetime import datetime

def log(message, when=datetime.now()):
    print(f'{when}: {message}')

In [None]:
log('Hi there!')
sleep(1)
log('Hi there again!')

when设置成None docstring中对行为进行描述

In [None]:
def log(message, when=None):
    """Log a message with a timestamp.

    Args:
        message: Message to print.
        when: datetime of when the message occurred.
            Defaults to the present time.
    """
    when = datetime.now() if when is None else when
    print(f'{when}: {message}')

In [None]:
log('Hi,there!')
sleep(1)
log('Hi,there!')

### 参数传惨 指定位置 和 按位置传入

In [None]:
def safe_division(number, divisor, ignore_overflow, ignore_zero_division):
    try:
        return number / divisor
    except OverflowError:
        if ignore_overflow:
            return 0
        else:
            raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        else:
            raise

In [None]:
result = safe_division(1.0, 10**500, True, False)
print(result)

* keyword-only-argument

In [None]:
def safe_division_c(number, divisor,*, ignore_overflow=False, ignore_zero_division=False):
    try:
        return number / divisor
    except OverflowError:
        if ignore_overflow:
            return 0
        else:
            raise
    except ZeroDivisionError:
        if ignore_zero_division:
            return float('inf')
        else:
            raise

In [None]:
safe_division_c(1.0, 10**500, True, False)

In [None]:
result = safe_division_c(1.0, 10**500, ignore_overflow=True)
print(result)

assert safe_division_c(number=2, divisor=5, ignore_overflow=True) == 0.4
assert safe_division_c(divisor=5,number=2, ignore_overflow=False) == 0.4

* positional-only argument

# 类与接口

In [None]:
class SimpleGradebook:
    def __init__(self):
        self._grades = {}
    
    def add_student(self, name):
        self._grades[name] = []
    
    def report_grade(self, name, score):
        self._grades[name].append(score)
    
    def average_grade(self, name):
        grades = self._grades[name]
        return sum(grades) / len(grades)

In [None]:
book = SimpleGradebook()
book.add_student('Isaac Newton')
book.report_grade('Isaac Newton', 90)
book.report_grade('Isaac Newton', 95)
book.report_grade('Isaac Newton', 85)
print(book.average_grade('Isaac Newton'))

In [None]:
from collections import defaultdict

class BySubjectGradeBook:
    
    def __init__(self):
        self._grades = {}
    
    def add_student(self, name):
        self._grades[name] = defaultdict(list)
    
    def report_grade(self, name, subject, grade):
        by_subject = self._grades[name]
        grade_list = by_subject[subject]
        grade_list.append(grade)
        
    def average_grade(self, name):
        by_subject = self._grades[name]
        total, count = 0, 0
        for grades in by_subject.values():
            total += sum(grades)
            count += len(grades)
        return total / count

In [None]:
book = BySubjectGradeBook()
book.add_student('Albert Einstein')
book.report_grade('Albert Einstein', 'Math', 75)
book.report_grade('Albert Einstein', 'Math', 65)
book.report_grade('Albert Einstein', 'Gym', 90)
book.report_grade('Albert Einstein', 'Gym', 95)
book.report_grade('Albert Einstein', 'Gym', 85)
print(book.average_grade('Albert Einstein'))

In [None]:
grades = []
grades.append((90, 0.3))
grades.append((80, 0.2))
grades.append((70, 0.2))
grades.append((60, 0.1))
grades.append((50, 0.1))
grades.append((0, 0.1))
total = sum(score * weight for score, weight in grades)
total_weight = sum(weight for _, weight in grades)
average_grade = total / total_weight
print(average_grade)

## nametuple

In [None]:
from collections import namedtuple

Grade = namedtuple('Grade', ('score', 'weight'))
print(Grade(81, 0.3))

In [None]:
from collections import defaultdict

class Subject:
    def __init__(self):
        self.grades = []
    
    def report_grade(self, score, weight):
        self.grades.append(Grade(score, weight))
    
    def average_grade(self):
        total, total_weight = 0, 0
        for grade in self.grades:
            total += grade.score * grade.weight
            total_weight += grade.weight
        return total / total_weight
 
class Student:
    def __init__(self):
        self._subjects = defaultdict(Subject)
    
    def get_subject(self, name):
        return self._subjects[name]
    
    def average_grade(self):
        total, count = 0, 0
        for subject in self._subjects.values():
            total += subject.average_grade()
            count += 1
        return total / count

class Gradebook:
    def __init__(self):
        self._students = defaultdict(Student)
    
    def get_student(self, name):
        return self._students[name]
      
book = Gradebook()
albert = book.get_student('Albert Einstein')
print(albert)
math = albert.get_subject('Math')
math.report_grade(80, 0.10)
math.report_grade(90, 0.20)
math.report_grade(70, 0.10)
gym = albert.get_subject('Gym')
gym.report_grade(100, 0.40)
gym.report_grade(85, 0.20)
gym.report_grade(75, 0.20)

print(albert.average_grade())

# 让简单的接口接受函数，而不是类的实例

内置API-挂钩函数（hook）

In [None]:
# hook  stateless function

names = ['Sara', 'Jen', 'Mike', 'David']
names.sort(key=len)
print(names)

In [None]:
def log_missing():
    print('Keys added')
    return 0

In [None]:
from collections import defaultdict

current = {'green': 12, 'blue': 3}
increments = [
    ('red', 5),
    ('blue', 17),
    ('orange', 9)
]
result = defaultdict(log_missing, current)
print('Before:', dict(result))
for key, amount in increments:
    result[key] += amount
print('After:', dict(result))

## 闭包

In [None]:
def increment_with_report(current, increments):
    added_count = 0

    def missing():
        nonlocal added_count
        added_count += 1
        return 0
    
    result = defaultdict(missing, current)
    for key, amount in increments:
        result[key] += amount
    
    return result, added_count

In [None]:
result, count = increment_with_report(current, increments)
print(result,count)

## 将闭包改成辅助类的形式

In [None]:
class CountMissing:
    def __init__(self):
        self.count = 0

    def missing(self):
        self.count += 1
        return 0

counter = CountMissing()
result = defaultdict(counter.missing)
for key, amount in increments:
    result[key] += amount
print(counter.count)

更好的实现

In [None]:
class BetterCountMissing:
    def __init__(self):
        self.count = 0

    def __call__(self):
        self.count += 1
        return 0

counter = BetterCountMissing()
assert counter() == 0
assert callable(counter)

# 通过@classmethod多态构造同一个体系中的各类对象

多态：同一个体系中的多个类可以按照各自独有的方式来实现同一个方法

实现一套MapReduce

In [None]:
class InputData:
    def read(self):
        raise NotImplementedError

## 实现

In [16]:
class PathInputData(InputData):
    
    def __init__(self, path):
        super().__init__()
        self.path = path
    
    def read(self):
        with open(self.path) as f:
            data = f.read()
        return data
    
class Worker:
  def __init__(self, input_data):
    self.input_data = input_data
    self.result = None
  
  def map(self):
    raise NotImplementedError
  
  def reduce(self, other):
    raise NotImplementedError

class LineCountWorker(Worker):
  
   def map(self):
     data = self.input_data.read()
     self.result = data.count('\n')
  
   def reduce(self, other):
      print(f'other.result: {other.result}')
      print(f'self.result: {self.result}')
      self.result += other.result

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 32)

In [None]:
import os

def generate_inputs(data_dir):
    for name in os.listdir(data_dir):
        yield PathInputData(os.path.join(data_dir, name))
            

def create_workers(input_list):
    workers = []
    for input_data in input_list:
        workers.append(LineCountWorker(input_data))
    return workers

In [None]:
from threading import Thread

def excute(workers):
    threads = [Thread(target=w.map) for w in workers]
    for thread in threads : thread.start()
    for thread in threads : thread.join()
    
    first, *rest = workers
    for worker in rest:
        first.reduce(worker)
    return first.result

In [None]:
def mapreduce(data_dir):
    inputs = generate_inputs(data_dir)
    print(inputs)
    workers = create_workers(inputs)
    return excute(workers)

In [None]:
import os
import random

def write_test_files(tempDir):
    os.makedirs(tempDir)
    for i in range(100):
        with open(os.path.join(tempDir, str(i)), "w") as f:
            f.write('\n' * random.randint(0, 100))

tempDir = 'test_inputs'
# write_test_files(tempDir)

result = mapreduce(tempDir)
print(f'There are {result} lines in {tempDir}')
