In [2]:
from typing import List
from collections import Counter

def raw_majority_vote(labels: List[str]) -> str:
    """Return the label that occurs most often in `labels`.

    No tie-breaking is attempted: when several labels share the highest
    count, the one `Counter.most_common` lists first is returned.

    Raises:
        IndexError: If `labels` is empty.
    """
    # most_common(1) yields a one-item list of (label, count); keep the label.
    return Counter(labels).most_common(1)[0][0]

In [3]:
# 'b' is the only label that appears twice, so it wins outright.
assert raw_majority_vote(['a', 'b', 'c', 'b']) == 'b'

In [4]:
def majority_vote(labels: List[str]) -> str:
    """Return the winning label, breaking ties by dropping the farthest votes.

    Assumes `labels` is ordered from the nearest data point to the farthest.
    While more than one label shares the highest count, the last (farthest)
    label is discarded and the vote is retaken. A single remaining label
    always wins, so every non-empty input produces a result.

    The tie-break runs as a loop over one running tally instead of recursing
    on `labels[:-1]`, so a long list whose ties persist (for example, all
    labels distinct) no longer exhausts the interpreter's recursion limit.

    Args:
        labels: The votes, nearest first.

    Returns:
        The only label holding the highest count once ties are resolved.

    Raises:
        IndexError: If `labels` is empty (the same exception type the
            recursive implementation raised).
    """
    # Work on a copy so the caller's list is never mutated by the pops below.
    ranked = list(labels)
    if not ranked:
        raise IndexError("majority_vote() requires at least one label")

    vote_counts = Counter(ranked)
    while True:
        winner_count = max(vote_counts.values())
        winners = [label for label, count in vote_counts.items()
                   if count == winner_count]
        if len(winners) == 1:
            return winners[0]

        # Tie: retract the farthest remaining vote and try again.
        farthest = ranked.pop()
        vote_counts[farthest] -= 1
        if vote_counts[farthest] == 0:
            del vote_counts[farthest]

In [5]:
# 'a' and 'b' tie at two votes each; dropping the last (farthest) label,
# 'a', leaves 'b' as the sole leader.
assert majority_vote(['a', 'b', 'c', 'b', 'a']) == 'b'

In [6]:
import requests

# Download the Iris data set from the UCI repository and save the response
# text as 'iris.dat' in the current working directory.
data = requests.get(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Fail on an HTTP error status instead of saving an error page as the data.
data.raise_for_status()
with open('iris.dat', 'w') as f:
    f.write(data.text)

In [7]:
from typing import NamedTuple
from scratch.linear_algebra import Vector,distance

class LabeledPoint(NamedTuple):
    """A data point paired with the label assigned to it."""
    # Coordinates of the point; `Vector` is imported from scratch.linear_algebra.
    point: Vector
    # The label attached to this point.
    label: str

def knn_classify(k: int,
                 labeled_points: List[LabeledPoint],
                 new_point: Vector) -> str:
    """Classify `new_point` by a vote among its `k` nearest labeled points.

    Args:
        k: How many of the closest points take part in the vote.
        labeled_points: The labeled points to compare against.
        new_point: The point to classify.

    Returns:
        The label chosen by `majority_vote` from the `k` closest points.
    """
    def gap_to_new_point(candidate):
        # Sort key: how far a labeled point lies from the point being classified.
        return distance(candidate.point, new_point)

    # Rank a copy of the points nearest-first; the caller's list is untouched.
    ranked = list(labeled_points)
    ranked.sort(key=gap_to_new_point)

    # Collect the labels of the k closest points, still nearest-first.
    votes = []
    for neighbour in ranked[:k]:
        votes.append(neighbour.label)

    return majority_vote(votes)

In [7]:
from typing import Dict
import csv
from collections import defaultdict

