In [None]:
%matplotlib inline

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.datasets import load_iris

In [None]:
# Load the iris feature matrix (X) and the matching class labels (y).
X, y = load_iris(return_X_y=True)

In [None]:
# Count the distinct class labels present in y.
len(set(y))

In [None]:
# Compute the class count directly rather than reading IPython's `_`
# (last-output variable): `_` holds an unrelated value whenever cells run out
# of order or any other cell has produced output in between.
num_classes = len(set(y))

In [None]:
# Summarize the dataset: number of examples, number of classes, and the label set.
f"We have {y.size} labeled examples across the following {num_classes} classes:\n{set(y)}\n"

In [None]:
# Peek at the first four feature rows and their labels.
X[:4], y[:4]

In [None]:
# One quarter of the labeled examples, truncated to a whole number.
sample_size = int(y.size/4)

In [None]:
# Naive split: use the first sample_size rows, in stored order, as the training set.
X_train, y_train = X[:sample_size], y[:sample_size]

In [None]:
# Show which labels ended up in the naive training slice.
f"Training set labels: {y_train}"

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Seed NumPy's global RNG so the shuffled split below is reproducible.
np.random.seed(0)
# Randomly assign 25% of the rows to training; the remaining rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.25)

In [None]:
# Show the training labels produced by the randomized split.
f"Training set labels: {y_train}"

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Keep only the first two principal components: the result is named and
# plotted as 2D data, so any further components were computed but never used.
pca_model = PCA(n_components=2)
transformed_data_2D = pca_model.fit_transform(X_train)

In [None]:
# Project the test features with the PCA model that was fitted on the training data.
unlabeled_data = pca_model.transform(X_test)

In [None]:
# Test points first: khaki triangles positioned by their first two components.
plt.scatter(*unlabeled_data[:, :2].T, color='khaki', marker='^', label='test')
# Then one scatter layer per training class, each in its own color.
for label, class_color in enumerate(('r', 'k', 'b')):
    data_subset = transformed_data_2D[y_train == label]
    plt.scatter(*data_subset[:, :2].T, color=class_color, label=f'train: {label}')
plt.legend()

In [None]:
# Dimensions of the test feature matrix.
X_test.shape

In [None]:
# Dimensions of the training feature matrix.
X_train.shape

In [None]:
from sklearn.metrics.pairwise import euclidean_distances

In [None]:
# Pairwise Euclidean distances: row i holds the distances from test point i
# to every training point.
distance_matrix = euclidean_distances(X_test, X_train)

In [None]:
# Display the test-by-train distance matrix.
distance_matrix

In [None]:
# distance_matrix[0][0] compares the first TEST row with the first TRAINING
# row, so both features are read from the split arrays (not from the unsplit X)
# and each name matches the set it comes from.
f_test, f_train = X_test[0], X_train[0]
distance = distance_matrix[0][0]
print(f"Our first test set feature is {f_test}")
print(f"Our first training set feature is {f_train}")
print(f"The Euclidean distance between the features is {distance:.2f}")

In [None]:
# Fixed seed so the same test point is selected on every run.
np.random.seed(6)
# Pick the index of one test example at random.
random_index = np.random.randint(y_test.size)

In [None]:
# Distances from the selected test point to every training point.
labeled_distances = distance_matrix[random_index]

In [None]:
# Display those distances.
labeled_distances

In [None]:
# Training-set indices of the three smallest distances, nearest first.
labeled_neighbors = np.argsort(labeled_distances)[:3]

In [None]:
# Class labels of those nearest training points.
labels = y_train[labeled_neighbors]

In [None]:
# Display the neighbor labels.
labels

In [None]:
from collections import Counter

In [None]:
# most_common() orders labels by descending frequency; keep the leading (label, count) pair.
top_label, count = Counter(labels).most_common()[0]

In [None]:
# Display the most frequent neighbor label.
top_label

In [None]:
# Report the neighbor labels and the majority label among them.
print(f"The 3 nearest neighbors of Point {random_index} have the " f"following labels:\n{labels}")
print(f"\nThe most common class label is {top_label}. It occurs {count} " "times.")

In [None]:
# Look up the ground-truth label of the selected test point for comparison.
true_label = y_test[random_index]
print(f"The true class of Point {random_index} is {true_label}.")

In [None]:
# Display the training-set indices of the nearest neighbors.
labeled_neighbors

In [None]:
import networkx as nx

In [None]:
def generate_neighbor_graph(unlabeled_index, labeled_neighbors):
    """Draw and return a star graph linking a test point to its labeled neighbors.

    Reads the notebook-level `y_train` to look up each neighbor's class.

    Args:
        unlabeled_index: Index of the unlabeled (test) point. Its node is
            labeled 'U' and drawn in black.
        labeled_neighbors: Sequence of indices into `y_train`; each becomes a
            node labeled with, and colored by, its class.

    Returns:
        The `networkx.Graph` with one edge from every neighbor to
        `unlabeled_index`; every node carries a 'label' attribute.
    """
    label_colors = ['pink', 'khaki', 'cyan']

    G = nx.Graph()
    G.add_nodes_from((i, {'label': y_train[i]}) for i in labeled_neighbors)
    G.add_node(unlabeled_index, label='U')
    G.add_edges_from((i, unlabeled_index) for i in labeled_neighbors)

    # Derive colors and captions from the graph itself, in node order, so the
    # lists always line up with the nodes actually drawn. Test and training
    # indices share one integer key space, so a hand-assembled list can end up
    # with the wrong length when unlabeled_index equals a neighbor index.
    node_labels = {node: data['label'] for node, data in G.nodes(data=True)}
    colors = [
        'k' if node == unlabeled_index else label_colors[label]
        for node, label in node_labels.items()
    ]
    nx.draw(G, node_color=colors, labels=node_labels, with_labels=True)
    return G

In [None]:
# Seed the global RNG -- presumably so the drawn graph layout is the same on every run.
np.random.seed(0)
# Draw the selected test point together with its three nearest labeled neighbors.
generate_neighbor_graph(random_index, labeled_neighbors)

In [None]:
# Seed the global RNG -- presumably so the drawn graph layout is the same on every run.
np.random.seed(0)
# Widen the neighborhood to the four closest training points and keep the graph.
labeled_neighbors = np.argsort(labeled_distances)[:4]
G = generate_neighbor_graph(random_index, labeled_neighbors)

In [None]:
from collections import defaultdict

In [None]:
# Running vote total per class label; labels not seen yet start at 0.
class_to_votes = defaultdict(int)

In [None]:
# Start from an empty tally every time this cell runs: accumulating into a
# tally created by a different cell double-counts the votes on re-execution.
class_to_votes = defaultdict(int)
# Every labeled neighbor votes with weight 1 / distance, so closer points count more.
for node in G.neighbors(random_index):
    label = G.nodes[node]['label']
    distance = distance_matrix[random_index][node]
    num_votes = 1 / distance
    print(f"A data point with a label of {label} is {distance:.2f} units "
          f"away. It receives {num_votes:.2f} votes.")
    class_to_votes[label] += num_votes

In [None]:
# Choose the class with the largest weighted tally, then report every tally
# followed by the winner.
top_class = max(class_to_votes, key=class_to_votes.get)
for class_label, votes in class_to_votes.items():
    print(f"We counted {votes:.2f} votes for class {class_label}.")
print(f"Class {top_class} has received the plurality of the votes.")

In [None]:
def predict(index, K=1, weighted_voting=False):
    """Predict the class of one test point by voting among its K nearest neighbors.

    Reads the notebook-level `distance_matrix` (one row per test point, one
    column per training point) and `y_train` (training labels).

    Args:
        index: Row of `distance_matrix` identifying the test point.
        K: Number of nearest training points allowed to vote; must be >= 1.
        weighted_voting: When True, each neighbor casts 1 / distance votes
            instead of one vote. If any of the K neighbors lies at distance 0,
            only those exact matches vote (one vote each), which avoids
            dividing by zero.

    Returns:
        The label with the most votes. Ties go to the tied label that was
        encountered first when walking the neighbors nearest-first.

    Raises:
        ValueError: If K is smaller than 1.
    """
    if K < 1:
        # A negative K would slice "all but the last |K|" neighbors and K == 0
        # would leave nobody to vote; both are caller errors.
        raise ValueError(f"K must be at least 1, got {K}")

    labeled_distances = distance_matrix[index]
    labeled_neighbors = np.argsort(labeled_distances)[:K]
    neighbor_distances = labeled_distances[labeled_neighbors]

    if weighted_voting:
        exact_match = neighbor_distances == 0
        if exact_match.any():
            votes = exact_match.astype(float)
        else:
            votes = 1 / neighbor_distances
    else:
        votes = np.ones(labeled_neighbors.size)

    class_to_votes = defaultdict(float)
    for label, num_votes in zip(y_train[labeled_neighbors], votes):
        class_to_votes[label] += num_votes
    return max(class_to_votes, key=class_to_votes.get)

In [None]:
# Sanity check: unweighted voting among the 3 nearest neighbors must yield class 2.
assert predict(random_index, K=3) == 2

In [None]:
# Sanity check: distance-weighted voting among the 3 nearest neighbors must also yield class 2.
assert predict(random_index, K=3, weighted_voting=True) == 2

In [None]:
# Classify every test point using predict's defaults (K=1, unweighted voting).
y_pred = np.array([predict(i) for i in range(y_test.size)])

In [None]:
def compute_confusion_matrix(y_pred, y_test):
    """Count how often each (predicted, actual) label pair occurs.

    Args:
        y_pred: Predicted class labels, given as non-negative integers.
        y_test: Actual class labels, aligned element-wise with `y_pred`.

    Returns:
        A square float array `M` in which `M[p][a]` is the number of examples
        predicted as class `p` whose actual class is `a` (rows are
        predictions, columns are actual classes). Its side length is the
        largest label plus one; it is 0 x 0 when both inputs are empty.

    Raises:
        ValueError: If any label is negative.
    """
    labels = set(y_pred) | set(y_test)
    if labels and min(labels) < 0:
        # A negative label would silently index from the end of the matrix.
        raise ValueError("class labels must be non-negative integers")

    # Size by the largest label rather than by the number of distinct labels,
    # so a class missing from both inputs (e.g. labels {0, 2}) still fits.
    num_classes = int(max(labels)) + 1 if labels else 0
    confusion_matrix = np.zeros((num_classes, num_classes))
    for prediction, actual in zip(y_pred, y_test):
        confusion_matrix[prediction, actual] += 1
    return confusion_matrix

In [None]:
# Confusion matrix of the predictions against the true test labels.
M = compute_confusion_matrix(y_pred, y_test)

In [None]:
# Display the raw confusion matrix.
M

In [None]:
# Build the tick labels from the matrix's own shape so they always match the
# number of classes: rows are predictions, columns are actual classes.
sns.heatmap(
    M,
    annot=True,
    cmap='YlGnBu',
    yticklabels=[f"Predict {i}" for i in range(M.shape[0])],
    xticklabels=[f"Actual {i}" for i in range(M.shape[1])],
)
plt.yticks(rotation=0)

In [None]:
# Correct predictions lie on the diagonal, where predicted class == actual class.
num_accurate_preditions = M.diagonal().sum()

In [None]:
# Report the count as a whole number (M stores floats).
print(f"Our results contain {int(num_accurate_preditions)} accurate " "predictions.")

In [None]:
# Accuracy = correct predictions / all predictions.
accuracy = M.diagonal().sum() / M.sum()
# Expects exactly 104 correct predictions out of 104 + 9 = 113.
assert accuracy == 104 / (104 + 9)
print(f"Our predictions are {100 * accuracy:.0f}% accurate.")

In [None]:
# Precision of Class 1: correct Class 1 predictions / everything predicted as Class 1 (row 1).
precision = M[1][1] / M[1].sum()

In [None]:
# Expects 33 correct out of 38 Class 1 predictions.
assert precision == 33 / 38

In [None]:
# Recall of Class 1: correct Class 1 predictions / all actual Class 1 examples (column 1).
recall = M[1][1] / M[:, 1].sum()

In [None]:
# Expects 33 of the 37 actual Class 1 examples to be recovered.
assert recall == 33 / 37
print(f"Recall of Class 1 is {recall:.2f}")

In [None]:
# Mean of the reciprocals of precision and recall; its own reciprocal is their harmonic mean.
inverse_average = (1/precision + 1/recall) / 2

In [None]:
# F-measure: the harmonic mean of precision and recall.
f_measure = 2 * precision * recall / (precision + recall)
print(f"The f-measure of Class 1 is {f_measure:.2f}")

In [None]:
# Toy case: one true positive, one false positive, no false negatives.
tp, fp, fn = 1, 1, 0
precision, recall = tp / (tp + fp), tp / (tp + fn)
# Arithmetic mean versus harmonic mean (F-measure) of the same two scores.
average = (precision + recall) / 2
f_measure = 2 * precision * recall / (precision + recall)
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"Average: {average}")
print(f"F-measure: {f_measure:.2f}")

In [None]:
def compute_f_measures(M):
    """Return the F-measure of every class in a confusion matrix.

    Args:
        M: Square confusion matrix whose rows are predicted classes and whose
            columns are actual classes.

    Returns:
        A 1-D float array with one F-measure (harmonic mean of precision and
        recall) per class. A class whose precision and recall are both zero or
        undefined (nothing predicted and/or no actual examples) scores 0.0
        instead of nan.
    """
    M = np.asarray(M, dtype=float)
    true_positives = M.diagonal()
    predicted_totals = M.sum(axis=1)  # row sums: everything predicted as each class
    actual_totals = M.sum(axis=0)     # column sums: every actual member of each class

    # `where=` skips zero denominators, leaving the 0.0 from `out` in place.
    precision = np.divide(true_positives, predicted_totals,
                          out=np.zeros_like(predicted_totals),
                          where=predicted_totals != 0)
    recall = np.divide(true_positives, actual_totals,
                       out=np.zeros_like(actual_totals),
                       where=actual_totals != 0)

    total = precision + recall
    return np.divide(2 * precision * recall, total,
                     out=np.zeros_like(total), where=total != 0)

In [None]:
# Per-class F-measures for the confusion matrix M.
f_measures = compute_f_measures(M)

In [None]:
# Report each class's F-measure; the array position is the class label.
for class_label, f_measure in enumerate(f_measures):
    print(f"The f-measure for Class {class_label} is {f_measure:.2f}")

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# scikit-learn's confusion matrix; predictions are passed first so that its
# rows correspond to predictions, like the rows of M.
new_M = confusion_matrix(y_pred, y_test)

In [None]:
# The hand-built matrix must match scikit-learn's entry for entry.
assert np.array_equal(M, new_M)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Compare with a tolerance: the two accuracies come from different code paths,
# so bit-for-bit float equality is not guaranteed.
assert np.isclose(accuracy_score(y_pred, y_test), accuracy)

In [None]:
from sklearn.metrics import f1_score

In [None]:
# Per-class F1 scores from scikit-learn (average=None returns one score per class).
new_f_measures = f1_score(y_pred, y_test, average=None)

In [None]:
# Compare with a tolerance: scikit-learn may evaluate the F1 formula in a
# different but algebraically equivalent form, so the floats can differ in the
# last bits even when both results are correct.
assert np.allclose(new_f_measures, f_measures)