# Cheat Sheet

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# No np.linalg.norm
# No np.linalg.lstsq

### K-means clustering in 2D for 2 means, 5 iterations

In [None]:
# Unclassified input points, one row per point, with feature columns "x1" and "x2".
# Placeholder: replace with the real data (it must provide both columns).
# Declaring the columns up front keeps the column lookups below from raising
# KeyError while the frame is still empty.
df = pd.DataFrame(columns=["x1", "x2"], dtype=float)

x1 = df["x1"]  # Extract columns as vars (not strictly necessary)
x2 = df["x2"]

# Input starting means, add other means here for larger K.
# Stored as floats so they have the same dtype as the means computed from the data.
mean_1 = np.array([-1.0, -1.0])
mean_2 = np.array([2.0, 1.0])


def distance(x1, x2, target):
    """Return the Euclidean (L2) distance from each point to ``target``.

    The points are passed as two separate coordinate containers, ``x1`` and
    ``x2``; ``target`` is indexed as ``target[0]`` / ``target[1]``. The
    arithmetic is element-wise, so array-like inputs give one distance per
    point.
    """
    offset_1 = x1 - target[0]  # Per-point offset along the first coordinate
    offset_2 = x2 - target[1]  # Per-point offset along the second coordinate
    squared_length = offset_1**2 + offset_2**2
    return np.sqrt(squared_length)

for _ in range(5):  # Change here for more iterations
    # Assignment step: label each point with the class of its nearest mean.
    # The comparison is strict, so a point equidistant from both means gets class 2.
    dist_to_mean_1 = distance(x1, x2, mean_1)
    dist_to_mean_2 = distance(x1, x2, mean_2)
    df["class"] = np.where(dist_to_mean_1 < dist_to_mean_2, 1, 2)  # Vectorised class column creation
    c1_df = df[df["class"] == 1]  # DF with class 1 points only
    c2_df = df[df["class"] == 2]  # DF with class 2 points only

    # Update step: move each mean to the average point of its class.
    # A class with no points has a NaN average; a NaN mean makes every later
    # distance comparison False and collapses all points into class 2, so an
    # empty class keeps its previous mean instead.
    if not c1_df.empty:
        mean_1 = np.array([c1_df["x1"].mean(), c1_df["x2"].mean()])
    if not c2_df.empty:
        mean_2 = np.array([c2_df["x1"].mean(), c2_df["x2"].mean()])

    # Separate arrays of the x1 and x2 coordinates of the means, for plotting
    x1_mean = np.array([mean_1[0], mean_2[0]])
    x2_mean = np.array([mean_1[1], mean_2[1]])

    plt.scatter(x1, x2, c=df["class"])  # Scatter plot coloured by class
    plt.scatter(x1_mean, x2_mean, color="red")  # Plot the updated means
    plt.show()
    print(f"{mean_1=}, {mean_2=}")  # Print means

