In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns
from itertools import product
from matplotlib import animation
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.linear_model import LinearRegression

# Display animations as embedded HTML5 video when an Animation object is the
# last expression of a cell.
mpl.rc('animation', html='html5')

# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names, so pick whichever name this
# installation actually provides.
_whitegrid_style = "seaborn-v0_8-whitegrid"
if _whitegrid_style not in plt.style.available:
    _whitegrid_style = "seaborn-whitegrid"
plt.style.use(_whitegrid_style)

# Global figure/axes defaults shared by every plot in the notebook.
plt.rc("figure", autolayout=True)
plt.rc(
    "axes",
    labelweight="bold",
    labelsize="large",
    titleweight="bold",
    titlesize=14,
    titlepad=10,
)

In [None]:
def initialize_centroids(points, k):
    """Pick the starting centroids by sampling rows of ``points``.

    A copy of ``points`` is shuffled along its first axis with NumPy's
    global random state (``np.random.shuffle``) and its first ``k`` rows
    are returned. The caller's array is left untouched. The result is only
    reproducible if the caller seeds ``np.random`` beforehand.
    """
    shuffled = points.copy()
    # Shuffle the copy in place; only the row order changes.
    np.random.shuffle(shuffled)
    starting_centroids = shuffled[:k]
    return starting_centroids


def closest_centroid(points, centroids):
    """Return, for each point, the index of its nearest centroid.

    Distances are Euclidean. The result is a 1-D integer array with one
    entry per row of ``points``; each entry indexes a row of ``centroids``.
    """
    # The extra axis on ``centroids`` makes the subtraction broadcast to one
    # offset vector per (centroid, point) pair.
    offsets = points - centroids[:, np.newaxis]
    squared_lengths = np.square(offsets).sum(axis=2)
    euclidean = np.sqrt(squared_lengths)
    # Axis 0 runs over centroids, so argmin picks the winner for each point.
    return euclidean.argmin(axis=0)


def move_centroids(points, closest, centroids):
    """Return updated centroids: the mean of the points assigned to each one.

    Parameters
    ----------
    points : array whose rows are the data points.
    closest : 1-D array giving, for each point, the index of its centroid.
    centroids : array whose rows are the current centroids; its first
        dimension determines how many centroids are returned.

    Returns
    -------
    Array with one row per centroid, in the same order as ``centroids``.

    A centroid with no assigned points keeps its previous position. Taking
    the mean of an empty selection would instead produce NaN (plus a
    RuntimeWarning), and a NaN centroid poisons every later assignment
    step because ``np.argmin`` treats NaN as the minimum.
    """
    updated = []
    for k in range(centroids.shape[0]):
        members = points[closest == k]
        if len(members) > 0:
            updated.append(members.mean(axis=0))
        else:
            # Empty cluster: leave this centroid where it was.
            updated.append(centroids[k])
    return np.array(updated)


def make_grid(xlim, ylim, xnum=1000, ynum=1000):
    """Return the points of a regular 2-D grid as an ``(xnum * ynum, 2)`` array.

    Parameters
    ----------
    xlim, ylim : ``(start, stop)`` pairs passed to ``np.linspace`` for the
        x and y coordinates.
    xnum, ynum : number of grid positions along x and y.

    Returns
    -------
    Array whose rows are ``[x, y]`` pairs. Rows are ordered with x as the
    slow index and y as the fast index, i.e. row ``i * ynum + j`` is
    ``[xs[i], ys[j]]``.
    """
    xs = np.linspace(*xlim, num=xnum)
    ys = np.linspace(*ylim, num=ynum)
    # Vectorised equivalent of looping over product(range(xnum), range(ynum)):
    # each x is held for ynum consecutive rows while the ys cycle.
    return np.column_stack((np.repeat(xs, ynum), np.tile(ys, xnum)))


def make_tessallation(grid, centroids, xnum=1000, ynum=1000):
    """Label every grid point with the index of its nearest centroid.

    Parameters
    ----------
    grid : ``(xnum * ynum, 2)`` array of grid points ordered with x as the
        slow index and y as the fast index (the ordering ``make_grid``
        produces).
    centroids : array whose rows are the centroids.
    xnum, ynum : grid resolution along x and y; must match ``grid``.

    Returns
    -------
    Float array of shape ``(ynum, xnum)`` where entry ``[j, i]`` is the
    centroid index for grid position ``(xs[i], ys[j])``. Rows follow y and
    columns follow x, which is the layout ``contour``/``contourf`` expect
    for ``Z`` when called as ``contourf(xs, ys, Z)``.

    The previous implementation allocated an ``(xnum, ynum)`` array but
    indexed it as ``[j, i]``, so it only worked when ``xnum == ynum``.
    For square grids the result here is identical.
    """
    clusters = closest_centroid(grid, centroids)
    # Flat index n == i * ynum + j, so reshape to (xnum, ynum) and transpose
    # to obtain the (ynum, xnum) image.
    return clusters.reshape(xnum, ynum).T.astype(float)

# ffmpeg -i kmeans-ani.mp4 -filter_complex "[0:v] fps=12,scale=480:-1,split [a][b];[a] palettegen [p];[b][p] paletteuse" kmeans.gif

In [None]:
# Synthetic demo data: 1000 two-dimensional points generated by make_blobs.
points, _ = make_blobs(
    cluster_std=4.0, n_samples=1000, n_features=2, random_state=1,
)
# NOTE: initialize_centroids shuffles with NumPy's global RNG, which is not
# seeded in this notebook, so the starting centroids differ between runs.
centroids = initialize_centroids(points, 7)
closest = closest_centroid(points, centroids)
xlim = (points[:, 0].min(), points[:, 0].max())
ylim = (points[:, 1].min(), points[:, 1].max())
num = 1000  # grid resolution along each axis for the background regions
xs = np.linspace(*xlim, num)
ys = np.linspace(*ylim, num)
# The grid depends only on the axis limits and resolution, so build it once
# here instead of rebuilding it on every animation frame.
grid = make_grid(xlim, ylim, num, num)

colors = sns.color_palette()
fig, ax = plt.subplots(figsize=(10, 8))


def init():
    """Initial-frame callback for FuncAnimation; nothing to draw yet."""
    return [fig]


def animate(i):
    """Advance k-means by one iteration and redraw the axes.

    The frame index ``i`` is unused: progress is carried in the module-level
    ``centroids`` and ``closest`` variables, which are updated on each call.
    """
    global centroids
    global closest
    centroids = move_centroids(points, closest, centroids)
    closest = closest_centroid(points, centroids)
    # Pass the resolution explicitly so the tessellation always matches `grid`.
    tes = make_tessallation(grid, centroids, num, num)
    ax.cla()
    # Shaded regions: which centroid each background grid point is nearest to.
    ax.contourf(xs, ys, tes, alpha=0.15, colors=colors)
    cs = [colors[k] for k in closest]
    ax.scatter(points[:, 0], points[:, 1], c=cs, s=15)
    ax.scatter(centroids[:, 0], centroids[:, 1], c="k", edgecolor="w", marker="X", s=150)
    # Region boundaries.
    ax.contour(xs, ys, tes, colors="k")
    ax.set_xticklabels("")
    ax.set_yticklabels("")
    return [fig]


ani = animation.FuncAnimation(
    fig, animate, init_func=init, frames=40, interval=200, blit=True
)
# Close the figure so only the animation is displayed, not a static plot.
plt.close(fig)
ani

In [None]:
# Cluster the Airbnb rows by their longitude/latitude columns.
df = pd.read_csv('../input/fe-course-data/airbnb.csv')
points = df.loc[:, ["longitude", "latitude"]]
points = points.to_numpy()
# NOTE: initialize_centroids shuffles with NumPy's global RNG, which is not
# seeded in this notebook, so the starting centroids differ between runs.
centroids = initialize_centroids(points, 6)
closest = closest_centroid(points, centroids)
xlim = (points[:, 0].min(), points[:, 0].max())
ylim = (points[:, 1].min(), points[:, 1].max())
num = 1000  # grid resolution along each axis for the background regions
xs = np.linspace(*xlim, num)
ys = np.linspace(*ylim, num)
# The grid depends only on the axis limits and resolution, so build it once
# here instead of rebuilding it on every animation frame.
grid = make_grid(xlim, ylim, num, num)

colors = sns.color_palette()
fig, ax = plt.subplots(figsize=(10, 8))


def init():
    """Initial-frame callback for FuncAnimation; nothing to draw yet."""
    return [fig]


def animate(i):
    """Advance k-means by one iteration and redraw the axes.

    The frame index ``i`` is unused: progress is carried in the module-level
    ``centroids`` and ``closest`` variables, which are updated on each call.
    """
    global centroids
    global closest
    centroids = move_centroids(points, closest, centroids)
    closest = closest_centroid(points, centroids)
    # Pass the resolution explicitly so the tessellation always matches `grid`.
    tes = make_tessallation(grid, centroids, num, num)
    ax.cla()
    # Shaded regions: which centroid each background grid point is nearest to.
    ax.contourf(xs, ys, tes, alpha=0.15, colors=colors)
    cs = [colors[k] for k in closest]
    ax.scatter(points[:, 0], points[:, 1], c=cs, s=15)
    ax.scatter(centroids[:, 0], centroids[:, 1], c="k", edgecolor="w", marker="X", s=150)
    # Region boundaries.
    ax.contour(xs, ys, tes, colors="k")
    ax.set_xticklabels("")
    ax.set_yticklabels("")
    return [fig]


ani = animation.FuncAnimation(
    fig, animate, init_func=init, frames=40, interval=200, blit=True
)
# Close the figure so only the animation is displayed, not a static plot.
plt.close(fig)
ani

In [None]:
# Cluster the Ames rows by their Longitude/Latitude columns.
ames = pd.read_csv("../input/fe-course-data/ames.csv")
points = ames.loc[:, ["Longitude", "Latitude"]]
points = points.to_numpy()
# NOTE: initialize_centroids shuffles with NumPy's global RNG, which is not
# seeded in this notebook, so the starting centroids differ between runs.
centroids = initialize_centroids(points, 6)
closest = closest_centroid(points, centroids)
xlim = (points[:, 0].min(), points[:, 0].max())
ylim = (points[:, 1].min(), points[:, 1].max())
num = 1000  # grid resolution along each axis for the background regions
xs = np.linspace(*xlim, num)
ys = np.linspace(*ylim, num)
# The grid depends only on the axis limits and resolution, so build it once
# here instead of rebuilding it on every animation frame.
grid = make_grid(xlim, ylim, num, num)

colors = sns.color_palette()
fig, ax = plt.subplots(figsize=(8, 10))


def init():
    """Initial-frame callback for FuncAnimation; nothing to draw yet."""
    return [fig]


def animate(i):
    """Advance k-means by one iteration and redraw the axes.

    The frame index ``i`` is unused: progress is carried in the module-level
    ``centroids`` and ``closest`` variables, which are updated on each call.
    """
    global centroids
    global closest
    centroids = move_centroids(points, closest, centroids)
    closest = closest_centroid(points, centroids)
    # Pass the resolution explicitly so the tessellation always matches `grid`.
    tes = make_tessallation(grid, centroids, num, num)
    ax.cla()
    # Shaded regions: which centroid each background grid point is nearest to.
    ax.contourf(xs, ys, tes, alpha=0.15, colors=colors)
    cs = [colors[k] for k in closest]
    ax.scatter(points[:, 0], points[:, 1], c=cs, s=15)
    ax.scatter(centroids[:, 0], centroids[:, 1], c="k", edgecolor="w", marker="X", s=150)
    # Region boundaries.
    ax.contour(xs, ys, tes, colors="k")
    ax.set_xticklabels("")
    ax.set_yticklabels("")
    return [fig]


ani = animation.FuncAnimation(
    fig, animate, init_func=init, frames=20, interval=200, blit=True
)
# Close the figure so only the animation is displayed, not a static plot.
plt.close(fig)
ani

In [None]:
# Cluster the housing rows by their Longitude/Latitude columns.
housing = pd.read_csv("../input/fe-course-data/housing.csv")
points = housing.loc[:, ["Longitude", "Latitude"]]
points = points.to_numpy()
# NOTE: initialize_centroids shuffles with NumPy's global RNG, which is not
# seeded in this notebook, so the starting centroids differ between runs.
centroids = initialize_centroids(points, 6)
closest = closest_centroid(points, centroids)
xlim = (points[:, 0].min(), points[:, 0].max())
ylim = (points[:, 1].min(), points[:, 1].max())
num = 1000  # grid resolution along each axis for the background regions
xs = np.linspace(*xlim, num)
ys = np.linspace(*ylim, num)
# The grid depends only on the axis limits and resolution, so build it once
# here instead of rebuilding it on every animation frame.
grid = make_grid(xlim, ylim, num, num)

colors = sns.color_palette()
fig, ax = plt.subplots(figsize=(8, 10))


def init():
    """Initial-frame callback for FuncAnimation; nothing to draw yet."""
    return [fig]


def animate(i):
    """Advance k-means by one iteration and redraw the axes.

    The frame index ``i`` is unused: progress is carried in the module-level
    ``centroids`` and ``closest`` variables, which are updated on each call.
    """
    global centroids
    global closest
    centroids = move_centroids(points, closest, centroids)
    closest = closest_centroid(points, centroids)
    # Pass the resolution explicitly so the tessellation always matches `grid`.
    tes = make_tessallation(grid, centroids, num, num)
    ax.cla()
    # Shaded regions: which centroid each background grid point is nearest to.
    ax.contourf(xs, ys, tes, alpha=0.15, colors=colors)
    cs = [colors[k] for k in closest]
    ax.scatter(points[:, 0], points[:, 1], c=cs, s=15)
    ax.scatter(centroids[:, 0], centroids[:, 1], c="k", edgecolor="w", marker="X", s=150)
    # Region boundaries.
    ax.contour(xs, ys, tes, colors="k")
    ax.set_xticklabels("")
    ax.set_yticklabels("")
    return [fig]


ani = animation.FuncAnimation(
    fig, animate, init_func=init, frames=40, interval=200, blit=True
)
# Close the figure so only the animation is displayed, not a static plot.
plt.close(fig)
ani