# Bloomberg - Two chords

In [1]:
'''
-----------------------------
%%manim -ql -v WARNING Video
%%manim -qm -v WARNING Video
%%manim -qh -v WARNING Video
-----------------------------

^^^ ONE OF THESE MAGIC LINES GOES AT THE TOP OF EVERY CELL; "-ql", "-qm" AND "-qh" SELECT LOW, MEDIUM OR HIGH RESOLUTION.

    USE LOW WHILE WRITING AND TESTING THE CODE, AND HIGH WHEN PRODUCING THE FINAL VERSION.
'''



# I - Code

In [2]:
from manim import *
import numpy as np
from sklearn.mixture import GaussianMixture

In [72]:
%%manim -qh -v WARNING --disable_caching GMMClustering
from manim import *
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans

class GMMClustering(Scene):
    """Animate a GMM clustering of synthetic data, then a K-Means one beside it.

    The data are three parallel, elongated point clouds. The scene first shows
    the Gaussian-mixture labelling with its covariance ellipses, then shrinks
    that panel to the left and fades in the K-Means labelling of the same data
    on the right.
    """

    def generate_data(self, n=900):
        """Return an ``(3 * (n // 3), 2)`` array of three diagonal clusters.

        Each cluster is a straight segment of half-length 1 through its centre,
        pointing along the (normalised) direction ``(-3, -5)``, with isotropic
        Gaussian noise of standard deviation 0.2 added to every point.

        Args:
            n: Requested total number of points. Each cluster receives
                ``n // 3`` points, so a remainder is dropped.

        Returns:
            The three clusters stacked row-wise, in the order of their centres
            ``(1, 8)``, ``(2, 7)``, ``(3, 6)``.
        """
        # A private RandomState yields the same stream as
        # ``np.random.seed(42)`` followed by ``np.random.normal`` calls, but
        # does not reseed the process-wide NumPy generator as a side effect.
        rng = np.random.RandomState(42)
        n_per_cluster = n // 3

        def generate_diagonal_cluster(center, direction, noise_scale, count):
            # Evenly spaced positions along the unit direction, then jitter.
            t = np.linspace(-1, 1, count)
            direction = direction / np.linalg.norm(direction)
            points = center + np.outer(t, direction)
            noise = rng.normal(scale=noise_scale, size=points.shape)
            return points + noise

        direction = np.array([-3, -5])
        centers = ([1, 8], [2, 7], [3, 6])
        # The clusters are generated in this fixed order so the noise drawn
        # for each one stays the same from run to run.
        clusters = [
            generate_diagonal_cluster(np.array(center), direction, 0.2, n_per_cluster)
            for center in centers
        ]
        return np.vstack(clusters)

    def draw_points_with_aura(self, data, labels, colors):
        """Build a VGroup with a translucent halo and a small dot per point.

        Args:
            data: Iterable of ``(x, y)`` pairs.
            labels: Per-point integer label; ``labels[i]`` indexes ``colors``.
            colors: Sequence of colours, one per label value.

        Returns:
            A VGroup containing, for every point, its halo followed by its dot.
        """
        group = VGroup()
        for i, (x, y) in enumerate(data):
            color = colors[labels[i]]
            # Large, faint, stroke-less disc behind the actual point.
            glow = Dot(point=[x, y, 0], radius=0.1, color=color, stroke_width=0)
            glow.set_fill(color, opacity=0.15)
            point = Dot(point=[x, y, 0], radius=0.03, color=color)
            group.add(glow, point)
        return group

    def draw_gaussians(self, gmm, colors, layers=5, alpha_start=0.5):
        """Draw nested ellipses for every component of a fitted mixture.

        Assumes ``gmm.covariances_`` holds one 2x2 matrix per component, as
        produced with ``covariance_type='full'`` in ``construct``.

        Args:
            gmm: Fitted model exposing ``means_`` and ``covariances_``.
            colors: Sequence of colours; component ``i`` uses ``colors[i]``.
            layers: Number of concentric ellipses per component. The outermost
                one has semi-axes equal to the square roots of the covariance
                eigenvalues; inner ones are scaled by ``j / layers``.
            alpha_start: Base fill opacity; each layer uses
                ``alpha_start * (1 - 0.7 * j / layers)``.

        Returns:
            A VGroup of all ellipses, component by component, inner layer first.
        """
        gaussians = VGroup()
        for i, (mean, covar) in enumerate(zip(gmm.means_, gmm.covariances_)):
            # Eigen-decomposition, reordered so index 0 is the major axis.
            vals, vecs = np.linalg.eigh(covar)
            order = vals.argsort()[::-1]
            vals = vals[order]
            vecs = vecs[:, order]
            major_axis = vecs[:, 0]
            theta = np.arctan2(major_axis[1], major_axis[0])

            # Colours depend only on the component, not on the layer.
            fill_color = interpolate_color(WHITE, colors[i], 0.4)
            stroke_color = interpolate_color(colors[i], BLACK, 0.4)

            for j in range(1, layers + 1):
                scale = j / layers
                ellipse = Circle(radius=1.0)
                ellipse.stretch_to_fit_width(2 * np.sqrt(vals[0]) * scale)
                ellipse.stretch_to_fit_height(2 * np.sqrt(vals[1]) * scale)
                ellipse.rotate(theta)
                ellipse.move_to([*mean, 0])

                fill_opacity = alpha_start * (1 - scale * 0.7)
                ellipse.set_fill(color=fill_color, opacity=fill_opacity)
                ellipse.set_stroke(color=stroke_color, width=1.5)

                gaussians.add(ellipse)

        return gaussians

    def create_legend(self, colors):
        """Return a vertical legend with one "Cluster k" entry per colour."""
        entries = VGroup()
        for i, color in enumerate(colors):
            dot = Dot(color=color).scale(0.8)
            label = Text(f"Cluster {i+1}", font_size=24, color=color)
            entry = VGroup(dot, label).arrange(RIGHT, buff=0.3)
            entries.add(entry)
        legend = entries.arrange(DOWN, aligned_edge=LEFT)
        return legend

    def _label_panel(self, points, colors, title):
        """Return ``(legend, heading)`` placed below and above ``points``."""
        legend = self.create_legend(colors).next_to(points, DOWN, buff=0.5)
        heading = Text(title, font_size=28).next_to(points, UP, buff=0.6)
        return legend, heading

    def construct(self):
        """Fit both models on the same data and play the comparison."""
        data = self.generate_data()
        colors = [GREEN, ORANGE, BLUE]

        # GMM clustering
        gmm = GaussianMixture(n_components=3, covariance_type='full', random_state=42)
        gmm.fit(data)
        gmm_labels = gmm.predict(data)

        gmm_points = self.draw_points_with_aura(data, gmm_labels, colors)
        gmm_ellipses = self.draw_gaussians(gmm, colors)
        gmm_legend, gmm_title = self._label_panel(gmm_points, colors, "GMM Clustering")

        # The data live far from the frame centre, so recentre the whole panel.
        gmm_group = VGroup(gmm_points, gmm_ellipses, gmm_legend, gmm_title).move_to(ORIGIN)

        self.play(FadeIn(gmm_points), run_time=2)
        self.play(FadeIn(gmm_ellipses), run_time=2)
        self.play(FadeIn(gmm_legend), Write(gmm_title))
        self.wait(3)

        # Dezoom and shift GMM to the left
        self.play(gmm_group.animate.scale(0.8).shift(LEFT * 4), run_time=1)

        # K-Means clustering
        # n_init is passed explicitly: scikit-learn changed its default from 10
        # to 'auto' (and warns in the releases in between), so leaving it unset
        # makes the clustering depend on the installed version.
        kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
        kmeans_labels = kmeans.fit_predict(data)

        kmeans_points = self.draw_points_with_aura(data, kmeans_labels, colors)
        kmeans_legend, kmeans_title = self._label_panel(
            kmeans_points, colors, "K-Means Clustering"
        )

        # Same 0.8 scale as the GMM panel, placed to its right.
        kmeans_group = VGroup(kmeans_points, kmeans_legend, kmeans_title)
        kmeans_group.scale(0.8).next_to(gmm_group, RIGHT, buff=1.0)

        self.play(FadeIn(kmeans_points), FadeIn(kmeans_legend), Write(kmeans_title), run_time=5)
        self.wait(10)

#         # Conclusion Box
#         conclusion_text = """Why GMM is better here:

# • GMM captures
#   elongated shapes
#   (using full covariance)

# • It handles
#   orientation &
#   overlap well.

# • K-Means expects
#   spherical, equal-size
#   clusters."""

#         conclusion_lines = Text(conclusion_text, font_size=16, line_spacing=1.2)
#         conclusion_box = SurroundingRectangle(conclusion_lines, color=BLUE_B, buff=0.2, corner_radius=0.2)
#         conclusion_group = VGroup(conclusion_box, conclusion_lines).next_to(kmeans_group, RIGHT, buff=1.5)


#         self.play(FadeIn(conclusion_group), run_time=3)
#         self.wait(15)

                                                                                                        