In [None]:
from manim import *
import numpy as np

class PCAWithFormulas(ThreeDScene):
    """Step-by-step PCA animation on a synthetic 2-D point cloud.

    The scene shows, in order: the data points, their mean, the formulas for
    the mean and the covariance, the eigenvector equation, the two principal
    axes drawn through the mean, the points sliding onto the first principal
    axis, and finally the projection formula.

    All geometry lives in the z = 0 plane; data coordinates are multiplied by
    ``DISPLAY_SCALE`` when they are placed in the scene.
    """

    # Factor applied to data coordinates when positioning them in the scene.
    DISPLAY_SCALE = 1.5
    # Half-lengths of the drawn principal-axis segments, in scene units.
    PC1_HALF_LENGTH = 3
    PC2_HALF_LENGTH = 2

    @classmethod
    def _to_scene(cls, xy):
        """Embed a 2-D data point in the z = 0 plane, in scene units."""
        return np.array([xy[0], xy[1], 0.0]) * cls.DISPLAY_SCALE

    @staticmethod
    def _axis_line(center, direction, half_length, color):
        """Return a segment through *center* along the 2-D *direction*.

        *center* is a 3-D scene position; *direction* is a 2-D vector that is
        lifted into the z = 0 plane without rescaling.
        """
        offset = np.array([direction[0], direction[1], 0.0]) * half_length
        return Line(
            start=center - offset,
            end=center + offset,
            color=color,
            stroke_width=4,
        )

    def construct(self):
        """Build and play the whole animation."""
        # ---- 1. Synthetic data -------------------------------------------
        # A standard-normal sample sheared into an elongated cluster. The
        # global seed is kept as-is so the rendered cloud stays the same.
        np.random.seed(1)
        data = np.random.randn(30, 2) @ np.array([[2, 0.5], [0.5, 1]])
        dots = VGroup(*[
            Dot3D(point=self._to_scene(pt), radius=0.12, color=BLUE)
            for pt in data
        ])
        self.play(FadeIn(dots))
        self.wait(1)

        # ---- 2. Mean -----------------------------------------------------
        mean = np.mean(data, axis=0)
        mean_pos = self._to_scene(mean)
        mean_dot = Dot3D(point=mean_pos, color=RED, radius=0.15)
        self.play(FadeIn(mean_dot))
        self.wait(1)

        # ---- 3. Formula for the mean ---------------------------------------
        mean_formula = MathTex(r"\mu = \frac{1}{N}\sum_{i=1}^N x_i").to_edge(UP)
        self.play(Write(mean_formula))
        self.wait(2)

        # ---- 4. Centre the data --------------------------------------------
        centered = data - mean

        # ---- 5. Formula for the covariance ---------------------------------
        cov_formula = MathTex(
            r"\Sigma = \frac{1}{N}\sum_{i=1}^N (x_i - \mu)(x_i - \mu)^T"
        ).next_to(mean_formula, DOWN)
        self.play(Write(cov_formula))
        self.wait(2)

        # ---- 6. Principal components ---------------------------------------
        # bias=True normalises by N so the computation matches the formula
        # shown on screen (np.cov defaults to N - 1). The normalisation only
        # rescales eigenvalues; the eigenvectors are the same either way.
        cov = np.cov(centered, rowvar=False, bias=True)
        eigvals, eigvecs = np.linalg.eigh(cov)
        # eigh returns eigenvalues in ascending order; take largest first.
        order = np.argsort(eigvals)[::-1]
        pc1 = eigvecs[:, order[0]]
        pc2 = eigvecs[:, order[1]]

        # ---- 7. Formula for the eigenvectors -------------------------------
        eig_formula = MathTex(r"\Sigma v_k = \lambda_k v_k").next_to(cov_formula, DOWN)
        self.play(Write(eig_formula))
        self.wait(2)

        # ---- 8. Principal-axis lines through the mean ----------------------
        pc1_line = self._axis_line(mean_pos, pc1, self.PC1_HALF_LENGTH, YELLOW)
        pc2_line = self._axis_line(mean_pos, pc2, self.PC2_HALF_LENGTH, ORANGE)
        self.play(Create(pc1_line), Create(pc2_line))
        self.wait(2)

        # ---- 9. Project the points onto PC1 --------------------------------
        # Orthogonal projection onto the line through the mean along pc1:
        # mean + <x - mean, pc1> * pc1, computed for all points at once.
        projected = mean + np.outer(centered @ pc1, pc1)
        # The coordinates are used directly as animation targets; there is no
        # need to build a second set of 3-D dots just to read their centres.
        slides = [
            dot.animate.move_to(self._to_scene(target))
            for dot, target in zip(dots, projected)
        ]
        self.play(*slides, run_time=3)
        self.wait(2)

        # ---- 10. Projection formula ----------------------------------------
        proj_formula = MathTex(r"y_i = V^T (x_i - \mu)").to_edge(DOWN)
        self.play(Write(proj_formula))
        self.wait(2)


# Render the PCAWithFormulas scene inline through Manim's IPython magic:
# -ql selects the low-quality (fast preview) preset and -v ERROR restricts
# log output to errors.
%manim -ql -v ERROR PCAWithFormulas