<a href="https://colab.research.google.com/github/sanhiitaa/prompt-engineering-projects/blob/main/L2-regularization-manim/FINAL_DRAFT_l2_normalization_manim_prompt_engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies
# (notebook shell escapes: each `!` line runs in the system shell and uses apt)
# Refresh the apt package index before installing anything.
!apt update
# Cairo, JPEG and Pango development packages, plus ffmpeg.
!apt install -y libcairo2-dev libjpeg-dev libpango1.0-dev ffmpeg
# TeX Live packages; the animation cell below renders a formula with MathTex.
!apt install -y texlive texlive-latex-extra texlive-fonts-extra

# Install manim
# (the Python package that provides Scene, Text, Axes, Table, ... used below)
!pip install manim


In [None]:
from manim import *
import numpy as np

class RidgeRegressionAnimation(Scene):
    """Animated explainer for overfitting, L2 regularization and ridge regression.

    ``construct`` plays eight segments back to back. Every segment finishes
    by calling ``self.clear()`` so the next one starts on an empty scene.
    """

    def construct(self):
        """Play all segments of the video in their fixed order."""
        # Scene 1: Title page
        self.first_page()

        # Scene 2: Overfitting in Regression
        self.show_overfitting()

        # Scene 3: Regularization to Overcome Overfitting
        self.show_regularization_intro()

        # Scene 4: L2 Regularization
        self.show_l2_regularization()

        # Scene 5: Ridge Regression
        self.show_ridge_regression()

        # Scene 6: Ridge Regression Part2
        self.ridge_regression_p2()

        # Scene 7: Last page
        self.last_page()

        # Scene 8: Screenshot this!
        self.screenshot_this()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _make_scatter(seed, color):
        """Build 0-10 axes and eight noisy samples of ``y = 0.5 * x + 3``.

        A private ``RandomState`` is used instead of ``np.random.seed`` so the
        process-wide NumPy generator is not reseeded as a side effect. For a
        given seed it yields the same draws as seeding the global legacy
        generator, so the plotted points are unchanged.

        Args:
            seed: Integer seed for the sample generator.
            color: Manim color applied to every dot.

        Returns:
            ``(axes, dots, x, y)`` where ``dots`` is a ``VGroup`` of ``Dot``
            objects placed with ``axes.c2p`` and ``x``/``y`` are the raw
            1-D sample arrays.
        """
        rng = np.random.RandomState(seed)
        x = rng.rand(8) * 10
        y = 0.5 * x + rng.randn(8) * 2 + 3
        axes = Axes(
            x_range=[0, 10, 1], y_range=[0, 10, 1],
            axis_config={"include_numbers": True}
        )
        dots = VGroup(*[Dot(axes.c2p(px, py), color=color) for px, py in zip(x, y)])
        return axes, dots, x, y

    @staticmethod
    def _fit_ridge_line(x, y, lambda_param):
        """Fit ``y ~ slope * x + intercept`` with an L2 penalty on the slope.

        Only the slope is penalized: the intercept is left free, which matches
        the cost shown on screen (the penalty sums over the weights ``w_j``).
        The normal equations are solved with ``np.linalg.solve`` rather than
        forming an explicit inverse.

        Args:
            x: 1-D array of inputs.
            y: 1-D array of targets, same length as ``x``.
            lambda_param: Non-negative regularization strength.

        Returns:
            ``(slope, intercept)`` of the fitted line.
        """
        design = np.column_stack([x, np.ones_like(x)])  # last column is the bias term
        penalty = lambda_param * np.eye(design.shape[1])
        penalty[-1, -1] = 0.0  # do not shrink the intercept
        slope, intercept = np.linalg.solve(design.T @ design + penalty, design.T @ y)
        return slope, intercept

    def _show_text_slide(self, heading, body):
        """Write a top-edge title and a centered paragraph, hold 5 s, then clear."""
        title = Text(heading).to_edge(UP)
        self.play(Write(title))

        description = Text(body, font_size=20, line_spacing=2.0).move_to(ORIGIN)
        self.play(Write(description))
        self.wait(5)
        self.clear()

    # ------------------------------------------------------------------
    # Data
    # ------------------------------------------------------------------

    def plot_data_points(self):
        """Return ``(axes, scatter_points, x, y)`` for the training set (seed 42, yellow dots)."""
        return self._make_scatter(42, YELLOW)

    def plot_new_data_points(self):
        """Return ``(axes, scatter_points, x, y)`` for the unseen set (seed 43, red dots)."""
        return self._make_scatter(43, RED)

    # ------------------------------------------------------------------
    # Segments
    # ------------------------------------------------------------------

    def first_page(self):
        """Show the video title for three seconds."""
        title = Text("L2 Regularization | Ridge Regression")
        self.play(Write(title))
        self.wait(3)
        self.clear()

    def show_overfitting(self):
        """Explain overfitting and show one overfit curve on training and on new data."""
        title = Text("Why do we need regularization?").to_edge(UP)
        self.play(Write(title))

        subtitle = Text('because: Overfitting', font_size=17, line_spacing=1.5).next_to(title, DOWN)
        self.play(Write(subtitle))

        description = Text(
            "Linear regression predicts a continuous value.\n"
            "When a machine learning model memorizes training data too well and performs poorly\n"
            "on unseen data, we say the model is overfitting on the training data.",
            font_size=20, line_spacing=2.0
        ).next_to(subtitle, DOWN)

        self.play(Write(description))

        # Training data with overfit curve.
        # A polynomial of degree (number of points - 1) has enough freedom to
        # pass through every training point, which is the overfit being shown.
        axes, scatter_points, x, y = self.plot_data_points()
        degree = len(x) - 1
        coefs = np.polyfit(x, y, degree)
        overfit_poly = np.poly1d(coefs)
        overfit_line = axes.plot(overfit_poly, color=RED, x_range=[min(x), max(x)])

        self.play(Create(axes), Create(scatter_points))
        self.play(Create(overfit_line))

        # Shrink the finished plot and park it bottom-left.
        graph = VGroup(axes, scatter_points, overfit_line)
        self.play(graph.animate.scale(0.3).shift(LEFT * 3 + DOWN * 1.5))

        # New data with the same overfit curve (the polynomial is NOT refit).
        axes_new, scatter_points_new, x_new, _ = self.plot_new_data_points()
        overfit_line_new = axes_new.plot(overfit_poly, color=RED, x_range=[min(x_new), max(x_new)])

        self.play(Create(axes_new), Create(scatter_points_new))
        self.play(Create(overfit_line_new))

        # Shrink the second plot and park it bottom-right, beside the first.
        graph_new = VGroup(axes_new, scatter_points_new, overfit_line_new)
        self.play(graph_new.animate.scale(0.3).shift(RIGHT * 3 + DOWN * 1.5))

        self.wait(5)
        self.clear()

    def show_regularization_intro(self):
        """Text slide introducing regularization."""
        self._show_text_slide(
            "Regularization to Overcome Overfitting",
            "Regularization is a technique used in machine learning\n"
            "to prevent models from overfitting the training data\n"
            "and improve their generalizability to unseen data.\n"
            "Regularization comes in various forms, with L1 and L2 being the most prominent.",
        )

    def show_l2_regularization(self):
        """Text slide describing what L2 regularization does to the weights."""
        self._show_text_slide(
            "L2 Regularization",
            "L2 regularization shrinks the weights/coefficients of the model.\n"
            "It penalizes large coefficients, making the regression line\n"
            "smoother and less prone to fitting noise in the data.",
        )

    def show_ridge_regression(self):
        """Text slide connecting ridge regression to the L2 penalty."""
        self._show_text_slide(
            "Ridge Regression",
            "Ridge Regression uses L2 regularization.\n"
            "It adds an L2 penalty term to the loss function,\n"
            "leading to a simpler model that generalizes better to unseen data.",
        )

    def ridge_regression_p2(self):
        """Show the ridge cost formula and a ridge-fitted line on the training data."""
        l2_formula = MathTex(r"\text{Cost} = \sum_{i=1}^n (y_i - \hat{y}_i)^2 + \lambda \sum_{j=1}^p w_j^2").to_edge(UP)

        form_exp = Text(
            "By providing a low value of lambda in Ridge Regression,\n"
            "large weight values have less influence, resulting in a \n"
            "less biased and more flexible model.",
            font_size=17, line_spacing=1.5).next_to(l2_formula, DOWN)

        self.play(Write(l2_formula))
        self.play(Write(form_exp))

        axes, scatter_points, x, y = self.plot_data_points()

        # Fit a linear regression model with L2 regularization
        lambda_param = 0.1
        slope, intercept = self._fit_ridge_line(x, y, lambda_param)
        # The plotted function's argument is named ``t`` so it does not shadow
        # the sample array ``x`` above.
        ridge_line = axes.plot(lambda t: slope * t + intercept, color=GREEN, x_range=[0, 10])

        self.play(Create(axes), Create(scatter_points))
        self.play(Create(ridge_line))

        graph = VGroup(axes, scatter_points, ridge_line)
        self.play(graph.animate.scale(0.4).shift(DOWN))

        self.wait(5)
        self.clear()

    def last_page(self):
        """Pose a multiple-choice question about lambda and hold it for seven seconds."""
        title = Text("Question for you!").to_edge(UP)
        self.play(Write(title))
        subtitle = Text("Pause and answer in comments :)", font_size=16, line_spacing=1.5).next_to(title, DOWN)
        self.play(Write(subtitle))

        question = Text("How does increasing the value of lambda affect the strictness of regularization in ridge regression? \n(Select one of the below options)\n"
                        "\n1. A higher value of lambda makes the regularization stricter, penalizing large coefficients more heavily.\n"
                        "\n2. A higher value of lambda makes the regularization more lenient, allowing larger coefficients.\n",
                        font_size=18, line_spacing=1.5).move_to(ORIGIN)
        self.play(Write(question))

        self.wait(7)
        self.clear()

    def screenshot_this(self):
        """Show a summary table comparing low and high lambda values."""
        # Title
        title = Text("Screenshot this for future reference").to_edge(UP)

        # Table Data
        table_data = [
            ["Low (e.g., 0.01)", "Less biased, more flexible model", "Large weight values have less influence"],
            ["High (e.g., 10)", "More biased, stricter model", "Large weight values are heavily penalized"],
        ]

        # Create Table
        # NOTE(review): Manim documents ``top_left_entry`` as applying only when
        # both row and column labels are given; no ``row_labels`` are passed
        # here, so confirm whether "Parameter" is actually rendered.
        table = Table(
            table_data,
            col_labels=[Text(col) for col in ["Lambda (λ) Value", "Model Characteristics", "Impact on Coefficients"]],
            top_left_entry=Text("Parameter").set_color(BLUE)
        )

        # Adjust Table Position
        table.scale(0.4)
        table.move_to(ORIGIN)

        # Add Title and Table to Scene
        self.play(Write(title))
        self.wait(1)
        self.play(Create(table))
        self.wait(7)
        self.clear()


# Entry point: render the animation when this code runs as the main module.
if __name__ == "__main__":
    from manim import config
    # NOTE(review): media_width is presumably the display width of the embedded
    # video in notebook output — confirm against the Manim configuration docs.
    config.media_width = "100%"
    # Instantiate the scene and render it directly, without the manim CLI.
    scene = RidgeRegressionAnimation()
    scene.render()
