In [1]:
from manim import *
from scipy.stats import norm
import numpy as np
import random
import math
import pandas as pd

def gen_dataset(n: int = 50, seed=None) -> pd.DataFrame:
    """Generate a random, labelled 2-D toy dataset.

    Each row gets a group drawn from ``"A"``/``"B"``/``"C"`` with weights
    0.4/0.4/0.2, standard-normal ``x`` and ``y`` coordinates, two small noise
    terms (standard-normal draws scaled by 0.01), and a ``label`` that is
    ``-1`` where ``x - y < 1`` and ``1`` otherwise.

    Args:
        n: Number of rows to generate. ``0`` yields an empty frame that still
            has every column.
        seed: Optional seed. When given, private generators seeded with it
            are used, so the result is reproducible and the global
            ``random`` / ``np.random`` state is left untouched. When ``None``
            (the default) the global generators are used.

    Returns:
        A DataFrame with columns ``x``, ``y``, ``grp``, ``x_noise``,
        ``y_noise`` and ``label``.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    groups = ["A", "B", "C"]

    # Private generators are only created when a seed is requested, so the
    # default call keeps drawing from (and honouring) the global RNG state.
    py_rng = random if seed is None else random.Random(seed)
    np_rng = np.random if seed is None else np.random.RandomState(seed)

    def std_normal_draws() -> np.ndarray:
        # Inverse-CDF sampling. rand() lies in [0, 1), and ppf(0) is -inf,
        # hence the clamp. Operating on the whole array (rather than through
        # np.vectorize) also works when n == 0.
        return norm.ppf(np.maximum(1e-9, np_rng.rand(n)))

    # The draw order below matches the order of the columns' original
    # generation, so an externally seeded global RNG yields the same data.
    grps = py_rng.choices(groups, weights=[0.4, 0.4, 0.2], k=n)
    z_xs = std_normal_draws()
    z_ys = std_normal_draws()

    x_noise = 0.01 * std_normal_draws()
    y_noise = 0.01 * std_normal_draws()

    df = pd.DataFrame({"x": z_xs, "y": z_ys, "grp": grps, "x_noise": x_noise, "y_noise": y_noise})

    # Points on the side of the line y = x - 1 where x - y < 1 are labelled -1.
    df['label'] = np.where(df['x'] - df['y'] < 1, -1, 1)

    return df


# Seed both RNG sources that gen_dataset() draws from (the stdlib `random`
# module and numpy's global generator) so that Restart & Run All regenerates
# the same sample, and therefore the same animations.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

data = gen_dataset()

# Row count per (group, label) pair, as a quick look at the class balance.
print(data.groupby(by=["grp", "label"]).size())

grp  label
A    -1       18
      1       10
B    -1        9
      1        6
C    -1        6
      1        1
dtype: int64


In [7]:
%%manim -qm -v WARNING Introduction

# Dots bucketed by group letter; Introduction.construct appends to these lists.
dots_grp = {letter: [] for letter in ("A", "B", "C")}

# The same dots bucketed by classifier label (-1 or 1).
label_grp = {sign: [] for sign in (-1, 1)}

# Colour in which each group is drawn.
grp_map = dict(A=RED, B=BLUE, C=GREEN)

class Introduction(Scene):
    """Scene that plots the samples and walks through groups, labels and the boundary."""

    def construct(self):
        """Build all mobjects up front, then play the animation in three parts.

        Reads the module-level ``data`` frame and ``grp_map`` colours, and
        appends every created dot to the module-level ``dots_grp`` and
        ``label_grp`` buckets.
        """
        axes = Axes(
            x_range=[-5, 5, 1],
            y_range=[-5, 5, 1],
            x_length=6.5,
            y_length=6.5,
            tips=False,
            axis_config={"include_numbers": False},
        )

        # One dot per sample, registered under both its group and its label.
        for _, sample in data.iterrows():
            grp_key = sample['grp']
            dot = Dot(
                axes.c2p(sample['x'], sample['y'], 0),
                radius=0.04,
                color=grp_map[grp_key],
            )
            dots_grp[grp_key].append(dot)
            label_grp[sample['label']].append(dot)

        # Group legend, stacked downwards from the upper-right corner.
        tex_a = Tex(r"$\mathbb{A}$", font_size=80, color=RED).to_edge(UR)
        tex_b = Tex(r"$\mathbb{B}$", font_size=80, color=BLUE).next_to(tex_a, DOWN)
        tex_c = Tex(r"$\mathbb{C}$", font_size=80, color=GREEN).next_to(tex_b, DOWN)
        group_texs = (tex_a, tex_b, tex_c)
        group_boxes = [SurroundingRectangle(tex, buff=.1) for tex in group_texs]

        # Label legend, stacked downwards from the upper-left corner.
        pos_tex = Tex(r"Label:  1", color=ORANGE, font_size=60).to_edge(UL)
        neg_tex = Tex(r"Label: -1", color=TEAL_A, font_size=60).next_to(pos_tex, DOWN)
        pos_box = SurroundingRectangle(pos_tex, buff=.1)
        neg_box = SurroundingRectangle(neg_tex, buff=.1)

        # The line y = x - 1, drawn across the full x-range of the axes.
        boundary = axes.plot(lambda x: x - 1, x_range=[-5, 5], use_smoothing=False, color=YELLOW)
        boundary_label = axes.get_graph_label(
            boundary, r"\text{Decision Boundary}", x_val=5, direction=UR
        )

        # Part 1: introduce the A, B, C groups one at a time.
        self.play(Write(VGroup(axes, *group_texs)))
        self.wait(4)

        self.play(Write(group_boxes[0]))
        self.play(Create(VGroup(*dots_grp["A"])))
        self.wait()

        # Slide the highlight box to the next legend entry while that group's dots appear.
        for old_box, new_box, grp_key in zip(group_boxes, group_boxes[1:], ("B", "C")):
            self.play(
                ReplacementTransform(old_box, new_box),
                Create(VGroup(*dots_grp[grp_key])),
            )
            self.wait()

        self.play(FadeOut(group_boxes[-1], *group_texs))

        # Part 2: recolour the dots by classifier label.
        self.play(Write(VGroup(pos_tex, neg_tex)))
        self.play(Write(pos_box))
        self.play(FadeToColor(VGroup(*label_grp[1]), color=ORANGE))

        self.play(
            ReplacementTransform(pos_box, neg_box),
            FadeToColor(VGroup(*label_grp[-1]), color=TEAL_A),
        )
        self.play(FadeOut(neg_box, pos_tex, neg_tex))
        self.wait(2)

        # Part 3: show the decision boundary, then remove it again.
        self.play(Create(boundary))
        self.play(Write(boundary_label))

        self.wait()

        self.play(FadeOut(boundary_label, boundary))
      
      
      


                                                                                                               

In [26]:
%%manim -qm -v WARNING IndTable

class IndTable(Scene):
    """Scene that writes a label-by-group summary table on screen."""

    def get_perc(self, d: pd.DataFrame, y=None, grp=None):
        """Placeholder helper; not implemented yet, so it returns ``None``.

        NOTE(review): presumably intended to return the percentage of rows of
        ``d`` in group ``grp`` that carry label ``y`` — confirm the intended
        denominator before implementing.
        """
        # The annotation previously read `pd.Dataframe`, which does not exist
        # and raised AttributeError as soon as this method was defined.
        pass

    def construct(self):
        """Build the table from the module-level ``data`` frame and animate it."""
        # Table meant to show label percentages across groups.
        # TODO: only the top-left cell is data-driven so far, and it holds the
        # raw count of label -1 rows rather than a percentage; every other
        # cell is still a placeholder string.
        negative_count = data[data['label'] == -1].shape[0]

        t0 = Table(
            [
                [str(negative_count), "Second", "A", "C"],
                ["Third", "Fourth", "A", "A"],
            ],
            row_labels=[
                Text("% -1"),
                Text("%  1"),
            ],
            col_labels=[
                Text("All"),
                Tex(r"$\mathbb{A}$", color=RED, font_size=80),
                Tex(r"$\mathbb{B}$", color=BLUE, font_size=80),
                Tex(r"$\mathbb{C}$", color=GREEN, font_size=80),
            ],
        )

        self.play(Write(t0))

        self.wait()

                                                                                 