In [1]:
# Changing the working directory
import os

# Resolve "./src" against the current directory, then switch into the folder
# that contains it. `abspath` and `dname` are kept as module-level names.
abspath = os.path.abspath(os.path.join(os.curdir, "src"))
dname = os.path.split(abspath)[0]
os.chdir(dname)

# Dependencies

In [2]:
from manim import *

import numpy as np
import pandas as pd
import torch
from copy import deepcopy
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import sentencepiece as spm
from sklearn.decomposition import PCA
# Apply matplotlib's 'dark_background' style sheet to figures created after this point.
plt.style.use('dark_background')

In [3]:
from src.processing import src_processing
from src.transformer import Transformer

from utils import to_tokens, animate_emb, show_emb, animate_attn, remove_invisible_chars, NN

# Positional Encoding

In [4]:
# SentencePiece tokenizers: `shakespeare` supplies the source vocabulary size,
# `modern` the target vocabulary size.
shakespeare = spm.SentencePieceProcessor(
    model_file="../trained_models/tokenizer/shakespeare_en.model"
)
modern = spm.SentencePieceProcessor(
    model_file="../trained_models/tokenizer/modern_en.model"
)

# Build the Transformer with the same hyper-parameters that appear in the
# checkpoint file name (emb256 / heads8 / layers2).
model = Transformer(
    emb=256,
    heads=8,
    max_seq_len=256,
    src_vocab_len=shakespeare.vocab_size(),
    trg_vocab_len=modern.vocab_size(),
    num_layers=2,
)

# Restore the "ep0" checkpoint on CPU. Kept as the cell's last expression so
# the key-matching report is still echoed as the cell output.
model.load_state_dict(
    torch.load(
        "../trained_models/nn_models/model_emb256_heads8_layers2_ep0.pt",
        map_location=torch.device("cpu"),
    )
)

<All keys matched successfully>

In [5]:
def encode_src_sentences(sentences, tokenizer, max_seq_len=256):
    """Encode source sentences into a tensor of ids and recover their tokens.

    Parameters
    ----------
    sentences : list of str
        Raw source sentences.
    tokenizer : SentencePiece processor
        Used both for `EncodeAsIds` and by `to_tokens`.
    max_seq_len : int
        Forwarded to `to_tokens` and `src_processing`.

    Returns
    -------
    (ids, tokens)
        `ids` is a `torch.Tensor` built from `src_processing` applied to each
        encoded sentence. `tokens` is the output of `to_tokens`, truncated to
        the number of ids the tokenizer produced for the first sentence.
    """
    # Encoding src
    raw_ids = tokenizer.EncodeAsIds(list(sentences))

    # Tokenized src, cut to the un-processed length of the first sentence
    tokens = to_tokens(sentences, tokenizer, max_seq_len, "src")[:len(raw_ids[0])]

    # Moving everything to torch tensors
    ids = torch.tensor([src_processing(sentence_ids, max_seq_len) for sentence_ids in raw_ids])
    return ids, tokens


src_toy_1 = ["boy thanked king"]
src_toy_2 = ["king thanked boy"]

src_id_toy_1, src_toy_1_tok = encode_src_sentences(src_toy_1, shakespeare)
src_id_toy_2, src_toy_2_tok = encode_src_sentences(src_toy_2, shakespeare)

In [6]:
%%manim -qk -v WARNING PosEnc

class PosEnc(Scene):
    """Manim scene about token embeddings, positional embeddings and their sum.

    The word "Boy" is highlighted in two sentences ("Boy thanked King" and
    "King thanked Boy"). For each sentence the scene shows the values read from
    the token-embedding table, then from the positional-embedding table, and
    finally from their element-wise sum.

    The scene reads the notebook-level globals `model`, `src_id_toy_1` and
    `src_id_toy_2`, and loads a checkpoint into `model` as a side effect.
    """

    # Checkpoint whose embedding tables are visualised.
    _CHECKPOINT = "../trained_models/nn_models/model_emb256_heads8_layers2_ep{}.pt".format(454)
    # Number of positions passed to the positional-embedding table.
    _NUM_POSITIONS = 256
    # Embedding rows read for the highlighted word in sentence 1 and sentence 2.
    # NOTE(review): row 3 is presumably where the "boy" token lands in
    # "king thanked boy" after tokenization -- confirm against src_toy_2_tok.
    _ROW_SENTENCE_1 = 0
    _ROW_SENTENCE_2 = 3

    @staticmethod
    def _neuron_column(num_neurons, dots_after):
        """Stack `num_neurons` blue circles vertically.

        A vertical-dots glyph is inserted after each circle whose loop index is
        listed in `dots_after`.
        """
        column = VGroup(Circle(color=BLUE))
        for index in range(1, num_neurons):
            column.add(Circle(color=BLUE).next_to(column[-1], DOWN).shift(0.25 * DOWN))
            if index in dots_after:
                column.add(Tex(r"\vdots", color=BLUE).scale(3).next_to(column[-1], DOWN).shift(0.25 * DOWN))
        return column

    def _build_network(self, side):
        """Build a two-layer fully connected diagram.

        `side` is `RIGHT` or `LEFT`: the direction, seen from the input column,
        in which the output column is placed.

        Returns `(VGroup(input_layer, input_box), VGroup(output_layer,
        output_box), connections)`.
        """
        # Input Layer: 10 circles with dots after the 4th and 7th
        input_layer = self._neuron_column(10, (3, 6))
        input_box = SurroundingRectangle(input_layer, color=WHITE)

        # Output Layer: 7 circles with dots after the 3rd and 5th
        output_layer = self._neuron_column(7, (2, 4))
        output_box = SurroundingRectangle(output_layer, color=WHITE)
        VGroup(output_layer, output_box).next_to(VGroup(input_layer, input_box), side).shift(5 * side)

        # Connections: every input circle to every output circle (dots skipped)
        connections = VGroup()
        for source in input_layer:
            if not isinstance(source, Circle):
                continue
            for target in output_layer:
                if isinstance(target, Circle):
                    connections.add(
                        Line(
                            source.get_edge_center(side),
                            target.get_edge_center(-side),
                            color=GREEN,
                        ).set_stroke(width=2.5)
                    )

        return VGroup(input_layer, input_box), VGroup(output_layer, output_box), connections

    def Linear(self):
        """Network diagram with the output column to the right of the input column."""
        return self._build_network(RIGHT)

    def Linear_inv(self):
        """Mirrored network diagram: output column to the left of the input column."""
        return self._build_network(LEFT)

    def _highlight(self, phrase):
        """Draw a rectangle around `phrase`, pause, and return the rectangle."""
        box = SurroundingRectangle(phrase)
        self.play(Create(box))
        self.wait(1)
        return box

    def _activate(self, neuron, connections):
        """Light up an input neuron with a red "1" and flash the connections.

        Returns the "1" marker so the caller can fade it out later.
        """
        marker = Tex("1", color=RED).scale(0.5).move_to(neuron)
        self.play(neuron.animate.set_fill(color=WHITE, opacity=1), Write(marker))
        self.play(ApplyMethod(connections.set_color, RED), run_time=0.1)
        self.play(ApplyMethod(connections.set_color, GREEN), run_time=0.1)
        return marker

    def _show_values(self, layer, vector):
        """Write components of `vector` into the circles of `layer` and shade them.

        The component index follows the element index inside `layer`, so slots
        occupied by dots glyphs skip a component. Each fill opacity is
        |value / max(vector)|, clamped to [0, 1] because a negative component
        can have a larger magnitude than the maximum. Returns the value labels.
        """
        peak = vector.max().item()
        labels = []
        fills = []
        for i, neuron in enumerate(layer):
            if not isinstance(neuron, Circle):
                continue
            value = vector[i].item()
            # Guard the degenerate all-zero-maximum case instead of dividing by zero.
            opacity = 0.0 if peak == 0 else float(min(1.0, abs(value / peak)))
            labels.append(Tex(str(np.round(value, 1)), color=BLUE_B).scale(0.45).move_to(neuron))
            fills.append(neuron.animate.set_fill(color=WHITE, opacity=opacity))
        self.play(*[Write(label) for label in labels], *fills)
        self.wait(1)
        return labels

    def _clear_values(self, layer, fading, input_neuron=None):
        """Reset the fill of the circles in `layer` and fade out `fading`.

        When `input_neuron` is given, its fill is reset in the same animation.
        """
        animations = []
        if input_neuron is not None:
            animations.append(input_neuron.animate.set_fill(color=WHITE, opacity=0))
        animations.append(FadeOut(*fading))
        animations.extend(
            neuron.animate.set_fill(color=WHITE, opacity=0)
            for neuron in layer
            if isinstance(neuron, Circle)
        )
        self.play(*animations)
        self.wait(1)

    def construct(self):
        """Play the full sequence: token embeddings, positional embeddings, their sum."""
        # Load the checkpoint once and compute every embedding up front; the
        # values are only read for display, so no autograd graph is needed.
        model.load_state_dict(torch.load(self._CHECKPOINT, map_location=torch.device('cpu')))
        with torch.no_grad():
            token_emb_1 = model.src_token_emb(src_id_toy_1)[0]
            token_emb_2 = model.src_token_emb(src_id_toy_2)[0]
            pos_emb = model.src_pos_emb(torch.arange(self._NUM_POSITIONS, device="cpu"))
            sum_emb_1 = token_emb_1 + pos_emb
            sum_emb_2 = token_emb_2 + pos_emb
        row_1, row_2 = self._ROW_SENTENCE_1, self._ROW_SENTENCE_2

        # Introduction
        src_1 = Tex("Boy", " thanked King").to_edge(UP)
        src_2 = Tex("King thanked", " Boy").to_edge(DOWN)
        self.play(Write(src_1), Write(src_2))
        self.wait(1)
        input_layer, output_layer, connections = self.Linear()
        VGroup(input_layer, output_layer, connections).scale(0.25).move_to(ORIGIN).to_edge(LEFT).to_edge(DOWN).shift(RIGHT)
        tok1 = Tex("Boy").scale(0.75).next_to(input_layer[0][2], LEFT)
        tok2 = Tex("King").scale(0.75).next_to(input_layer[0][6], LEFT)
        tokens = VGroup(tok1, tok2)
        self.play(Create(VGroup(input_layer, output_layer, connections, tokens)))
        self.wait(1)

        # Token embedding, 1st sentence "Boy"
        boy_neuron = input_layer[0][2]
        box = self._highlight(src_1[0])
        marker = self._activate(boy_neuron, connections)
        labels = self._show_values(output_layer[0], token_emb_1[row_1])
        self._clear_values(output_layer[0], [marker, *labels, box], input_neuron=boy_neuron)

        # Token embedding, 2nd sentence "Boy": the values are left on screen
        # while the positional-encoding network is introduced.
        box = self._highlight(src_2[1])
        self._activate(boy_neuron, connections)
        self._show_values(output_layer[0], token_emb_2[row_2])
        self.play(FadeOut(box))
        self.wait(1)

        # Positional Encodings
        input_layer_inv, output_layer_inv, connections_inv = self.Linear_inv()
        VGroup(input_layer_inv, output_layer_inv, connections_inv).scale(0.25).move_to(ORIGIN).to_edge(RIGHT).to_edge(DOWN).shift(LEFT)
        pos_brace = Brace(input_layer_inv[1], sharpness=1, direction=[1, 0, 0])
        pos_text = Tex("Pos").scale(0.75).next_to(pos_brace, RIGHT)
        self.play(Create(VGroup(input_layer_inv, output_layer_inv, connections_inv)))
        self.wait(1)
        self.play(Create(VGroup(pos_brace, pos_text)))
        self.wait(1)

        # (highlighted phrase, positional input neuron, embedding row) per sentence
        sentences = (
            (src_1[0], input_layer_inv[0][0], row_1),
            (src_2[1], input_layer_inv[0][2], row_2),
        )
        for phrase, pos_neuron, row in sentences:
            box = self._highlight(phrase)
            marker = self._activate(pos_neuron, connections_inv)
            labels = self._show_values(output_layer_inv[0], pos_emb[row])
            self._clear_values(output_layer_inv[0], [marker, *labels, box], input_neuron=pos_neuron)

        # Adding Pos and Tok Emb
        add = Tex(r"+").scale(4).next_to(output_layer[1], RIGHT).shift(2*RIGHT)
        l1 = Line(output_layer[1].get_edge_center(RIGHT)+0.1*RIGHT, add.get_edge_center(LEFT)+0.25*LEFT)
        l2 = Line(output_layer_inv[1].get_edge_center(LEFT)+0.1*LEFT, add.get_edge_center(RIGHT)+0.25*RIGHT)
        self.play(Create(add), Create(l1), Create(l2))
        self.wait(1)
        _, output_layer_, _ = self.Linear()
        output_layer_.scale(0.25).move_to(add)
        self.play(ReplacementTransform(add, output_layer_))
        self.wait(1)

        for (phrase, pos_neuron, row), summed in zip(sentences, (sum_emb_1, sum_emb_2)):
            box = self._highlight(phrase)
            marker = self._activate(pos_neuron, connections_inv)
            pos_labels = self._show_values(output_layer_inv[0], pos_emb[row])
            sum_labels = self._show_values(output_layer_[0], summed[row])
            self._clear_values(output_layer_inv[0], [marker, *pos_labels, box], input_neuron=pos_neuron)
            self._clear_values(output_layer_[0], sum_labels)

        # Transition
        self.play(
            *[FadeOut(mob) for mob in self.mobjects]
        )
        self.wait(1)
        self.wait(1)

                                                                                                     