In [1]:
# Changing the working directory
# The later cells import from the `src` package and read files through
# relative paths such as "../data/...", so they depend on the process
# working directory set here.
import os
# Absolute path of "./src", resolved against the current working directory.
abspath = os.path.abspath("./src")
# Parent of that path, i.e. the directory that contains `src`.
dname = os.path.dirname(abspath)
# NOTE(review): because "./src" is resolved against the current directory,
# `dname` is the absolute path of the current directory itself, so this call
# looks like a no-op — confirm whether a different target was intended.
os.chdir(dname)

# Dependencies

In [2]:
from manim import *

import numpy as np
import pandas as pd
import torch
from copy import deepcopy
import matplotlib.pyplot as plt
import sentencepiece as spm
plt.style.use('dark_background')

In [3]:
from src.processing import src_processing
from src.transformer import Transformer

from utils import to_tokens, animate_emb, show_emb, animate_attn, remove_invisible_chars

# Dataset

## Tokenization

In [4]:
%%manim -qk -v WARNING Tokenization

class Tokenization(Scene):
    def get_unique_chars(self, strs):
        """Collect the distinct characters that occur in a collection of strings.

        Args:
            strs: Iterable of strings to scan.

        Returns:
            list[str]: Every distinct character exactly once, ordered by first
            appearance in ``strs``.
        """
        # dict.fromkeys de-duplicates while keeping insertion order, so the
        # result (and the grid rendered from it) is the same on every run.
        # Listing a set of strings is not: its order depends on per-process
        # string hash randomization and changes between kernel restarts.
        return list(dict.fromkeys(ch for s in strs for ch in s))

    def get_unique_words(self, strs):
        """Collect the distinct whitespace-separated words in a collection of strings.

        Args:
            strs: Iterable of strings; each is split with ``str.split()``.

        Returns:
            list[str]: Every distinct word exactly once, ordered by first
            appearance in ``strs``.
        """
        # Insertion-ordered de-duplication keeps the output reproducible across
        # runs; a set of strings would be listed in a hash-dependent order, so
        # any prefix taken from it would differ between kernel restarts.
        return list(dict.fromkeys(word for s in strs for word in s.split()))

    def construct(self):
        """Animate character-level and word-level tokenization side by side.

        Reads ``../data/shk2mod.csv``, writes the first sentence of its first
        column, then builds a character grid on the left and a word grid on the
        right, splits the example sentence into boxed characters / words, states
        a drawback for each approach and finally changes the title to
        "Sub-Word Tokenization".
        """
        # Tokenization & Numericalization
        df = pd.read_csv("../data/shk2mod.csv", index_col=0)
        df.drop("id", axis=1, inplace=True)
        d = df.to_numpy()
        # First row, first column is used as the running example sentence.
        src = d[0,0]
        src_stn = Tex(src).scale(0.75)
        self.play(Write(src_stn))
        self.wait(2)

        # Title
        title = Title("Tokenization")
        self.play(Write(title), FadeOut(src_stn))
        self.wait(1)

        # Tokenizer types
        subtitle1 = Tex("Character Level Tokenization").scale(0.7).next_to(title, DOWN).to_edge(LEFT)
        subtitle2 = Tex("Word Level Tokenization").scale(0.7).next_to(title, DOWN).to_edge(RIGHT)
        self.play(Write(subtitle1))
        self.play(Write(subtitle2))
        self.wait(1)

        # Char Level Tokenizer
        # Distinct characters over the whole first column, one Text mobject each.
        unq_chars = self.get_unique_chars(d[:,0])
        unq_chars = VGroup(*[Text(s).scale(0.3) for s in unq_chars])
        # NOTE(review): the grid is sized 5 x 20 — presumably the dataset has at
        # most 100 distinct characters; confirm.
        unq_chars.arrange_in_grid(rows=5, cols=20, buff=0.05).to_edge(DOWN).to_edge(LEFT).shift(0.15*RIGHT+UP)
        char_box = SurroundingRectangle(unq_chars, buff=SMALL_BUFF, color=WHITE).scale(0.65)
        self.play(Create(char_box))
        self.wait(1)
        # Write one character per step while briefly flashing sentence i of the
        # dataset under the subtitle.
        for i in range(len(unq_chars)):
            stn = Tex(d[i,0]).scale(0.3).next_to(subtitle1, DOWN).shift(0.25*DOWN)
            self.play(Write(unq_chars[i]), FadeIn(stn), run_time=0.05)
            self.play(FadeOut(stn), run_time=0.05)
        self.wait(1)

        src = d[0,0]
        src_stn = Tex(src).scale(0.5).next_to(subtitle1, DOWN).shift(0.25*DOWN)
        # Build alternating (character, box) members; spaces are skipped.
        # NOTE(review): the sequence starts at src[1], so src[0] gets no box —
        # confirm this is intended (e.g. a leading character in the data).
        src_chars = VGroup(Tex(src[1]))
        src_chars.add(SurroundingRectangle(src_chars[-1], buff=0.25))
        for char in src[2:]:
            if char != " ":
                src_chars.add(Tex(char).next_to(src_chars[-1], RIGHT))
                src_chars.add(SurroundingRectangle(src_chars[-1], buff=0.25))
        src_chars.scale(0.45).move_to(src_stn).to_edge(LEFT)
        self.play(Write(src_stn))
        self.wait(1)
        self.play(ReplacementTransform(src_stn, src_chars))
        self.wait(1)

        # The word and length counts in this caption are hard-coded, not
        # computed from `src`.
        char_drawback = Tex("An 8 word sentence has turned into a sequence of length 26!").scale(0.5).next_to(src_chars, DOWN).shift(0.25*DOWN)
        self.play(Write(char_drawback))
        self.wait(1)

        # Word level tokenizer
        # Only the first 50 distinct words are displayed (10 x 5 grid).
        unq_words = self.get_unique_words(d[:,0])
        unq_words = VGroup(*[Text(s).scale(0.3) for s in unq_words[:50]])
        unq_words.arrange_in_grid(rows=10, cols=5, buff=0.05).to_edge(DOWN).to_edge(RIGHT).shift(DOWN)
        word_box = SurroundingRectangle(unq_words, buff=SMALL_BUFF, color=WHITE)
        self.play(Create(word_box))
        self.wait(1)
        # Same reveal pattern as for characters: one word per step with the
        # i-th dataset sentence flashed under the subtitle.
        for i in range(len(unq_words[:50])):
            stn = Tex(d[i,0]).scale(0.3).next_to(subtitle2, DOWN).shift(0.25*DOWN)
            self.play(Write(unq_words[i]), FadeIn(stn), run_time=0.05)
            self.play(FadeOut(stn), run_time=0.05)
        self.wait(1)

        src = d[0,0]
        src_stn = Tex(src).scale(0.5).next_to(subtitle2, DOWN).shift(0.25*DOWN)
        # Alternating (word, box) members built from the whitespace split.
        src_words = VGroup(Tex(src.split()[0]))
        src_words.add(SurroundingRectangle(src_words[-1], buff=0.25))
        for word in src.split()[1:]:
            src_words.add(Tex(word).next_to(src_words[-1], RIGHT))
            src_words.add(SurroundingRectangle(src_words[-1], buff=0.25))
        src_words.scale(0.35).move_to(src_stn).to_edge(RIGHT)
        self.play(Write(src_stn))
        self.wait(1)
        self.play(ReplacementTransform(src_stn, src_words))
        self.wait(1)

        # The vocabulary size in this caption is a hard-coded literal.
        word_drawback = Tex("Vocabulary size here is 73558!").scale(0.5).next_to(src_words, DOWN).shift(0.25*DOWN)
        self.play(Write(word_drawback))
        self.wait(1)

        # Transition
        # Fade out everything except the title, which is morphed next.
        self.play(
            *[FadeOut(mob)for mob in self.mobjects if mob != title]
        )

        title_ = Title("Sub-Word Tokenization")
        self.play(ReplacementTransform(title, title_))
        self.wait(1)

        self.wait(1)

                                                                                                                                                                                                                                                                                                                                                                                       

In [5]:
%%manim -qk -v WARNING SentencePiece

class SentencePiece(Scene):
    """Scene demonstrating sub-word tokenization with SentencePiece models.

    Shows a sample of the second dataset column, a code window that consumes
    it, the example sentence split by two tokenizer models, and finally the
    replacement of each displayed piece by its token id.
    """

    @staticmethod
    def _visible_pieces(processor, sentence):
        """Encode ``sentence`` and drop pieces that consist only of '▁' markers.

        Pieces and ids are filtered together so that position ``k`` in the
        returned piece list always corresponds to position ``k`` in the
        returned id list.

        Args:
            processor: Object exposing ``EncodeAsPieces`` and ``EncodeAsIds``.
            sentence: Text to encode.

        Returns:
            tuple[list[str], list[int]]: Displayable pieces (leading '▁'
            stripped) and their ids, of equal length.
        """
        pieces = processor.EncodeAsPieces(sentence)
        ids = processor.EncodeAsIds(sentence)
        stripped = [(piece.lstrip('▁'), tok_id) for piece, tok_id in zip(pieces, ids)]
        kept = [(piece, tok_id) for piece, tok_id in stripped if piece != '']
        return [piece for piece, _ in kept], [tok_id for _, tok_id in kept]

    def construct(self):
        """Build and play the sub-word tokenization animation."""
        # Transition
        title = Title("Sub-Word Tokenization")
        self.add(title)
        self.wait(1)

        # Sub word tokenization
        spm_sc = ImageMobject("../data/spm.png")
        self.play(FadeIn(spm_sc))
        self.wait(1)
        self.play(FadeOut(spm_sc))
        self.wait(1)

        df = pd.read_csv("../data/shk2mod.csv", index_col=0)
        df.drop("id", axis=1, inplace=True)
        d = df.to_numpy()
        src, trg = d[:,0], d[:,1]
        # Only the second column is displayed in this scene; the first column
        # is needed solely for its length when sampling row indices.
        md_stns = VGroup(Tex(trg[0]))
        np.random.seed(0)
        idx = np.random.randint(1, len(src), 7)
        for i in range(7):
            if i != 3:
                # Modern sentence
                md_stn = Tex(trg[idx[i]]).next_to(md_stns[-1], DOWN).shift(0.25*DOWN)
                md_stns.add(md_stn)
            else:
                # Modern English vdots
                md_vdots = Tex(r"\vdots").next_to(md_stns[-1], DOWN).shift(0.25*DOWN)
                md_stns.add(md_vdots)

        md_stns = md_stns.scale(0.35).move_to(ORIGIN).to_edge(DOWN).shift(UP)
        md_stns_box = SurroundingRectangle(md_stns, color=BLUE)

        self.play(Create(md_stns_box), Write(md_stns))
        self.wait(1)

        rendered_code = Code(file_name="../data/sk100_tokenizer.py", tab_width=4, background="window",
                            language="Python", font="Monospace", insert_line_no=False,
                            style="dracula", line_spacing=1).scale(0.35).next_to(md_stns_box, UP)
        rendered_code.code = remove_invisible_chars(rendered_code.code)
        # NOTE(review): the line/column indices below are tied to the exact
        # contents of sk100_tokenizer.py — re-check them if that file changes.
        self.play(Create(rendered_code[0]), Write(rendered_code.code[0:2]), Write(rendered_code.code[2][:31]))
        self.wait(1)
        # The dataset box morphs into the tail of code line 2.
        self.play(ReplacementTransform(VGroup(md_stns_box, md_stns), rendered_code.code[2][31:]))
        self.wait(1)
        self.play(Write(rendered_code.code[3][:11]))
        self.wait(1)
        self.play(Write(rendered_code.code[3][11:]), Write(rendered_code.code[4:]))
        self.wait(1)

        # Example sentence split by the first tokenizer model.
        sentence = d[0,1]
        src_stn = Tex(sentence).scale(0.5).next_to(rendered_code, DOWN).shift(0.25*DOWN)
        modern = spm.SentencePieceProcessor(model_file="../trained_models/tokenizer/modern_en_vs100.model")
        src_tok, _ = self._visible_pieces(modern, sentence)

        # Alternating (piece, box) members: piece k sits at index 2*k.
        src_words = VGroup(Text(src_tok[0]))
        src_words.add(SurroundingRectangle(src_words[-1], buff=0.25))
        for word in src_tok[1:]:
            src_words.add(Text(word).next_to(src_words[-1], RIGHT))
            src_words.add(SurroundingRectangle(src_words[-1], buff=0.25))
        src_words.scale(0.45).move_to(src_stn)
        self.play(Write(src_stn))
        self.wait(1)
        self.play(ReplacementTransform(src_stn, src_words))
        self.wait(1)
        self.play(FadeOut(src_words))
        self.wait(1)

        rendered_code_ = Code(file_name="../data/sk5000_tokenizer.py", tab_width=4, background="window",
                            language="Python", font="Monospace", insert_line_no=False,
                            style="dracula", line_spacing=1).scale(0.35).move_to(rendered_code)
        rendered_code_.code = remove_invisible_chars(rendered_code_.code)
        self.play(ReplacementTransform(rendered_code, rendered_code_))
        self.wait(1)

        # Same sentence split by the second tokenizer model; ids are kept
        # aligned with the displayed pieces for the final replacement step.
        sentence = d[0,1]
        src_stn = Tex(sentence).scale(0.5).next_to(rendered_code_, DOWN).shift(0.25*DOWN)
        modern = spm.SentencePieceProcessor(model_file="../trained_models/tokenizer/modern_en.model")
        src_tok, src_ids = self._visible_pieces(modern, sentence)

        src_words = VGroup(Text(src_tok[0]))
        src_words.add(SurroundingRectangle(src_words[-1], buff=0.25))
        for word in src_tok[1:]:
            src_words.add(Text(word).next_to(src_words[-1], RIGHT))
            src_words.add(SurroundingRectangle(src_words[-1], buff=0.25))
        src_words.scale(0.45).move_to(src_stn)
        self.play(Write(src_stn))
        self.wait(1)
        self.play(ReplacementTransform(src_stn, src_words))
        self.wait(1)
        self.play(FadeOut(rendered_code_))
        self.wait(1)

        rendered_code = Code(file_name="../data/sk5000_tokenizer_call.py", tab_width=4, background="window",
                            language="Python", font="Monospace", insert_line_no=False,
                            style="dracula", line_spacing=1).scale(0.35).move_to(rendered_code_)
        rendered_code.code = remove_invisible_chars(rendered_code.code)
        self.play(Write(rendered_code))
        self.wait(1)
        # Replace each displayed piece (even indices of src_words) by its id.
        # src_ids has exactly one entry per displayed piece, so 2*i never
        # runs past the group or lands on the wrong piece.
        for i, tok_id in enumerate(src_ids):
            id_tex = Tex(str(tok_id)).scale(0.5).move_to(src_words[i*2])
            self.play(ReplacementTransform(src_words[i*2], id_tex))
        self.wait(1)

        # Transition
        self.play(
            *[FadeOut(mob)for mob in self.mobjects]
        )
        self.wait(1)

                                                                                                                                      

## Padding and Truncating

In [6]:
%%manim -qk -v WARNING Padding

class Padding(Scene):
    def padding(self, tokenized_text, max_seq_len, pad_id=0):
        """Pad or truncate a token sequence to exactly ``max_seq_len`` entries.

        The input sequence is never modified; a new list is always returned.

        Args:
            tokenized_text: Sequence of token ids.
            max_seq_len: Target length of the result.
            pad_id: Value appended to short sequences (default ``0``).

        Returns:
            list: The first ``max_seq_len`` tokens, followed by as many
            ``pad_id`` entries as needed to reach ``max_seq_len``.
        """
        # Slice first: this both truncates long inputs and copies short ones,
        # so extending below cannot leak back into the caller's list (an
        # in-place `+=` on the argument would).
        tokens = list(tokenized_text[:max_seq_len])
        # A non-positive repeat count yields an empty list, so sequences that
        # are already long enough are returned unpadded.
        tokens.extend([pad_id] * (max_seq_len - len(tokens)))
        return tokens

    def construct(self):
        """Animate the dataset going from text to token ids to padded ids.

        Three views of the same sampled rows are built for each of the two
        dataset columns (left box in red, right box in blue): raw sentences,
        space-joined token ids, and token ids padded / truncated to length 20.
        Each view is morphed into the next, then everything fades out.
        """
        # Loading Dataset
        df = pd.read_csv("../data/shk2mod.csv", index_col=0)
        df.drop("id", axis=1, inplace=True)
        d = df.to_numpy()
        src, trg = d[:,0], d[:,1]

        # Showcasing dataset
        # Row 0 heads each column; six further rows are sampled with a fixed
        # seed, and position 3 of the 7 draws is replaced by vertical dots.
        sk_stns = VGroup(Tex(src[0]))
        md_stns = VGroup(Tex(trg[0]))
        np.random.seed(0)
        idx = np.random.randint(1, len(src), 7)
        for i in range(7):
            if i != 3:
                # Shakespeare sentence
                sk_stn = Tex(src[idx[i]]).next_to(sk_stns[-1], DOWN).shift(0.25*DOWN)
                sk_stns.add(sk_stn)
                # Modern sentence
                md_stn = Tex(trg[idx[i]]).next_to(md_stns[-1], DOWN).shift(0.25*DOWN)
                md_stns.add(md_stn)
            else:
                # Shakespeare vdots
                sk_vdots = Tex(r"\vdots").next_to(sk_stns[-1], DOWN).shift(0.25*DOWN)
                sk_stns.add(sk_vdots)
                # Modern English vdots
                md_vdots = Tex(r"\vdots").next_to(md_stns[-1], DOWN).shift(0.25*DOWN)
                md_stns.add(md_vdots)

        sk_stns = sk_stns.scale(0.35).to_edge(UP).shift(DOWN).to_edge(LEFT)
        sk_stns_box = SurroundingRectangle(sk_stns, color=RED)
        md_stns = md_stns.scale(0.35).to_edge(UP).shift(DOWN).to_edge(RIGHT)
        md_stns_box = SurroundingRectangle(md_stns, color=BLUE)

        self.play(Create(sk_stns_box), Write(sk_stns))
        self.wait(1)

        self.play(Create(md_stns_box), Write(md_stns))
        self.wait(1)

        # Tokenize Dataset
        # Each column is encoded with its own tokenizer model; the whole
        # column is passed as a list in a single call.
        shakespeare = spm.SentencePieceProcessor(model_file="../trained_models/tokenizer/shakespeare_en.model")
        src_tok = shakespeare.EncodeAsIds(list(src))
        modern = spm.SentencePieceProcessor(model_file="../trained_models/tokenizer/modern_en.model")
        trg_tok = modern.EncodeAsIds(list(trg))

        sk_toks = VGroup(Tex(' '.join(map(str, src_tok[0]))))
        md_toks = VGroup(Tex(' '.join(map(str, trg_tok[0]))))

        # Re-seeding with 0 repeats the earlier draw.
        # NOTE(review): this picks the same rows only if len(src_tok) equals
        # len(src) — presumably one id list per sentence; confirm.
        np.random.seed(0)
        idx = np.random.randint(1, len(src_tok), 7)
        for i in range(7):
            if i != 3:
                # Shakespeare sentence
                sk_stn = Tex(' '.join(map(str, src_tok[idx[i]]))).next_to(sk_toks[-1], DOWN).shift(0.25*DOWN)
                sk_toks.add(sk_stn)
                # Modern sentence
                md_stn = Tex(' '.join(map(str, trg_tok[idx[i]]))).next_to(md_toks[-1], DOWN).shift(0.25*DOWN)
                md_toks.add(md_stn)
            else:
                # Shakespeare vdots
                sk_vdots = Tex(r"\vdots").next_to(sk_toks[-1], DOWN).shift(0.25*DOWN)
                sk_toks.add(sk_vdots)
                # Modern English vdots
                md_vdots = Tex(r"\vdots").next_to(md_toks[-1], DOWN).shift(0.25*DOWN)
                md_toks.add(md_vdots)

        sk_toks = sk_toks.scale(0.35).to_edge(UP).shift(DOWN).to_edge(LEFT)
        sk_toks_box = SurroundingRectangle(sk_toks, color=RED)
        md_toks = md_toks.scale(0.35).to_edge(UP).shift(DOWN).to_edge(RIGHT)
        md_toks_box = SurroundingRectangle(md_toks, color=BLUE)

        # Morph the sentence boxes into the token-id boxes, one side at a time.
        self.play(ReplacementTransform(VGroup(sk_stns_box, sk_stns), VGroup(sk_toks_box, sk_toks)))
        self.wait(1)

        self.play(ReplacementTransform(VGroup(md_stns_box, md_stns), VGroup(md_toks_box, md_toks)))
        self.wait(1)

        # Padding Dataset
        # Same rows again, each id list passed through self.padding with a
        # target length of 20.
        sk_toks_ = VGroup(Tex(' '.join(map(str, self.padding(src_tok[0], 20)))))
        md_toks_ = VGroup(Tex(' '.join(map(str, self.padding(trg_tok[0], 20)))))

        np.random.seed(0)
        idx = np.random.randint(1, len(src_tok), 7)
        for i in range(7):
            if i != 3:
                # Shakespeare sentence
                sk_stn = Tex(' '.join(map(str, self.padding(src_tok[idx[i]], 20)))).next_to(sk_toks_[-1], DOWN).shift(0.25*DOWN)
                sk_toks_.add(sk_stn)
                # Modern sentence
                md_stn = Tex(' '.join(map(str, self.padding(trg_tok[idx[i]], 20)))).next_to(md_toks_[-1], DOWN).shift(0.25*DOWN)
                md_toks_.add(md_stn)
            else:
                # Shakespeare vdots
                sk_vdots = Tex(r"\vdots").next_to(sk_toks_[-1], DOWN).shift(0.25*DOWN)
                sk_toks_.add(sk_vdots)
                # Modern English vdots
                md_vdots = Tex(r"\vdots").next_to(md_toks_[-1], DOWN).shift(0.25*DOWN)
                md_toks_.add(md_vdots)

        sk_toks_ = sk_toks_.scale(0.35).to_edge(UP).shift(DOWN).to_edge(LEFT)
        sk_toks_box_ = SurroundingRectangle(sk_toks_, color=RED)
        md_toks_ = md_toks_.scale(0.35).to_edge(UP).shift(DOWN).to_edge(RIGHT)
        md_toks_box_ = SurroundingRectangle(md_toks_, color=BLUE)

        # Morph the unpadded id boxes into the padded ones.
        self.play(ReplacementTransform(VGroup(sk_toks_box, sk_toks), VGroup(sk_toks_box_, sk_toks_)))
        self.wait(1)

        self.play(ReplacementTransform(VGroup(md_toks_box, md_toks), VGroup(md_toks_box_, md_toks_)))
        self.wait(1)

        # Transition
        self.play(
            *[FadeOut(mob)for mob in self.mobjects]
        )
        self.wait(1)
        

                                                                                                            

In [7]:
%%manim -qk -v WARNING SpecialTokens

class SpecialTokens(Scene):
    """Scene that draws an encoder/decoder diagram and the special tokens around it."""

    def construct(self):
        """Draw the encoder with its padded input, then step the decoder token by token."""
        # Encoder
        enc_label = Tex("Encoder").scale(1).shift(3.75*LEFT+1.5*DOWN)
        enc_frame = SurroundingRectangle(
            enc_label, color=ORANGE, buff=MED_SMALL_BUFF, fill_opacity=0.15
        )
        self.play(Create(enc_frame), Write(enc_label))
        self.wait(1)

        # Raw input: sentence followed by four [PAD] slots; the first slot is
        # its own sub-mobject so it can be swapped for [EOS].
        raw_input = Tex(
            "You do not meet a man but frowns:", " [PAD]", " [PAD] [PAD] [PAD]"
        ).scale(0.45).next_to(enc_frame, DOWN).shift(0.5*DOWN)
        self.play(Write(raw_input))
        self.wait(1)
        eos_marker = Tex(" [EOS]").scale(0.45).move_to(raw_input[1])
        self.play(ReplacementTransform(raw_input[1], eos_marker))
        self.wait(1)

        input_to_enc = Line(
            raw_input.get_edge_center(UP)+0.05*UP,
            enc_frame.get_edge_center(DOWN)+0.05*DOWN,
            color=WHITE,
        )
        self.play(Create(input_to_enc))
        self.wait(1)

        # Encoder output: one sub-mobject per token, four of them tinted.
        enc_tokens = ["You", " do", " not" , " meet", " a", " man", " but",
                      " frowns:", " [EOS]", " [PAD]", " [PAD]", " [PAD]"]
        enc_output = Tex(*enc_tokens).scale(0.45).next_to(enc_frame, UP).shift(0.5*UP)
        for fragment, tint in (('You', RED_A), ('meet', RED_A),
                               ('man', GREEN_A), ('frowns:', GREEN_A)):
            enc_output.set_color_by_tex(fragment, tint)
        enc_to_output = Line(
            enc_frame.get_edge_center(UP)+0.05*UP,
            enc_output.get_edge_center(DOWN)+0.05*DOWN,
            color=WHITE,
        )
        self.play(Create(enc_to_output))
        self.wait(1)
        self.play(Write(enc_output))
        self.wait(1)

        # Decoder
        dec_label = Tex("Decoder").scale(1).next_to(enc_output, RIGHT).shift(2*RIGHT)
        dec_frame = SurroundingRectangle(
            dec_label, color=BLUE_B, buff=MED_SMALL_BUFF, fill_opacity=0.15
        )
        self.play(Create(dec_frame), Write(dec_label))
        self.wait(1)

        output_to_dec = Line(
            enc_output.get_edge_center(RIGHT)+0.05*RIGHT,
            dec_frame.get_edge_center(LEFT)+0.05*LEFT,
            color=BLUE_A,
        )
        self.play(Create(output_to_dec))

        # Decoder input starts with [BOS]; decoder output ends with [EOS].
        dec_in_tokens = ["[BOS]", " Every", " man", " you", " meet", " these", " days",
                         " is", " frowning", " [PAD]", " [PAD]", " [PAD]"]
        dec_in = Tex(*dec_in_tokens).scale(0.45).next_to(dec_frame, DOWN).shift(0.5*DOWN)
        dec_in_frame = SurroundingRectangle(dec_in, color=WHITE, buff=MED_SMALL_BUFF)
        dec_in_link = Line(
            dec_in_frame.get_edge_center(UP)+0.05*UP,
            dec_frame.get_edge_center(DOWN)+0.05*DOWN,
            color=WHITE,
        )
        dec_out_tokens = [" Every", " man", " you", " meet", " these", " days", " is",
                          " frowning", " [EOS]", " [PAD]", " [PAD]", " [PAD]"]
        dec_out = Tex(*dec_out_tokens).scale(0.45).next_to(dec_frame, UP).shift(0.5*UP)
        dec_out_frame = SurroundingRectangle(dec_out, color=WHITE, buff=MED_SMALL_BUFF)
        dec_out_link = Line(
            dec_frame.get_edge_center(UP)+0.05*UP,
            dec_out_frame.get_edge_center(DOWN)+0.05*DOWN,
            color=WHITE,
        )
        self.play(
            Create(dec_in_frame),
            Create(dec_out_frame),
            Create(dec_in_link),
            Create(dec_out_link),
        )
        self.wait(1)

        # Nine decoding steps: write the next input token, drop the previous
        # feedback arrow, write the matching output token, then (except after
        # the last step) draw a new arrow from the output box to the input box.
        total_steps = 9
        for step in range(total_steps):
            self.play(Write(dec_in[step]), run_time=0.5)
            if step > 0:
                self.play(FadeOut(feedback), run_time=0.5)
            self.play(Write(dec_out[step]), run_time=0.5)
            if step < total_steps - 1:
                feedback = CurvedArrow(
                    dec_out_frame.get_edge_center(RIGHT)+0.05*RIGHT,
                    dec_in_frame.get_edge_center(RIGHT)+0.05*RIGHT,
                    angle=-PI/2,
                )
                self.play(Create(feedback), run_time=0.5)
        self.wait(1)

        # Remaining [PAD] tokens on both sides appear together.
        self.play(Write(dec_in[9:]), Write(dec_out[9:]))
        self.wait(1)

                                                                                                                                        

In [8]:
%%manim -qk -v WARNING SpecialTokens_coded

class SpecialTokens_coded(Scene):
    """Scene that re-creates the encoder/decoder diagram and overlays code windows.

    The diagram is added statically (no animation), then three source files
    are shown one after another in a code window above the encoder output.
    """

    def _load_code(self, file_name, anchor):
        """Build the code window for ``file_name``.

        Args:
            file_name: Path of the source file to render.
            anchor: Mobject the window is placed above before being pushed to
                the left edge of the frame.

        Returns:
            The ``Code`` mobject, with its ``code`` attribute passed through
            ``remove_invisible_chars``.
        """
        window = Code(file_name=file_name, tab_width=4, background="window",
                      language="Python", font="Monospace", insert_line_no=False,
                      style="dracula", line_spacing=1).scale(0.43).next_to(anchor, UP).to_edge(LEFT)
        window.code = remove_invisible_chars(window.code)
        return window

    def construct(self):
        """Add the static diagram, then reveal and dismiss each code window."""
        # Transition
        encoder_text = Tex("Encoder").scale(1).shift(3.75*LEFT+1.5*DOWN)
        encoder_box = SurroundingRectangle(encoder_text, color=ORANGE, buff=MED_SMALL_BUFF, fill_opacity=0.15)
        input_text = Tex("You do not meet a man but frowns:", " [EOS]", " [PAD] [PAD] [PAD]").scale(0.45).next_to(encoder_box, DOWN).shift(0.5*DOWN)
        intxt2enc = Line(input_text.get_edge_center(UP)+0.05*UP, encoder_box.get_edge_center(DOWN)+0.05*DOWN, color=WHITE)
        encoded_text = Tex("You", " do", " not" , " meet", " a", " man", " but", " frowns:", " [EOS]", " [PAD]", " [PAD]", " [PAD]").scale(0.45).next_to(encoder_box, UP).shift(0.5*UP)
        encoded_text.set_color_by_tex('You', RED_A)
        encoded_text.set_color_by_tex('meet', RED_A)
        encoded_text.set_color_by_tex('man', GREEN_A)
        encoded_text.set_color_by_tex('frowns:', GREEN_A)
        enc2outtxt = Line(encoder_box.get_edge_center(UP)+0.05*UP, encoded_text.get_edge_center(DOWN)+0.05*DOWN, color=WHITE)

        decoder_text = Tex("Decoder").scale(1).next_to(encoded_text, RIGHT).shift(2*RIGHT)
        decoder_box = SurroundingRectangle(decoder_text, color=BLUE_B, buff=MED_SMALL_BUFF, fill_opacity=0.15)
        outtxt2dec = Line(encoded_text.get_edge_center(RIGHT)+0.05*RIGHT, decoder_box.get_edge_center(LEFT)+0.05*LEFT, color=BLUE_A)

        decoder_input = Tex("[BOS]", " Every", " man", " you", " meet", " these", " days", " is", " frowning", " [PAD]", " [PAD]", " [PAD]").scale(0.45).next_to(decoder_box, DOWN).shift(0.5*DOWN)
        decoder_input_box = SurroundingRectangle(decoder_input, color=WHITE, buff=MED_SMALL_BUFF)
        decoder_in_text_connect = Line(decoder_input_box.get_edge_center(UP)+0.05*UP, decoder_box.get_edge_center(DOWN)+0.05*DOWN, color=WHITE)
        decoder_output = Tex(" Every", " man", " you", " meet", " these", " days", " is", " frowning", " [EOS]", " [PAD]", " [PAD]", " [PAD]").scale(0.45).next_to(decoder_box, UP).shift(0.5*UP)
        decoder_output_box = SurroundingRectangle(decoder_output, color=WHITE, buff=MED_SMALL_BUFF)
        decoder_out_text_connect = Line(decoder_box.get_edge_center(UP)+0.05*UP, decoder_output_box.get_edge_center(DOWN)+0.05*DOWN, color=WHITE)
        # Every connector built above is added, including the line between the
        # decoder input box and the decoder, so the diagram is complete.
        self.add(encoder_text, encoder_box, input_text, intxt2enc,
                encoded_text, enc2outtxt, decoder_text, decoder_box,
                outtxt2dec, decoder_input, decoder_input_box,
                decoder_in_text_connect,
                decoder_output, decoder_output_box, decoder_out_text_connect)
        self.wait(1)

        # NOTE(review): the line ranges used in the slices below are tied to
        # the contents of the rendered files — re-check them if a file changes.

        # SRC Processing
        rendered_code = self._load_code("../data/src_processing.py", encoded_text)
        self.play(Create(rendered_code[0]), Create(rendered_code.code[0:2]))
        self.wait(1)
        self.play(Write(rendered_code.code[2:6]), Write(rendered_code.code[9:13]))
        self.wait(1)
        self.play(Write(rendered_code.code[6:9]), Write(rendered_code.code[13:]))
        self.wait(1)
        self.play(FadeOut(rendered_code))
        self.wait(1)

        # TRG IN Processing
        rendered_code = self._load_code("../data/trgin_processing.py", encoded_text)
        self.play(Create(rendered_code[0]), Create(rendered_code.code[0:2]))
        self.wait(1)
        self.play(Write(rendered_code.code[2:4]))
        self.wait(1)
        self.play(Write(rendered_code.code[4:]))
        self.wait(1)
        self.play(FadeOut(rendered_code))
        self.wait(1)

        # TRG OUT Processing
        rendered_code = self._load_code("../data/trgout_processing.py", encoded_text)
        self.play(Create(rendered_code[0]), Create(rendered_code.code[0:2]))
        self.wait(1)
        self.play(Write(rendered_code.code[2:6]), Write(rendered_code.code[9:13]))
        self.wait(1)
        self.play(Write(rendered_code.code[6:9]), Write(rendered_code.code[13:]))
        self.wait(1)
        self.wait(1)

        # Transition
        self.play(
            *[FadeOut(mob)for mob in self.mobjects]
        )
        self.wait(1)

                                                                                                     