In [1]:
from manim import *
import numpy as np

In [2]:
%%manim -v WARNING -qh --save_sections Slide1

class Slide1(Scene):
    """Opening title card that morphs into the "What is Reinforcement Learning?" slide.

    The scene is split into three sections by ``self.next_section()``:
    the title card, the slide heading with the dog image, and the bullet text.
    """

    def construct(self):
        # --- Section 1: opening title card --------------------------------
        headline = Text("Introduction To Reinforcement Learning", font_size=45)
        authors = Text("By Brian Long and Sean O Riordan", font_size=40)
        module_name = Text("AM3064 - Computational Techniques", font_size=25)
        talk_date = Text("22/02/2022", font_size=25)
        title_card = VGroup(headline, authors, module_name, talk_date).arrange(DOWN)

        # The headline is written while every other line of the card fades in.
        self.play(
            Write(headline),
            FadeIn(title_card - headline, shift=UP),
        )
        self.wait()
        self.next_section()

        # --- Section 2: headline turns into the slide heading -------------
        heading = Text("What is Reinforcement Learning?", font_size=45, color=BLUE)
        heading.to_corner(UP + LEFT)
        heading_underline = Underline(heading, color=BLUE)

        dog_photo = ImageMobject("assets/dog_img.jpg")
        dog_photo.scale_to_fit_width(0.6 * config.frame_width).shift(DOWN / 2)

        self.play(
            Transform(headline, heading),
            Create(heading_underline),
            FadeOut(title_card - headline),
            FadeIn(dog_photo),
        )
        self.wait()
        self.next_section()

        # --- Section 3: bullet text next to the image ---------------------
        # Each row: (text, words to colour yellow or None, vertical offset
        # applied after the line is placed with to_corner(LEFT)).
        bullet_rows = (
            ("-The Agent can observe and", {"Agent": YELLOW}, UP),
            ("interact with the Environment", {"Environment": YELLOW}, UP / 2),
            ("-It can perform Actions and", {"Actions": YELLOW}, ORIGIN),
            ("receives Rewards from the", {"Rewards": YELLOW}, DOWN / 2),
            ("Environment based on", None, DOWN),
            ("its action", None, 3 * DOWN / 2),
            ("-The Agent prefers Actions", None, 2 * DOWN),
            ("that maximise its Reward", {"maximise": YELLOW}, 5 * DOWN / 2),
        )
        paragraph = VGroup()
        for content, highlights, offset in bullet_rows:
            row = Text(content, t2c=highlights, font_size=25)
            row.to_corner(LEFT).shift(offset)
            paragraph.add(row)

        # Move the image aside first, then draw the text.
        self.play(dog_photo.animate.to_corner(RIGHT))
        self.play(Create(paragraph))
        self.wait()

                                                                                                                       

In [3]:
%%manim -v WARNING -qh Slide2

class Slide2(Scene):
    """Slide showing the three machine-learning paradigms as a Venn-style diagram.

    A large red "Machine Learning" circle holds three white circles labelled
    Supervised, Unsupervised and Reinforcement.
    """

    def construct(self):
        heading = Text("Machine Learning - The Three Paradigms", font_size=40, color=BLUE)
        heading.to_corner(UP + LEFT)
        heading_underline = Underline(heading, color=BLUE)

        # Outer circle and its two-line red caption.
        outer_circle = Circle(radius=3, color=RED).shift(DOWN / 2)
        caption_top = Text("Machine", font_size=40, color=RED).shift(4.5 * RIGHT)
        caption_bottom = Text("Learning", font_size=40, color=RED).shift(4.5 * RIGHT, 2 * DOWN / 3)
        diagram = VGroup(outer_circle, caption_top, caption_bottom)

        # Each spec: (label, shift applied to the circle, extra shift applied
        # to the label after it is centred on its circle).
        paradigm_specs = (
            ("Supervised", RIGHT, RIGHT / 2),
            ("Unsupervised", LEFT, LEFT / 2),
            ("Reinforcement", 3 * DOWN / 2, DOWN / 2),
        )
        for label, circle_shift, label_nudge in paradigm_specs:
            ring = Circle(radius=1.5, color=WHITE).shift(circle_shift)
            ring_label = Text(label, font_size=20, color=WHITE).move_to(ring).shift(label_nudge)
            diagram.add(ring, ring_label)

        self.play(
            Create(heading_underline),
            Create(heading),
            FadeIn(diagram),
        )
        self.wait()

                                                                                                                       

In [4]:
%%manim -v WARNING -qh Slide11

class Slide11(Scene):
    """Slide introducing Temporal Difference Learning.

    Shows the underlined title, then three bullet points together with a
    rendered pseudocode listing of the learning loop.
    """

    def construct(self):
        heading = Text("Temporal Difference Learning", font_size=45, color=BLUE)
        heading.to_corner(UP + LEFT)
        heading_underline = Underline(heading, color=BLUE)
        self.play(Create(heading), Create(heading_underline))
        self.wait()

        # Each bullet is centred on the previous element, dropped by `drop`,
        # then placed with to_corner(LEFT). The first anchor is the underline.
        bullet_specs = (
            (
                "-A central idea for Agents to learn",
                {"learn": YELLOW},
                DOWN,
            ),
            (
                "-These learning Algorithms Bootstrap, in distinct time-steps",
                {"Bootstrap": YELLOW, "distinct": YELLOW},
                DOWN / 2,
            ),
            (
                "-Guaranteed to improve or keep same level of performance",
                {"improve or keep": YELLOW},
                DOWN / 2,
            ),
        )
        bullets = VGroup()
        anchor = heading_underline
        for content, highlights, drop in bullet_specs:
            bullet = Text(content, t2c=highlights, font_size=25)
            bullet.move_to(anchor).shift(drop).to_corner(LEFT)
            bullets.add(bullet)
            anchor = bullet

        # Pseudocode listing; the adjacent literals concatenate into a single
        # string with one line of listing per literal.
        pseudocode = (
            "Input: the Policy to be evaluated\n"
            "\n"
            "Loop for each step of episode:\n"
            "    Perform Action given by Policy\n"
            "    Observe the Reward and State from this Action\n"
            "    Refine Estimate for Optimal Value Function\n"
            "    Update State\n"
        )
        listing = Code(
            code=pseudocode,
            tab_width=4,
            background="window",
            language="clean",
            font="Monospace",
        ).shift(1.5 * DOWN)

        self.play(Create(bullets), Create(listing))
        self.wait()

                                                                                                                       

In [5]:
%%manim -v WARNING -qh Slide12

class Slide12(Scene):
    """Slide presenting the Q-Learning update rule and what its symbols mean.

    Plays, in order: the underlined title; the update equation together with
    a "where:" legend for alpha, gamma and t; and finally two bullet points
    about the table of Actions and States.
    """

    def construct(self):
        heading = Text("Q-Learning", font_size=45, color=BLUE)
        heading.to_corner(UP + LEFT)
        heading_underline = Underline(heading, color=BLUE)
        self.play(Create(heading), Create(heading_underline))
        self.wait()

        # "\max" (not bare "max"): in LaTeX math mode the bare letters are set
        # as the italic variables m, a, x, whereas \max is the upright
        # operator name. The adjacent raw literals form ONE TeX string.
        update_rule = MathTex(
            r"Q\left( S_t, A_t \right) \leftarrow Q\left( S_t, A_t \right)"
            r" + \alpha\left[ R_{t+1} + \gamma \underset{a}{\max} Q\left( S_{t+1}, a \right)"
            r" - Q\left( S_t, A_t \right) \right]",
            font_size=40,
        ).shift(1.5 * UP)

        # Legend: a red "where:" label followed by one (symbol, description)
        # pair per entry. Each symbol is positioned relative to the previous
        # anchor; each description is positioned relative to its symbol.
        where_label = Text("where:", font_size=25, color=RED)
        where_label.move_to(update_rule).shift(1 * DOWN).to_corner(LEFT)

        legend_entries = (
            (r"\alpha", "is the learning rate", "learning rate"),
            (r"\gamma", "is the discount factor", "discount factor"),
            (r"t", "is the current time-step", "time-step"),
        )
        legend = VGroup(where_label)
        anchor = where_label
        for symbol_tex, description, keyword in legend_entries:
            symbol = MathTex(symbol_tex, font_size=45)
            symbol.move_to(anchor).to_corner(LEFT).shift(DOWN / 2, RIGHT / 2)
            caption = Text(description, t2c={keyword: YELLOW}, font_size=25)
            caption.move_to(symbol).to_corner(LEFT).shift(RIGHT)
            legend.add(symbol, caption)
            anchor = symbol  # the next symbol hangs off this one

        self.play(Create(legend), Write(update_rule))

        # Closing bullets, placed relative to the last legend symbol ("t").
        table_bullet = Text(
            "-All data is stored in a table with Actions & States",
            t2c={"stored in a table": YELLOW},
            font_size=25,
        ).move_to(anchor).shift(1 * DOWN).to_corner(LEFT)
        policy_bullet = Text(
            "-This determines the Action taken in a given State",
            t2c={"determines": YELLOW},
            font_size=25,
        ).move_to(table_bullet).shift(DOWN / 2).to_corner(LEFT)

        self.play(Create(VGroup(table_bullet, policy_bullet)))
        self.wait()


                                                                                                                       