<a href="https://colab.research.google.com/github/prithwis/AGI/blob/main/Chandrayaan_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

![CC-BY-SA](https://licensebuttons.net/l/by-sa/3.0/88x31.png)<br>


![alt text](https://github.com/Praxis-QR/RDWH/raw/main/images/YantraJaalBanner.png)<br>




[Prithwis Mukerjee](http://www.linkedin.com/in/prithwis)<br>

# ChandraYaan

In [None]:
# 1. Update package list and install the NEW opengl names
!apt-get update > /dev/null 2>&1
!apt-get install -y xvfb ffmpeg freeglut3-dev python3-opengl libgl1-mesa-dev libglu1-mesa-dev mesa-utils > /dev/null 2>&1

# 2. Install the system dependencies (SWIG is the key here)
#!apt-get update
!apt-get install -y swig build-essential python3-dev > /dev/null 2>&1

# 3. Upgrade pip and setuptools to handle the build process better
!pip install --upgrade pip setuptools wheel > /dev/null 2>&1

# 4. Now install gymnasium with box2d support
!pip install "gymnasium[box2d]" > /dev/null 2>&1

# 5. Install the Python libraries
!pip install pyvirtualdisplay  pygame opencv-python > /dev/null 2>&1


In [None]:
# 1. Dependencies are installed by the previous cell; the command is kept here
#    (commented out) only as a reminder of what this check depends on.
#!pip install "gymnasium[box2d]" > /dev/null 2>&1

# 2. Verify installation by creating and immediately closing a LunarLander env
import gymnasium as gym
try:
    env = gym.make("LunarLander-v3")
    print("üöÄ Lunar Lander is ready for takeoff!")
    env.close()
except Exception as e:
    # Any exception raised while creating/closing the environment is reported
    # (not re-raised) together with the suggested remedy.
    print(f"‚ùå Installation issue: {e}")
    print("Try: Menu -> Runtime -> Restart Session")

üöÄ Lunar Lander is ready for takeoff!


  from pkg_resources import resource_stream, resource_exists


In [None]:
import pygame
import cv2
import numpy as np
import os
import numpy as np
import random

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.display import Video, display


In [None]:
import cv2
import numpy as np
import os

class VideoRecorder:
    """Record environment frames to an AVI file with a text dashboard on top.

    Every written frame is ``width`` x ``height + dash_height`` pixels: a
    light-gray strip showing the step counter and cumulative reward, stacked
    above the resized environment frame. ``stop()`` finalizes the AVI and
    converts it to an H.264 MP4 with ffmpeg.

    Args:
        filename: Path of the intermediate video file written by OpenCV.
        width: Width in pixels of the output video.
        height: Height in pixels of the environment view (dashboard excluded).
        fps: Frame rate passed to the video writer.
    """

    def __init__(self, filename='simulation.avi', width=400, height=400, fps=15):
        self.filename = filename
        self.width = width
        # Add 60 pixels to the height for the dedicated dashboard area
        self.dash_height = 60
        self.total_height = height + self.dash_height
        self.fps = fps
        self.fourcc = cv2.VideoWriter_fourcc(*'XVID')
        self.video_writer = None  # created by start()

    def start(self):
        """Open the video writer, replacing any existing file at ``filename``."""
        if os.path.exists(self.filename):
            os.remove(self.filename)
        # Crucial: The dimensions here MUST match the frames we write later
        self.video_writer = cv2.VideoWriter(
            self.filename, self.fourcc, self.fps, (self.width, self.total_height)
        )

    def record_frame_with_hud(self, frame_array, reward, step):
        """Write one frame consisting of the dashboard stacked above the environment view.

        Args:
            frame_array: Environment frame as a 3-channel image array. It is
                written as-is, so it should already be in OpenCV's BGR order
                (the callers in this notebook convert RGB -> BGR first).
            reward: Cumulative reward shown on the dashboard.
            step: Step counter shown on the dashboard.

        Raises:
            RuntimeError: If ``start()`` has not been called yet.
        """
        # Fail with an explicit message instead of "'NoneType' has no attribute 'write'"
        if self.video_writer is None:
            raise RuntimeError(
                "VideoRecorder.start() must be called before recording frames"
            )

        # 1. Resize the environment frame to the view area (total height minus dashboard)
        view = cv2.resize(frame_array, (self.width, self.total_height - self.dash_height))

        # 2. Create the light-colored Dashboard (the background for text)
        # BGR (240, 240, 240) is a nice light gray
        dashboard = np.full((self.dash_height, self.width, 3), 240, dtype=np.uint8)

        # 3. Add small, dark text to the dashboard
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.45  # Small and crisp
        thickness = 1
        text_color = (60, 60, 60)  # Dark Gray

        cv2.putText(dashboard, f"STEP: {step}", (15, 20),
                    font, font_scale, text_color, thickness, cv2.LINE_AA)
        cv2.putText(dashboard, f"CUMULATIVE REWARD: {reward}", (15, 45),
                    font, font_scale, text_color, thickness, cv2.LINE_AA)

        # 4. Stack them: Dashboard on TOP, Environment on BOTTOM
        combined_frame = np.vstack((dashboard, view))

        # 5. Write to file
        self.video_writer.write(combined_frame)

    def stop(self):
        """Finalize the recording and convert it to MP4 for browser playback.

        Returns:
            The path of the MP4 file: ``filename`` with its extension replaced
            by ``.mp4``. The path is returned even when the conversion was
            skipped or failed; a message is printed in that case.
        """
        import subprocess  # local import: only needed for the conversion step

        if self.video_writer:
            self.video_writer.release()

        # Replace only the real extension (str.replace would also rewrite any
        # other '.avi' inside the path and leave non-.avi names unchanged).
        output_mp4 = os.path.splitext(self.filename)[0] + '.mp4'

        # Nothing was recorded (e.g. start() was never called): nothing to convert.
        if not os.path.exists(self.filename):
            return output_mp4

        # Argument list instead of a shell string, so file names containing
        # spaces or shell metacharacters are passed to ffmpeg unmodified.
        command = [
            "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
            "-i", self.filename,
            "-c:v", "libx264", "-pix_fmt", "yuv420p",
            output_mp4,
        ]
        try:
            result = subprocess.run(command, check=False)
        except FileNotFoundError:
            print("ffmpeg was not found; the recording was not converted to MP4.")
            return output_mp4

        if result.returncode != 0:
            print(f"ffmpeg exited with code {result.returncode}; {output_mp4} may be missing or incomplete.")
        return output_mp4

In [None]:
import gymnasium as gym

# Initialize the environment
# 'rgb_array' is required so your VideoRecorder can "see" the frames:
# with this render mode env.render() hands back the frame as an image array.
# NOTE(review): the next cell rebinds `env` to a new environment without
# closing this one first -- confirm whether this cell is still needed.
env = gym.make("LunarLander-v3", render_mode="rgb_array")

In [None]:
import gymnasium as gym
import cv2
import numpy as np
import os

# 1. Setup Environment
env = gym.make("LunarLander-v3", render_mode="rgb_array")

# 2. Setup Recorder with Lander Dimensions (600x400)
# We call .start() before recording any frame; recording without it fails.
recorder = VideoRecorder(filename='lunar_test.avi', width=600, height=400, fps=30)
recorder.start()

total_reward = 0
step_count = 0
terminated = False
truncated = False

try:
    observation, info = env.reset()

    print("üöÄ Pilot engaged. Recording 600x400 Lunar Flight...")

    # Fly one episode with uniformly random actions, recording every step
    while not (terminated or truncated):
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        step_count += 1

        # Get the frame
        frame_rgb = env.render()

        # Gymnasium (RGB) -> OpenCV (BGR)
        frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)

        # Use your HUD method
        recorder.record_frame_with_hud(frame_bgr, round(total_reward, 2), step_count)
finally:
    # 3. Close everything, even if the episode loop raised, so the video file
    #    is finalized and the environment is released.
    final_file = recorder.stop()  # This runs the ffmpeg conversion too
    env.close()

# Report the path actually returned by the recorder instead of a hard-coded name
print(f"üèÅ Mission Complete. Video saved as {final_file}")

# If in Jupyter/Colab
from IPython.display import Video, display
display(Video(final_file, embed=True))

üöÄ Pilot engaged. Recording 600x400 Lunar Flight...
üèÅ Mission Complete. Video saved as lunar_test.mp4


# Heuristic Pilot -- no "learning" here

In [None]:
from gymnasium.envs.box2d.lunar_lander import heuristic

# 1. Setup Environment
env = gym.make("LunarLander-v3", render_mode="rgb_array")

# 2. Setup Recorder (same settings as the random-pilot recording)
recorder = VideoRecorder(filename='heuristic_landing.avi', width=600, height=400, fps=30)
recorder.start()

total_reward = 0
step_count = 0
terminated = False
truncated = False

try:
    observation, info = env.reset()

    print("üöÄ Heuristic Pilot taking the controls...")

    while not (terminated or truncated):
        # Instead of random sampling, we use the hand-written controller that
        # ships with the environment. It needs the unwrapped env instance.
        action = heuristic(env.unwrapped, observation)

        observation, reward, terminated, truncated, info = env.step(action)
        total_reward += reward
        step_count += 1

        # Render and Record
        frame_rgb = env.render()
        frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)

        # Use your HUD to see the high score climb!
        recorder.record_frame_with_hud(frame_bgr, round(total_reward, 2), step_count)
finally:
    # Finalize the video and release the environment even if the loop raised
    final_file = recorder.stop()
    env.close()

# Lunar Lander counts an episode as solved at 200 points or more; only claim a
# perfect landing when that threshold was actually reached.
if total_reward >= 200:
    print(f"üèÅ Perfect Landing! Final Score: {total_reward:.2f}")
else:
    print(f"Landing attempt ended below the 200-point target. Final Score: {total_reward:.2f}")

# If in Jupyter/Colab
from IPython.display import Video, display
display(Video(final_file, embed=True))

üöÄ Heuristic Pilot taking the controls...
üèÅ Perfect Landing! Final Score: 289.22


# Now Neural Networks

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque

# Define the Neural Network "Brain"
class DQN(nn.Module):
    """Fully connected Q-network.

    Two 64-unit hidden layers with ReLU activations, followed by a linear
    output layer that produces one value per action.

    Args:
        state_size: Number of input features.
        action_size: Number of outputs (one per action).
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        """Return the network output for input tensor ``x``."""
        hidden = x
        # Both hidden layers are followed by a ReLU; the output layer is linear.
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)

# Stand-alone network with its own optimizer and loss function.
# NOTE(review): nothing visible later in the notebook reads `brain`,
# `optimizer` or `criterion` (DQNAgent builds its own model) -- confirm
# whether this cell is still needed.
state_dim, action_dim = 8, 4
brain = DQN(state_dim, action_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(brain.parameters(), lr=1e-3)

# Training an Agent

In [None]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.99    # Discount factor
        self.epsilon = 1.0   # Exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = DQN(state_size, action_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state = torch.FloatTensor(state).unsqueeze(0)
        act_values = self.model(state)
        return torch.argmax(act_values[0]).item()

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_state = torch.FloatTensor(next_state).unsqueeze(0)
                target = (reward + self.gamma * torch.max(self.model(next_state)[0]).item())

            state = torch.FloatTensor(state).unsqueeze(0)
            target_f = self.model(state)
            target_f[0][action] = target

            self.optimizer.zero_grad()
            loss = nn.MSELoss()(self.model(state), target_f)
            loss.backward()
            self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Create the agent used by the training cells below (state size 8, 4 actions).
# Without this line `agent` is undefined on a fresh kernel.
agent = DQNAgent(8, 4)

In [None]:
import time as python_time

episodes = 500
batch_size = 32
max_steps = 500  # Step cap per episode

# Build a fresh environment for training: the `env` left over from the
# heuristic-pilot cell has already been closed there.
env = gym.make("LunarLander-v3", render_mode="rgb_array")

try:
    for e in range(episodes):
        state, info = env.reset()
        state = np.reshape(state, [8])

        # Record only the first and the last episode. The same flag gates
        # starting the recorder, writing frames and stopping it, so the three
        # can no longer disagree about which episodes are recorded.
        should_record = (e == 0 or e == episodes - 1)
        if should_record:
            recorder = VideoRecorder(filename=f'dqn_flight_ep_{e}.avi', width=600, height=400)
            recorder.start()

        total_reward = 0
        try:
            for step in range(max_steps):
                action = agent.act(state)
                next_state, reward, terminated, truncated, info = env.step(action)
                done = terminated or truncated
                next_state = np.reshape(next_state, [8])

                # Store the transition; the network is updated by replay() below
                agent.remember(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward

                if should_record:
                    frame = cv2.cvtColor(env.render(), cv2.COLOR_RGB2BGR)
                    recorder.record_frame_with_hud(frame, round(total_reward, 2), step)

                if done:
                    break
        finally:
            # Finalize the video even if the episode loop raised
            if should_record:
                recorder.stop()
                # NOTE(review): pause kept from the original cell; confirm it is still needed.
                python_time.sleep(1)

        # Report every episode, including those that hit the step cap
        print(f"Episode: {e}/{episodes}, Score: {total_reward:.2f}, Epsilon: {agent.epsilon:.2f}")

        # One experience-replay update per episode
        agent.replay(batch_size)
finally:
    env.close()

# If in Jupyter/Colab
#from IPython.display import Video, display
#display(Video(first_flight, embed=True))
#display(Video(last_flight, embed=True))

Episode: 121/500, Score: -171.05, Epsilon: 0.03
Episode: 122/500, Score: -179.58, Epsilon: 0.03
Episode: 123/500, Score: -169.41, Epsilon: 0.03
Episode: 129/500, Score: -81.88, Epsilon: 0.03
Episode: 175/500, Score: -183.47, Epsilon: 0.02
Episode: 178/500, Score: -157.52, Epsilon: 0.02
Episode: 204/500, Score: -132.80, Epsilon: 0.02
Episode: 205/500, Score: -153.59, Epsilon: 0.02
Episode: 207/500, Score: -132.66, Epsilon: 0.02
Episode: 211/500, Score: -167.12, Epsilon: 0.02
Episode: 212/500, Score: -124.84, Epsilon: 0.02
Episode: 215/500, Score: -83.34, Epsilon: 0.02
Episode: 226/500, Score: -144.53, Epsilon: 0.02
Episode: 229/500, Score: -137.75, Epsilon: 0.02
Episode: 233/500, Score: -123.86, Epsilon: 0.02
Episode: 237/500, Score: -120.89, Epsilon: 0.02
Episode: 238/500, Score: -137.43, Epsilon: 0.01
Episode: 239/500, Score: -106.42, Epsilon: 0.01
Episode: 242/500, Score: -103.12, Epsilon: 0.01
Episode: 243/500, Score: -122.33, Epsilon: 0.01
Episode: 246/500, Score: -121.00, Epsilon:

In [None]:
# Show the recording of milestone episode 0, if it has been produced.
# The path is relative because the recorder writes its files relative to the
# working directory; this also works outside Colab's /content folder.
# NOTE(review): files named dqn_milestone_* are written by a training cell
# further down -- confirm the intended cell order.
milestone_video = "dqn_milestone_0.mp4"
if os.path.exists(milestone_video):
    display(Video(milestone_video, embed=True))
else:
    print(f"{milestone_video} not found -- run the training cell that records it first.")

In [None]:
# Show the recording of milestone episode 499, if it has been produced.
# The path is relative because the recorder writes its files relative to the
# working directory; this also works outside Colab's /content folder.
# NOTE(review): files named dqn_milestone_* are written by a training cell
# further down -- confirm the intended cell order.
milestone_video = "dqn_milestone_499.mp4"
if os.path.exists(milestone_video):
    display(Video(milestone_video, embed=True))
else:
    print(f"{milestone_video} not found -- run the training cell that records it first.")

In [None]:
import numpy as np

episodes = 500
MAX_STEPS = 600 # Safety cap per flight
REPLAY_BATCH_SIZE = 32  # Transitions sampled for each experience-replay update

# Build a fresh environment for this run instead of reusing an `env` that an
# earlier cell has already closed.
env = gym.make("LunarLander-v3", render_mode="rgb_array")

try:
    for e in range(episodes):
        state, info = env.reset()
        state = np.reshape(state, [8])
        total_reward = 0

        # 1. ONLY start the recorder on specific milestones (first and last episode)
        should_record = (e == 0 or e == episodes - 1)

        if should_record:
            print(f"üé¨ Recording Milestone Episode: {e}")
            recorder = VideoRecorder(filename=f'dqn_milestone_{e}.avi', width=600, height=400)
            recorder.start()

        try:
            for step_count in range(MAX_STEPS):
                action = agent.act(state)
                observation, reward, terminated, truncated, info = env.step(action)

                done = terminated or truncated
                next_state = np.reshape(observation, [8])

                # Every transition is stored, even when not recording; the
                # network itself is only updated by agent.replay() after the episode.
                agent.remember(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward

                # 2. ONLY record frames if we are in a milestone episode
                if should_record:
                    frame_rgb = env.render()
                    frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
                    recorder.record_frame_with_hud(frame_bgr, round(total_reward, 2), step_count)

                if done:
                    break
        finally:
            # 3. ONLY stop/convert the video if we were recording -- and do it
            #    even if the episode loop raised, so the file is finalized.
            if should_record:
                recorder.stop()
                print(f"‚úÖ Video for Episode {e} saved.")

        # Experience Replay (The Brain Update), once per episode
        agent.replay(REPLAY_BATCH_SIZE)

        if e % 50 == 0:
            print(f"Episode: {e}/{episodes} | Score: {total_reward:.2f} | Epsilon: {agent.epsilon:.2f}")
finally:
    env.close()

üé¨ Recording Milestone Episode: 0
‚úÖ Video for Episode 0 saved.
Episode: 0/500 | Score: -58.39 | Epsilon: 0.01
Episode: 50/500 | Score: -20.17 | Epsilon: 0.01
Episode: 100/500 | Score: -159.06 | Epsilon: 0.01
Episode: 150/500 | Score: -127.83 | Epsilon: 0.01
Episode: 200/500 | Score: -106.13 | Epsilon: 0.01
Episode: 250/500 | Score: -97.23 | Epsilon: 0.01
Episode: 300/500 | Score: -77.35 | Epsilon: 0.01
Episode: 350/500 | Score: -114.64 | Epsilon: 0.01
Episode: 400/500 | Score: -37.73 | Epsilon: 0.01
Episode: 450/500 | Score: -57.53 | Epsilon: 0.01
üé¨ Recording Milestone Episode: 499
‚úÖ Video for Episode 499 saved.


# Tuning to prevent hover

In [None]:
!rm dqn*.*

rm: cannot remove 'dqn*.*': No such file or directory


In [None]:
import numpy as np

agentY = DQNAgent(8, 4)

episodes = 500
MAX_STEPS = 600 # Safety cap per flight

# Reward-shaping settings
HOVER_HEIGHT_LIMIT = 1.0     # y position above which the hover penalty applies
HOVER_PENALTY = 2.0          # "stay on screen" penalty per step above the limit
MAIN_ENGINE_ACTION = 2       # action index of the main engine
MAIN_ENGINE_PENALTY = 0.5    # extra cost per main-engine firing
TIMEOUT_PENALTY = 100        # applied once, when the step cap ends the flight
REPLAY_BATCH_SIZE = 64       # transitions sampled for each replay update

# Build a fresh environment for this run instead of reusing an `env` that an
# earlier cell has already closed.
env = gym.make("LunarLander-v3", render_mode="rgb_array")

try:
    for e in range(episodes):
        state, info = env.reset()
        state = np.reshape(state, [8])
        total_reward = 0

        # 1. ONLY start the recorder on specific milestones (first and last episode)
        should_record = (e == 0 or e == episodes - 1)

        if should_record:
            print(f"üé¨ Recording Milestone Episode: {e}")
            recorder = VideoRecorder(filename=f'dqn_milestone_{e}.avi', width=600, height=400)
            recorder.start()

        try:
            for step_count in range(MAX_STEPS):
                action = agentY.act(state)
                observation, reward, terminated, truncated, info = env.step(action)

                # Manually punish hovering too high (y is the 2nd element in the observation)
                y_pos = observation[1]
                if y_pos > HOVER_HEIGHT_LIMIT:
                    reward -= HOVER_PENALTY  # Apply a "stay on screen" penalty

                if action == MAIN_ENGINE_ACTION: # Main Engine
                    reward -= MAIN_ENGINE_PENALTY # Increase the cost of firing the big engine

                done = terminated or truncated

                # Timeout penalty: charge it ONCE, on the final allowed step of a
                # flight that is still going (i.e. it hovered until the cap), and
                # store that transition as terminal. Applying it to every
                # non-terminal step would swamp all other rewards.
                timed_out = (not done) and step_count == MAX_STEPS - 1
                if timed_out:
                    reward -= TIMEOUT_PENALTY
                    done = True

                next_state = np.reshape(observation, [8])

                # Every transition is stored, even when not recording; the
                # network itself is only updated by agentY.replay() after the episode.
                agentY.remember(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward

                # 2. ONLY record frames if we are in a milestone episode
                if should_record:
                    frame_rgb = env.render()
                    frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
                    recorder.record_frame_with_hud(frame_bgr, round(total_reward, 2), step_count)

                if done:
                    break
        finally:
            # 3. ONLY stop/convert the video if we were recording -- and do it
            #    even if the episode loop raised, so the file is finalized.
            if should_record:
                recorder.stop()
                print(f"‚úÖ Video for Episode {e} saved.")

        # Experience Replay (The Brain Update), once per episode
        agentY.replay(REPLAY_BATCH_SIZE)

        if e % 50 == 0:
            print(f"Episode: {e}/{episodes} | Score: {total_reward:.2f} | Epsilon: {agentY.epsilon:.2f}")
finally:
    env.close()

üé¨ Recording Milestone Episode: 0
‚úÖ Video for Episode 0 saved.
Episode: 0/500 | Score: -11837.72 | Epsilon: 0.99
Episode: 50/500 | Score: -16969.22 | Epsilon: 0.77
Episode: 100/500 | Score: -5442.04 | Epsilon: 0.60
Episode: 150/500 | Score: -8324.97 | Epsilon: 0.47
Episode: 200/500 | Score: -7798.15 | Epsilon: 0.37
Episode: 250/500 | Score: -9830.41 | Epsilon: 0.28
Episode: 300/500 | Score: -6619.96 | Epsilon: 0.22
Episode: 350/500 | Score: -6941.40 | Epsilon: 0.17
Episode: 400/500 | Score: -6346.28 | Epsilon: 0.13
Episode: 450/500 | Score: -10344.96 | Epsilon: 0.10
üé¨ Recording Milestone Episode: 499
‚úÖ Video for Episode 499 saved.


In [None]:
# Show the first and last milestone recordings of the tuned run.
# Paths are relative because the recorder writes its files relative to the
# working directory; this also works outside Colab's /content folder.
for milestone_video in ("dqn_milestone_0.mp4", "dqn_milestone_499.mp4"):
    if os.path.exists(milestone_video):
        display(Video(milestone_video, embed=True))
    else:
        print(f"{milestone_video} not found -- run the training cell above first.")

# Chronobooks <br>
Three science fiction novels by Prithwis Mukerjee. A dystopian Earth. A technocratic society managed by artificial intelligence. Escape and epiphany on Mars. Can man and machine, carbon and silicon explore and escape into other dimensions of existence? An Indic perspective rooted in Advaita Vedanta and the Divine Feminine.  [More information](http://bit.ly/chrono3) <br>
![alt text](https://blogger.googleusercontent.com/img/a/AVvXsEjsZufX_KYaLwAnJP6bUxvDg5RSPn6r8HIZe749nLWX3RuwyshrYEAUpdw03a9WIWRdnzA9epwJOE05eDJ0Ad7kGyfWiUrC2vNuOskb2jA-e8aOZSx8YqzT8mfZi3E4X1Rz3qlEAiv-aTxlCM976BEeTjx4J64ctY3C_FoV4v9aY_U23F8xRqI5Eg=s1600)