<a href="https://colab.research.google.com/github/sarah-mokhtar/RL-Project-2048/blob/main/2048_RLPPO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install stable-baselines3[extra] gymnasium numpy


Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.7.0-py3-none-any.whl.metadata (4.8 kB)
Downloading stable_baselines3-2.7.0-py3-none-any.whl (187 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m187.2/187.2 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: stable-baselines3
Successfully installed stable-baselines3-2.7.0


In [40]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces


class Game2048Env(gym.Env):


    metadata = {"render_modes": ["ansi"], "render_fps": 60}

    def __init__(self, render_mode=None, target_tile=2048):
        super().__init__()

        self.board_size = 4
        self.target_tile = target_tile
        self.observation_space = spaces.Box(
            low=0,
            high=15,
            shape=(self.board_size, self.board_size),
            dtype=np.int32,
        )
        self.action_space = spaces.Discrete(4)

        self.render_mode = render_mode
        self.board = np.zeros((self.board_size, self.board_size), dtype=np.int32)
        self.score = 0
        self.rng = np.random.default_rng()

    def _slide_and_merge_line(self, line):
        """
        line: 1D np.array of exponents (0 = empty)
        Returns: (new_line, reward_from_merges)
        """
        non_zero = line[line != 0].tolist()
        new = []
        reward = 0
        i = 0
        while i < len(non_zero):
            if i + 1 < len(non_zero) and non_zero[i] == non_zero[i + 1]:
                exp = non_zero[i] + 1
                new.append(exp)
                reward += 2 ** exp
                i += 2
            else:
                new.append(non_zero[i])
                i += 1
        # pad with zeros
        new += [0] * (len(line) - len(new))
        return np.array(new, dtype=np.int32), reward

    def _add_random_tile(self):
        empty_positions = list(zip(*np.where(self.board == 0)))
        if not empty_positions:
            return
        row, col = empty_positions[self.rng.integers(len(empty_positions))]

        if self.rng.random() < 0.9:
            self.board[row, col] = 1
        else:
            self.board[row, col] = 2

    def _can_move(self):
        # If any cell empty -> can move
        if np.any(self.board == 0):
            return True
        # If any horizontal merge possible
        for i in range(self.board_size):
            for j in range(self.board_size - 1):
                if self.board[i, j] == self.board[i, j + 1]:
                    return True
        # If any vertical merge possible
        for j in range(self.board_size):
            for i in range(self.board_size - 1):
                if self.board[i, j] == self.board[i + 1, j]:
                    return True
        return False

    def _get_max_tile(self):
        exp = int(self.board.max())
        return 0 if exp == 0 else 2 ** exp

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        if seed is not None:
            self.rng = np.random.default_rng(seed)

        self.board[:] = 0
        self.score = 0

        self._add_random_tile()
        self._add_random_tile()

        observation = self.board.copy()
        info = {"score": self.score, "max_tile": self._get_max_tile()}
        return observation, info

    def step(self, action):
        assert self.action_space.contains(action), "Invalid action"

        old_board = self.board.copy()
        reward = 0


        if action == 0:  # up
            for col in range(self.board_size):
                line = self.board[:, col]
                new_line, r = self._slide_and_merge_line(line)
                self.board[:, col] = new_line
                reward += r
        elif action == 1:  # down
            for col in range(self.board_size):
                line = self.board[:, col][::-1]
                new_line, r = self._slide_and_merge_line(line)
                self.board[:, col] = new_line[::-1]
                reward += r
        elif action == 2:  # left
            for row in range(self.board_size):
                line = self.board[row, :]
                new_line, r = self._slide_and_merge_line(line)
                self.board[row, :] = new_line
                reward += r
        elif action == 3:  # right
            for row in range(self.board_size):
                line = self.board[row, :][::-1]
                new_line, r = self._slide_and_merge_line(line)
                self.board[row, :] = new_line[::-1]
                reward += r

        moved = not np.array_equal(old_board, self.board)

        if not moved:
            reward -= 1.0
        else:
            self._add_random_tile()

        self.score += reward

        max_tile = self._get_max_tile()
        terminated = False
        if not self._can_move():
            terminated = True
        if max_tile >= self.target_tile:
            terminated = True

        truncated = False

        observation = self.board.copy()
        info = {"score": self.score, "max_tile": max_tile}

        return observation, reward, terminated, truncated, info

    def render(self):
        if self.render_mode == "ansi":
            return self._board_to_string()
        else:
            print(self._board_to_string())

    def _board_to_string(self):
        display = []
        for row in self.board:
            display_row = []
            for exp in row:
                if exp == 0:
                    display_row.append(".")
                else:
                    display_row.append(str(2 ** int(exp)))
            display.append("\t".join(display_row))
        return "\n".join(display)


In [44]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv

# We already defined Game2048Env above


def make_env():
    def _init():
        env = Game2048Env()
        return env
    return _init


# Create one env to check API
env = Game2048Env()
check_env(env, warn=True)

# Vectorized env for PPO
vec_env = DummyVecEnv([make_env()])

model = PPO(
    "MlpPolicy",
    vec_env,
    verbose=1,
    learning_rate=3e-4,
    n_steps=2048,
    batch_size=256,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.01,
)

# üîÅ Training ‚Äì you can increase timesteps later
model.learn(total_timesteps=2000000)

model.save("ppo_2048")
print("Model saved as ppo_2048.zip")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
|    total_timesteps      | 1433600      |
| train/                  |              |
|    approx_kl            | 0.0018848357 |
|    clip_fraction        | 0.0085       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.546       |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | 9.44e+03     |
|    n_updates            | 6990         |
|    policy_gradient_loss | -0.00285     |
|    value_loss           | 2.21e+04     |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 584          |
|    iterations           | 701          |
|    time_elapsed         | 2457         |
|    total_timesteps      | 1435648      |
| train/                  |              |
|    approx_kl            | 0.0014606611 |
|    clip_fraction        | 0.00

In [45]:
import time
from stable_baselines3 import PPO

# Load model
model = PPO.load("ppo_2048")

env = Game2048Env(render_mode="ansi")

obs, info = env.reset()
done = False
step = 0

print("Initial board:")
print(env._board_to_string())

while not done:
    # stochastic actions so you can see exploration / variability
    action, _ = model.predict(obs, deterministic=False)
    obs, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated

    print(f"\nStep {step}, action={action}, reward={reward}, score={info['score']}")
    print(env._board_to_string())
    time.sleep(0.1)
    step += 1

print("\nEpisode finished.")
print(f"Final score: {info['score']}, max tile: {info['max_tile']}")


Initial board:
.	.	.	.
2	.	.	2
.	.	.	.
.	.	.	.

Step 0, action=0, reward=0, score=0
2	2	.	2
.	.	.	.
.	.	.	.
.	.	.	.

Step 1, action=3, reward=4, score=4
.	.	2	4
.	.	.	.
.	.	2	.
.	.	.	.

Step 2, action=1, reward=4, score=8
.	.	.	.
.	.	.	.
.	2	.	.
.	.	4	4

Step 3, action=0, reward=0, score=8
.	2	4	4
.	.	.	.
.	2	.	.
.	.	.	.

Step 4, action=3, reward=8, score=16
.	.	2	8
.	.	.	.
2	.	.	2
.	.	.	.

Step 5, action=3, reward=4, score=20
.	.	2	8
2	.	.	.
.	.	.	4
.	.	.	.

Step 6, action=1, reward=0, score=20
4	.	.	.
.	.	.	.
.	.	.	8
2	.	2	4

Step 7, action=0, reward=0, score=20
4	.	2	8
2	.	.	4
.	.	.	.
2	.	.	.

Step 8, action=1, reward=4, score=24
.	.	.	.
.	.	.	2
4	.	.	8
4	.	2	4

Step 9, action=2, reward=0, score=24
.	.	.	.
2	2	.	.
4	8	.	.
4	2	4	.

Step 10, action=0, reward=8, score=32
2	2	4	.
8	8	.	.
.	2	.	2
.	.	.	.

Step 11, action=3, reward=24, score=56
.	.	4	4
.	.	.	16
.	.	.	4
2	.	.	.

Step 12, action=1, reward=0, score=56
.	.	.	.
.	.	.	4
.	.	2	16
2	.	4	4

Step 13, action=2, reward=8, score=64
2	

In [1]:
!git config --global user.email "sarah04@mit.edu"
!git config --global user.name "sarah-mokhtar"


In [18]:
!git clone https://github.com/sarah-mokhtar/RL-Project-2048.git



fatal: destination path 'RL-Project-2048' already exists and is not an empty directory.


In [19]:
/content/RL-Project-2048


  content/RL-Project-2048()


NameError: name 'content' is not defined

In [16]:

!cp 2048\ RL.ipynb /content/RL-Project-2048/




cp: cannot stat '2048 RL.ipynb': No such file or directory


In [9]:
%cd RL-Project-2048

!git add .
!git commit -m "PPO"
!git push


[Errno 2] No such file or directory: 'RL-Project-2048'
/content/RL-Project-2048
On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
fatal: could not read Username for 'https://github.com': No such device or address


In [36]:
# Clone a team/organization repo

from getpass import getpass

# Get your Personal Access Token
print("Get token from: https://github.com/settings/tokens")
print("Make sure 'repo' scope is checked!")
token = getpass('Paste your GitHub Personal Access Token: ')

# Team/Organization repo details
org_or_username = "team-name-or-org"  # The organization/team name
repo_name = "repo-name"  # The repository name

# Clone with authentication
!git clone https://{token}@github.com/{org_or_username}/{repo_name}.git

# Navigate into repo
%cd {repo_name}

# Configure your git identity (important for team repos)
!git config user.email "your-email@example.com"
!git config user.name "Your Name"

print("‚úÖ Team repo cloned successfully!")

In [37]:
!git clone https://github.com/Ali-Backour/2048_RL.git

Cloning into '2048_RL'...
fatal: could not read Username for 'https://github.com': No such device or address
