From a7cfdbfcec6964ac04d7541b81013d00bd0b193c Mon Sep 17 00:00:00 2001 From: Sanyam Bhutani Date: Tue, 21 Oct 2025 11:10:20 -0700 Subject: [PATCH] Update OpenEnv_Tutorial.ipynb --- examples/OpenEnv_Tutorial.ipynb | 664 ++++++++++++++++---------------- 1 file changed, 332 insertions(+), 332 deletions(-) diff --git a/examples/OpenEnv_Tutorial.ipynb b/examples/OpenEnv_Tutorial.ipynb index 894d8648..f875bcb2 100644 --- a/examples/OpenEnv_Tutorial.ipynb +++ b/examples/OpenEnv_Tutorial.ipynb @@ -7,19 +7,19 @@ "source": [ "
\n", "\n", - "\"PyTorch\"\n", + "\"PyTorch\"\n", "\n", "\n", "\n", "# OpenEnv: Production RL Made Simple\n", "\n", - "### *From \"Hello World\" to RL Training in 5 Minutes* ✨\n", + "### *From \"Hello World\" to RL Training in 5 Minutes* \u2728\n", "\n", "---\n", "\n", "**What if RL environments were as easy to use as REST APIs?**\n", "\n", - "That's OpenEnv. Type-safe. Isolated. Production-ready. šŸŽÆ\n", + "That's OpenEnv. Type-safe. Isolated. Production-ready. \ud83c\udfaf\n", "\n", "[![GitHub](https://img.shields.io/badge/GitHub-meta--pytorch%2FOpenEnv-blue?logo=github)](https://github.com/meta-pytorch/OpenEnv)\n", "[![License](https://img.shields.io/badge/License-BSD%203--Clause-green.svg)](https://opensource.org/licenses/BSD-3-Clause)\n", @@ -68,50 +68,50 @@ "id": "cell-1", "metadata": {}, "source": [ - "## šŸ“‹ What You'll Learn\n", + "## \ud83d\udccb What You'll Learn\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
\n", "\n", - "**šŸŽÆ Part 1-2: The Fundamentals**\n", - "- ⚔ RL in 60 seconds\n", - "- šŸ¤” Why existing solutions fall short\n", - "- šŸ’” The OpenEnv solution\n", + "**\ud83c\udfaf Part 1-2: The Fundamentals**\n", + "- \u26a1 RL in 60 seconds\n", + "- \ud83e\udd14 Why existing solutions fall short\n", + "- \ud83d\udca1 The OpenEnv solution\n", "\n", "\n", "\n", - "**šŸ—ļø Part 3-5: The Architecture**\n", - "- šŸ”§ How OpenEnv works\n", - "- šŸ” Exploring real code\n", - "- šŸŽ® OpenSpiel integration example\n", + "**\ud83c\udfd7\ufe0f Part 3-5: The Architecture**\n", + "- \ud83d\udd27 How OpenEnv works\n", + "- \ud83d\udd0d Exploring real code\n", + "- \ud83c\udfae OpenSpiel integration example\n", "\n", "
\n", "\n", - "**šŸŽ® Part 6-8: Hands-On Demo**\n", - "- šŸ”Œ Use existing OpenSpiel environment\n", - "- šŸ¤– Test 4 different policies\n", - "- šŸ‘€ Watch learning happen live\n", + "**\ud83c\udfae Part 6-8: Hands-On Demo**\n", + "- \ud83d\udd0c Use existing OpenSpiel environment\n", + "- \ud83e\udd16 Test 4 different policies\n", + "- \ud83d\udc40 Watch learning happen live\n", "\n", "\n", "\n", - "**šŸ”§ Part 9-10: Going Further**\n", - "- šŸŽ® Switch to other OpenSpiel games\n", - "- ✨ Build your own integration\n", - "- 🌐 Deploy to production\n", + "**\ud83d\udd27 Part 9-10: Going Further**\n", + "- \ud83c\udfae Switch to other OpenSpiel games\n", + "- \u2728 Build your own integration\n", + "- \ud83c\udf10 Deploy to production\n", "\n", "
\n", "\n", - "> šŸ’” **Pro Tip**: This notebook is designed to run top-to-bottom in Google Colab with zero setup!\n", + "> \ud83d\udca1 **Pro Tip**: This notebook is designed to run top-to-bottom in Google Colab with zero setup!\n", ">\n", - "> ā±ļø **Time**: ~5 minutes | šŸ“Š **Difficulty**: Beginner-friendly | šŸŽÆ **Outcome**: Production-ready RL knowledge\n" + "> \u23f1\ufe0f **Time**: ~5 minutes | \ud83d\udcca **Difficulty**: Beginner-friendly | \ud83c\udfaf **Outcome**: Production-ready RL knowledge\n" ] }, { @@ -121,33 +121,33 @@ "source": [ "---\n", "\n", - "## šŸ“‘ Table of Contents\n", + "## \ud83d\udcd1 Table of Contents\n", "\n", "
\n", "\n", - "**Quick Navigation** - Click any section to jump right there! šŸŽÆ\n", + "**Quick Navigation** - Click any section to jump right there! \ud83c\udfaf\n", "\n", "### Foundation\n", - "- [Part 1: RL in 60 Seconds ā±ļø](#part-1)\n", - "- [Part 2: The Problem with Traditional RL 😤](#part-2)\n", - "- [Part 3: Setup šŸ› ļø](#part-3)\n", + "- [Part 1: RL in 60 Seconds \u23f1\ufe0f](#part-1)\n", + "- [Part 2: The Problem with Traditional RL \ud83d\ude24](#part-2)\n", + "- [Part 3: Setup \ud83d\udee0\ufe0f](#part-3)\n", "\n", "### Architecture\n", - "- [Part 4: The OpenEnv Pattern šŸ—ļø](#part-4)\n", - "- [Part 5: Example Integration - OpenSpiel šŸŽ®](#part-5)\n", + "- [Part 4: The OpenEnv Pattern \ud83c\udfd7\ufe0f](#part-4)\n", + "- [Part 5: Example Integration - OpenSpiel \ud83c\udfae](#part-5)\n", "\n", "### Hands-On Demo\n", - "- [Part 6: Interactive Demo šŸŽ®](#part-6)\n", - "- [Part 7: Four Policies šŸ¤–](#part-7)\n", - "- [Part 8: Policy Competition! šŸ†](#part-8)\n", + "- [Part 6: Interactive Demo \ud83c\udfae](#part-6)\n", + "- [Part 7: Four Policies \ud83e\udd16](#part-7)\n", + "- [Part 8: Policy Competition! \ud83c\udfc6](#part-8)\n", "\n", "### Advanced\n", - "- [Part 9: Using Real OpenSpiel šŸŽ®](#part-9)\n", - "- [Part 10: Create Your Own Integration šŸ› ļø](#part-10)\n", + "- [Part 9: Using Real OpenSpiel \ud83c\udfae](#part-9)\n", + "- [Part 10: Create Your Own Integration \ud83d\udee0\ufe0f](#part-10)\n", "\n", "### Wrap Up\n", - "- [Summary: Your Journey šŸŽ“](#summary)\n", - "- [Resources šŸ“š](#resources)\n", + "- [Summary: Your Journey \ud83c\udf93](#summary)\n", + "- [Resources \ud83d\udcda](#resources)\n", "\n", "
\n", "\n", @@ -161,7 +161,7 @@ "source": [ "---\n", "\n", - "# Part 1: RL in 60 Seconds ā±ļø\n", + "# Part 1: RL in 60 Seconds \u23f1\ufe0f\n", "\n", "
\n", "\n", @@ -193,16 +193,16 @@ "source": [ "import random\n", "\n", - "print(\"šŸŽ² \" + \"=\"*58 + \" šŸŽ²\")\n", + "print(\"\ud83c\udfb2 \" + \"=\"*58 + \" \ud83c\udfb2\")\n", "print(\" Number Guessing Game - The Simplest RL Example\")\n", - "print(\"šŸŽ² \" + \"=\"*58 + \" šŸŽ²\")\n", + "print(\"\ud83c\udfb2 \" + \"=\"*58 + \" \ud83c\udfb2\")\n", "\n", "# Environment setup\n", "target = random.randint(1, 10)\n", "guesses_left = 3\n", "\n", - "print(f\"\\nšŸŽÆ I'm thinking of a number between 1 and 10...\")\n", - "print(f\"šŸ’­ You have {guesses_left} guesses. Let's see how random guessing works!\\n\")\n", + "print(f\"\\n\ud83c\udfaf I'm thinking of a number between 1 and 10...\")\n", + "print(f\"\ud83d\udcad You have {guesses_left} guesses. Let's see how random guessing works!\\n\")\n", "\n", "# The RL Loop - Pure random policy (no learning!)\n", "while guesses_left > 0:\n", @@ -210,21 +210,21 @@ " guess = random.randint(1, 10)\n", " guesses_left -= 1\n", " \n", - " print(f\"šŸ’­ Guess #{3-guesses_left}: {guess}\", end=\" → \")\n", + " print(f\"\ud83d\udcad Guess #{3-guesses_left}: {guess}\", end=\" \u2192 \")\n", " \n", " # Reward signal (but we're not using it!)\n", " if guess == target:\n", - " print(\"šŸŽ‰ Correct! +10 points\")\n", + " print(\"\ud83c\udf89 Correct! +10 points\")\n", " break\n", " elif abs(guess - target) <= 2:\n", - " print(\"šŸ”„ Warm! (close)\")\n", + " print(\"\ud83d\udd25 Warm! (close)\")\n", " else:\n", - " print(\"ā„ļø Cold! (far)\")\n", + " print(\"\u2744\ufe0f Cold! (far)\")\n", "else:\n", - " print(f\"\\nšŸ’” Out of guesses. The number was {target}.\")\n", + " print(f\"\\n\ud83d\udc94 Out of guesses. The number was {target}.\")\n", "\n", "print(\"\\n\" + \"=\"*62)\n", - "print(\"šŸ’” This is RL: Observe → Act → Reward → Repeat\")\n", + "print(\"\ud83d\udca1 This is RL: Observe \u2192 Act \u2192 Reward \u2192 Repeat\")\n", "print(\" But this policy is terrible! It doesn't learn from rewards.\")\n", "print(\"=\"*62 + \"\\n\")" ] @@ -237,11 +237,11 @@ "---\n", "\n", "\n", - "# Part 2: The Problem with Traditional RL 😤\n", + "# Part 2: The Problem with Traditional RL \ud83d\ude24\n", "\n", "
\n", "\n", - "## šŸ¤” Why Can't We Just Use OpenAI Gym?\n", + "## \ud83e\udd14 Why Can't We Just Use OpenAI Gym?\n", "\n", "Good question! Gym is great for research, but production needs more...\n", "\n", @@ -255,50 +255,50 @@ "\n", "\n", "Type Safety\n", - "āŒ obs[0][3] - what is this?\n", - "āœ… obs.info_state - IDE knows!\n", + "\u274c obs[0][3] - what is this?\n", + "\u2705 obs.info_state - IDE knows!\n", "\n", "\n", "Isolation\n", - "āŒ Same process (can crash your training)\n", - "āœ… Docker containers (fully isolated)\n", + "\u274c Same process (can crash your training)\n", + "\u2705 Docker containers (fully isolated)\n", "\n", "\n", "Deployment\n", - "āŒ \"Works on my machine\" 🤷\n", - "āœ… Same container everywhere 🐳\n", + "\u274c \"Works on my machine\" \ud83e\udd37\n", + "\u2705 Same container everywhere \ud83d\udc33\n", "\n", "\n", "Scaling\n", - "āŒ Hard to distribute\n", - "āœ… Deploy to Kubernetes ā˜øļø\n", + "\u274c Hard to distribute\n", + "\u2705 Deploy to Kubernetes \u2638\ufe0f\n", "\n", "\n", "Language\n", - "āŒ Python only\n", - "āœ… Any language (HTTP API) 🌐\n", + "\u274c Python only\n", + "\u2705 Any language (HTTP API) \ud83c\udf10\n", "\n", "\n", "Debugging\n", - "āŒ Cryptic numpy errors\n", - "āœ… Clear type errors šŸ›\n", + "\u274c Cryptic numpy errors\n", + "\u2705 Clear type errors \ud83d\udc1b\n", "\n", "\n", "\n", "
\n", "\n", - "## šŸ’” The OpenEnv Philosophy\n", + "## \ud83d\udca1 The OpenEnv Philosophy\n", "\n", "**\"RL environments should be like microservices\"**\n", "\n", "Think of it like this: You don't run your database in the same process as your web server, right? Same principle!\n", "\n", - "- šŸ”’ **Isolated**: Run in containers (security + stability)\n", - "- 🌐 **Standard**: HTTP API, works everywhere\n", - "- šŸ“¦ **Versioned**: Docker images (reproducibility!)\n", - "- šŸš€ **Scalable**: Deploy to cloud with one command\n", - "- šŸ›”ļø **Type-safe**: Catch bugs before they happen\n", - "- šŸ”„ **Portable**: Works on Mac, Linux, Windows, Cloud\n", + "- \ud83d\udd12 **Isolated**: Run in containers (security + stability)\n", + "- \ud83c\udf10 **Standard**: HTTP API, works everywhere\n", + "- \ud83d\udce6 **Versioned**: Docker images (reproducibility!)\n", + "- \ud83d\ude80 **Scalable**: Deploy to cloud with one command\n", + "- \ud83d\udee1\ufe0f **Type-safe**: Catch bugs before they happen\n", + "- \ud83d\udd04 **Portable**: Works on Mac, Linux, Windows, Cloud\n", "\n", "
" ] @@ -311,34 +311,34 @@ "### The Architecture\n", "\n", "```\n", - "ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”\n", - "│ YOUR TRAINING CODE │\n", - "│ │\n", - "│ env = OpenSpielEnv(...) ← Import the client │\n", - "│ result = env.reset() ← Type-safe! │\n", - "│ result = env.step(action) ← Type-safe! │\n", - "│ │\n", - "ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¬ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜\n", - " │\n", - " │ HTTP/JSON (Language-Agnostic)\n", - " │ POST /reset, POST /step, GET /state\n", - " │\n", - "ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā–¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”\n", - "│ DOCKER CONTAINER │\n", - "│ │\n", - "│ ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” │\n", - "│ │ FastAPI Server │ │\n", - "│ │ └─ Environment (reset, step, state) │ │\n", - "│ │ └─ Your Game/Simulation Logic │ │\n", - "│ ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ │\n", - "│ │\n", - "│ Isolated • Reproducible • Secure │\n", - "ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜\n", + "\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n", + "\u2502 YOUR TRAINING CODE \u2502\n", + "\u2502 \u2502\n", + "\u2502 env = OpenSpielEnv(...) \u2190 Import the client \u2502\n", + "\u2502 result = env.reset() \u2190 Type-safe! \u2502\n", + "\u2502 result = env.step(action) \u2190 Type-safe! \u2502\n", + "\u2502 \u2502\n", + "\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n", + " \u2502\n", + " \u2502 HTTP/JSON (Language-Agnostic)\n", + " \u2502 POST /reset, POST /step, GET /state\n", + " \u2502\n", + "\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u25bc\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n", + "\u2502 DOCKER CONTAINER \u2502\n", + "\u2502 \u2502\n", + "\u2502 \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510 \u2502\n", + "\u2502 \u2502 FastAPI Server \u2502 \u2502\n", + "\u2502 \u2502 \u2514\u2500 Environment (reset, step, state) \u2502 \u2502\n", + "\u2502 \u2502 \u2514\u2500 Your Game/Simulation Logic \u2502 \u2502\n", + "\u2502 \u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518 \u2502\n", + "\u2502 \u2502\n", + "\u2502 Isolated \u2022 Reproducible \u2022 Secure \u2502\n", + "\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n", "```\n", "\n", "
\n", "\n", - "**šŸŽÆ Key Insight**: You never see HTTP details - just clean Python methods!\n", + "**\ud83c\udfaf Key Insight**: You never see HTTP details - just clean Python methods!\n", "\n", "```python\n", "env.reset() # Under the hood: HTTP POST to /reset\n", @@ -346,7 +346,7 @@ "env.state() # Under the hood: HTTP GET to /state\n", "```\n", "\n", - "The magic? OpenEnv handles all the plumbing. You focus on RL! ✨\n", + "The magic? OpenEnv handles all the plumbing. You focus on RL! \u2728\n", "\n", "
" ] @@ -358,7 +358,7 @@ "source": [ "---\n", "\n", - "# Part 3: Setup šŸ› ļø\n", + "# Part 3: Setup \ud83d\udee0\ufe0f\n", "\n", "
\n", "\n", @@ -380,30 +380,30 @@ "try:\n", " import google.colab\n", " IN_COLAB = True\n", - " print(\"🌐 Running in Google Colab - Perfect!\")\n", + " print(\"\ud83c\udf10 Running in Google Colab - Perfect!\")\n", "except ImportError:\n", " IN_COLAB = False\n", - " print(\"šŸ’» Running locally - Nice!\")\n", + " print(\"\ud83d\udcbb Running locally - Nice!\")\n", "\n", "if IN_COLAB:\n", - " print(\"\\nšŸ“¦ Cloning OpenEnv repository...\")\n", + " print(\"\\n\ud83d\udce6 Cloning OpenEnv repository...\")\n", " !git clone https://github.com/meta-pytorch/OpenEnv.git > /dev/null 2>&1\n", " %cd OpenEnv\n", " \n", - " print(\"šŸ“š Installing dependencies (this takes ~10 seconds)...\")\n", + " print(\"\ud83d\udcda Installing dependencies (this takes ~10 seconds)...\")\n", " !pip install -q fastapi uvicorn requests\n", " \n", " import sys\n", " sys.path.insert(0, './src')\n", - " print(\"\\nāœ… Setup complete! Everything is ready to go! šŸŽ‰\")\n", + " print(\"\\n\u2705 Setup complete! Everything is ready to go! \ud83c\udf89\")\n", "else:\n", " import sys\n", " from pathlib import Path\n", " sys.path.insert(0, str(Path.cwd().parent / 'src'))\n", - " print(\"āœ… Using local OpenEnv installation\")\n", + " print(\"\u2705 Using local OpenEnv installation\")\n", "\n", - "print(\"\\nšŸš€ Ready to explore OpenEnv and build amazing things!\")\n", - "print(\"šŸ’” Tip: Run cells top-to-bottom for the best experience.\\n\")" + "print(\"\\n\ud83d\ude80 Ready to explore OpenEnv and build amazing things!\")\n", + "print(\"\ud83d\udca1 Tip: Run cells top-to-bottom for the best experience.\\n\")" ] }, { @@ -414,7 +414,7 @@ "---\n", "\n", "\n", - "# Part 4: The OpenEnv Pattern šŸ—ļø\n", + "# Part 4: The OpenEnv Pattern \ud83c\udfd7\ufe0f\n", "\n", "
\n", "\n", @@ -422,16 +422,16 @@ "\n", "```\n", "src/envs/your_env/\n", - "ā”œā”€ā”€ šŸ“ models.py ← Type-safe contracts\n", - "│ (Action, Observation, State)\n", - "│\n", - "ā”œā”€ā”€ šŸ“± client.py ← What YOU import\n", - "│ (HTTPEnvClient implementation)\n", - "│\n", - "└── šŸ–„ļø server/\n", - " ā”œā”€ā”€ environment.py ← Game/simulation logic\n", - " ā”œā”€ā”€ app.py ← FastAPI server\n", - " └── Dockerfile ← Container definition\n", + "\u251c\u2500\u2500 \ud83d\udcdd models.py \u2190 Type-safe contracts\n", + "\u2502 (Action, Observation, State)\n", + "\u2502\n", + "\u251c\u2500\u2500 \ud83d\udcf1 client.py \u2190 What YOU import\n", + "\u2502 (HTTPEnvClient implementation)\n", + "\u2502\n", + "\u2514\u2500\u2500 \ud83d\udda5\ufe0f server/\n", + " \u251c\u2500\u2500 environment.py \u2190 Game/simulation logic\n", + " \u251c\u2500\u2500 app.py \u2190 FastAPI server\n", + " \u2514\u2500\u2500 Dockerfile \u2190 Container definition\n", "```\n", "\n", "
\n", @@ -451,11 +451,11 @@ "from core.http_env_client import HTTPEnvClient\n", "\n", "print(\"=\"*70)\n", - "print(\" 🧩 OPENENV CORE ABSTRACTIONS\")\n", + "print(\" \ud83e\udde9 OPENENV CORE ABSTRACTIONS\")\n", "print(\"=\"*70)\n", "\n", "print(\"\"\"\n", - "šŸ–„ļø SERVER SIDE (runs in Docker):\n", + "\ud83d\udda5\ufe0f SERVER SIDE (runs in Docker):\n", "\n", " class Environment(ABC):\n", " '''Base class for all environment implementations'''\n", @@ -472,7 +472,7 @@ " def state(self) -> State:\n", " '''Get episode metadata'''\n", "\n", - "šŸ“± CLIENT SIDE (your training code):\n", + "\ud83d\udcf1 CLIENT SIDE (your training code):\n", "\n", " class HTTPEnvClient(ABC):\n", " '''Base class for HTTP clients'''\n", @@ -488,8 +488,8 @@ "\"\"\")\n", "\n", "print(\"=\"*70)\n", - "print(\"\\n✨ Same interface on both sides - communication via HTTP!\")\n", - "print(\"šŸŽÆ You focus on RL, OpenEnv handles the infrastructure.\\n\")" + "print(\"\\n\u2728 Same interface on both sides - communication via HTTP!\")\n", + "print(\"\ud83c\udfaf You focus on RL, OpenEnv handles the infrastructure.\\n\")" ] }, { @@ -499,7 +499,7 @@ "source": [ "---\n", "\n", - "# Part 5: Example Integration - OpenSpiel šŸŽ®\n", + "# Part 5: Example Integration - OpenSpiel \ud83c\udfae\n", "\n", "
\n", "\n", @@ -515,7 +515,7 @@ "\n", "\n", "\n", - "**šŸŽÆ Single-Player**\n", + "**\ud83c\udfaf Single-Player**\n", "1. **Catch** - Catch falling ball\n", "2. **Cliff Walking** - Navigate grid\n", "3. **2048** - Tile puzzle\n", @@ -524,8 +524,8 @@ "\n", "\n", "\n", - "**šŸ‘„ Multi-Player**\n", - "5. **Tic-Tac-Toe** - Classic 3Ɨ3\n", + "**\ud83d\udc65 Multi-Player**\n", + "5. **Tic-Tac-Toe** - Classic 3\u00d73\n", "6. **Kuhn Poker** - Imperfect info poker\n", "\n", "\n", @@ -547,7 +547,7 @@ "from envs.openspiel_env.client import OpenSpielEnv\n", "\n", "print(\"=\"*70)\n", - "print(\" šŸ”Œ HOW OPENENV WRAPS OPENSPIEL\")\n", + "print(\" \ud83d\udd0c HOW OPENENV WRAPS OPENSPIEL\")\n", "print(\"=\"*70)\n", "\n", "print(\"\"\"\n", @@ -570,8 +570,8 @@ "\n", "\"\"\")\n", "\n", - "print(\"─\" * 70)\n", - "print(\"\\n✨ Usage (works for ALL OpenEnv environments):\")\n", + "print(\"\u2500\" * 70)\n", + "print(\"\\n\u2728 Usage (works for ALL OpenEnv environments):\")\n", "print(\"\"\"\n", " env = OpenSpielEnv(base_url=\"http://localhost:8000\")\n", " \n", @@ -585,8 +585,8 @@ " # Returns OpenSpielState\n", "\"\"\")\n", "\n", - "print(\"─\" * 70)\n", - "print(\"\\nšŸŽÆ This pattern works for ANY environment you want to wrap!\\n\")" + "print(\"\u2500\" * 70)\n", + "print(\"\\n\ud83c\udfaf This pattern works for ANY environment you want to wrap!\\n\")" ] }, { @@ -605,30 +605,30 @@ "from dataclasses import fields\n", "\n", "print(\"=\"*70)\n", - "print(\" šŸŽ® OPENSPIEL INTEGRATION - TYPE-SAFE MODELS\")\n", + "print(\" \ud83c\udfae OPENSPIEL INTEGRATION - TYPE-SAFE MODELS\")\n", "print(\"=\"*70)\n", "\n", - "print(\"\\nšŸ“¤ OpenSpielAction (what you send):\")\n", - "print(\" \" + \"─\" * 64)\n", + "print(\"\\n\ud83d\udce4 OpenSpielAction (what you send):\")\n", + "print(\" \" + \"\u2500\" * 64)\n", "for field in fields(OpenSpielAction):\n", - " print(f\" • {field.name:20s} : {field.type}\")\n", + " print(f\" \u2022 {field.name:20s} : {field.type}\")\n", "\n", - "print(\"\\nšŸ“„ OpenSpielObservation (what you receive):\")\n", - "print(\" \" + \"─\" * 64)\n", + "print(\"\\n\ud83d\udce5 OpenSpielObservation (what you receive):\")\n", + "print(\" \" + \"\u2500\" * 64)\n", "for field in fields(OpenSpielObservation):\n", - " print(f\" • {field.name:20s} : {field.type}\")\n", + " print(f\" \u2022 {field.name:20s} : {field.type}\")\n", "\n", - "print(\"\\nšŸ“Š OpenSpielState (episode metadata):\")\n", - "print(\" \" + \"─\" * 64)\n", + "print(\"\\n\ud83d\udcca OpenSpielState (episode metadata):\")\n", + "print(\" \" + \"\u2500\" * 64)\n", "for field in fields(OpenSpielState):\n", - " print(f\" • {field.name:20s} : {field.type}\")\n", + " print(f\" \u2022 {field.name:20s} : {field.type}\")\n", "\n", "print(\"\\n\" + \"=\"*70)\n", - "print(\"\\nšŸ’” Type safety means:\")\n", - "print(\" āœ… Your IDE autocompletes these fields\")\n", - "print(\" āœ… Typos are caught before running\")\n", - "print(\" āœ… Refactoring is safe\")\n", - "print(\" āœ… Self-documenting code\\n\")" + "print(\"\\n\ud83d\udca1 Type safety means:\")\n", + "print(\" \u2705 Your IDE autocompletes these fields\")\n", + "print(\" \u2705 Typos are caught before running\")\n", + "print(\" \u2705 Refactoring is safe\")\n", + "print(\" \u2705 Self-documenting code\\n\")" ] }, { @@ -642,9 +642,9 @@ "\n", "The client **inherits from HTTPEnvClient** and implements 3 methods:\n", "\n", - "1. `_step_payload()` - Convert action → JSON\n", - "2. `_parse_result()` - Parse JSON → typed observation \n", - "3. `_parse_state()` - Parse JSON → state\n", + "1. `_step_payload()` - Convert action \u2192 JSON\n", + "2. `_parse_result()` - Parse JSON \u2192 typed observation \n", + "3. `_parse_state()` - Parse JSON \u2192 state\n", "\n", "That's it! The base class handles all HTTP communication.\n", "\n", @@ -660,20 +660,20 @@ "\n", "
\n", "\n", - "# šŸŽ® Part 6: Using Real OpenSpiel\n", + "# \ud83c\udfae Part 6: Using Real OpenSpiel\n", "\n", "### Now let's USE a production environment!\n", "\n", - "We'll play **Catch** using OpenEnv's **OpenSpiel integration** šŸŽÆ
\n", + "We'll play **Catch** using OpenEnv's **OpenSpiel integration** \ud83c\udfaf
\n", "This is a REAL environment running in production at companies!\n", "\n", "
\n", "\n", "**Get ready for:**\n", - "- šŸ”Œ Using existing environments (not building)\n", - "- šŸ¤– Testing policies against real games\n", - "- šŸ“Š Live gameplay visualization\n", - "- šŸŽÆ Production-ready patterns\n", + "- \ud83d\udd0c Using existing environments (not building)\n", + "- \ud83e\udd16 Testing policies against real games\n", + "- \ud83d\udcca Live gameplay visualization\n", + "- \ud83c\udfaf Production-ready patterns\n", "\n", "
" ] @@ -683,18 +683,18 @@ "id": "cell-16", "metadata": {}, "source": [ - "## The Game: Catch šŸ”“šŸ“\n", + "## The Game: Catch \ud83d\udd34\ud83c\udfd3\n", "\n", "\n", "\n", "\n", "\n", @@ -721,13 +721,13 @@ "\n", "
\n", "\n", - "**šŸŽÆ Why Catch?**\n", + "**\ud83c\udfaf Why Catch?**\n", "- Simple rules (easy to understand)\n", "- Fast episodes (~5 steps)\n", "- Clear success/failure\n", "- Part of OpenSpiel's 70+ games!\n", "\n", - "**šŸ’” The Big Idea:**\n", + "**\ud83d\udca1 The Big Idea:**\n", "Instead of building this from scratch, we'll USE OpenEnv's existing OpenSpiel integration. Same interface, but production-ready!\n", "\n", "
" @@ -748,26 +748,26 @@ ")\n", "from dataclasses import fields\n", "\n", - "print(\"šŸŽ® \" + \"=\"*64 + \" šŸŽ®\")\n", - "print(\" āœ… Importing Real OpenSpiel Environment!\")\n", - "print(\"šŸŽ® \" + \"=\"*64 + \" šŸŽ®\\n\")\n", + "print(\"\ud83c\udfae \" + \"=\"*64 + \" \ud83c\udfae\")\n", + "print(\" \u2705 Importing Real OpenSpiel Environment!\")\n", + "print(\"\ud83c\udfae \" + \"=\"*64 + \" \ud83c\udfae\\n\")\n", "\n", - "print(\"šŸ“¦ What we just imported:\")\n", - "print(\" • OpenSpielEnv - HTTP client for OpenSpiel games\")\n", - "print(\" • OpenSpielAction - Type-safe actions\")\n", - "print(\" • OpenSpielObservation - Type-safe observations\")\n", - "print(\" • OpenSpielState - Episode metadata\\n\")\n", + "print(\"\ud83d\udce6 What we just imported:\")\n", + "print(\" \u2022 OpenSpielEnv - HTTP client for OpenSpiel games\")\n", + "print(\" \u2022 OpenSpielAction - Type-safe actions\")\n", + "print(\" \u2022 OpenSpielObservation - Type-safe observations\")\n", + "print(\" \u2022 OpenSpielState - Episode metadata\\n\")\n", "\n", - "print(\"šŸ“‹ OpenSpielObservation fields:\")\n", - "print(\" \" + \"─\" * 60)\n", + "print(\"\ud83d\udccb OpenSpielObservation fields:\")\n", + "print(\" \" + \"\u2500\" * 60)\n", "for field in fields(OpenSpielObservation):\n", - " print(f\" • {field.name:25s} : {field.type}\")\n", + " print(f\" \u2022 {field.name:25s} : {field.type}\")\n", "\n", "print(\"\\n\" + \"=\"*70)\n", - "print(\"\\nšŸ’” This is REAL OpenEnv code - used in production!\")\n", - "print(\" • Wraps 6 OpenSpiel games (Catch, Tic-Tac-Toe, Poker, etc.)\")\n", - "print(\" • Type-safe actions and observations\")\n", - "print(\" • Works via HTTP (we\\'ll see that next!)\\n\")" + "print(\"\\n\ud83d\udca1 This is REAL OpenEnv code - used in production!\")\n", + "print(\" \u2022 Wraps 6 OpenSpiel games (Catch, Tic-Tac-Toe, Poker, etc.)\")\n", + "print(\" \u2022 Type-safe actions and observations\")\n", + "print(\" \u2022 Works via HTTP (we\\'ll see that next!)\\n\")" ] }, { @@ -782,22 +782,22 @@ "import sys\n", "import os\n", "\n", - "print(\"šŸš€ \" + \"=\"*64 + \" šŸš€\")\n", + "print(\"\ud83d\ude80 \" + \"=\"*64 + \" \ud83d\ude80\")\n", "print(\" Starting OpenSpiel Server (Catch Game)\")\n", - "print(\"šŸš€ \" + \"=\"*64 + \" šŸš€\\n\")\n", + "print(\"\ud83d\ude80 \" + \"=\"*64 + \" \ud83d\ude80\\n\")\n", "\n", "# Check if open_spiel is installed\n", "try:\n", " import pyspiel\n", - " print(\"āœ… OpenSpiel is installed!\\n\")\n", + " print(\"\u2705 OpenSpiel is installed!\\n\")\n", "except ImportError:\n", - " print(\"āš ļø OpenSpiel not found. Installing...\")\n", + " print(\"\u26a0\ufe0f OpenSpiel not found. Installing...\")\n", " import subprocess\n", " subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"-q\", \"open_spiel\"])\n", - " print(\"āœ… OpenSpiel installed!\\n\")\n", + " print(\"\u2705 OpenSpiel installed!\\n\")\n", "\n", "# Start the OpenSpiel server in background\n", - "print(\"⚔ Starting FastAPI server for OpenSpiel Catch...\")\n", + "print(\"\u26a1 Starting FastAPI server for OpenSpiel Catch...\")\n", "print(\" (This uses REAL OpenEnv + OpenSpiel integration)\\n\")\n", "\n", "# Determine the correct path\n", @@ -824,32 +824,32 @@ ")\n", "\n", "# Wait for server to start\n", - "print(\"ā³ Waiting for server to start...\")\n", + "print(\"\u23f3 Waiting for server to start...\")\n", "time.sleep(5)\n", "\n", "# Check if server is running\n", "import requests\n", "try:\n", " response = requests.get('http://localhost:8000/health', timeout=2)\n", - " print(\"\\nāœ… OpenSpiel server is running!\")\n", - " print(\"🌐 Server URL: http://localhost:8000\")\n", - " print(\"šŸ“ Endpoints available:\")\n", - " print(\" • POST /reset\")\n", - " print(\" • POST /step\")\n", - " print(\" • GET /state\")\n", - " print(\"\\nšŸŽÆ This is REAL OpenEnv + OpenSpiel in action!\")\n", - " print(\" • Running actual OpenSpiel Catch game\")\n", - " print(\" • Exposed via FastAPI HTTP server\")\n", - " print(\" • Using OpenEnv's standard interface\\n\")\n", + " print(\"\\n\u2705 OpenSpiel server is running!\")\n", + " print(\"\ud83c\udf10 Server URL: http://localhost:8000\")\n", + " print(\"\ud83d\udccd Endpoints available:\")\n", + " print(\" \u2022 POST /reset\")\n", + " print(\" \u2022 POST /step\")\n", + " print(\" \u2022 GET /state\")\n", + " print(\"\\n\ud83c\udfaf This is REAL OpenEnv + OpenSpiel in action!\")\n", + " print(\" \u2022 Running actual OpenSpiel Catch game\")\n", + " print(\" \u2022 Exposed via FastAPI HTTP server\")\n", + " print(\" \u2022 Using OpenEnv's standard interface\\n\")\n", "except Exception as e:\n", - " print(f\"\\nāŒ Server failed to start: {e}\")\n", - " print(\"\\nšŸ“‹ Checking error output...\")\n", + " print(f\"\\n\u274c Server failed to start: {e}\")\n", + " print(\"\\n\ud83d\udccb Checking error output...\")\n", " server_process.poll()\n", " if server_process.stderr:\n", " stderr = server_process.stderr.read()\n", " if stderr:\n", " print(stderr)\n", - " print(\"\\nšŸ’” Make sure open_spiel is installed:\")\n", + " print(\"\\n\ud83d\udca1 Make sure open_spiel is installed:\")\n", " print(\" pip install open_spiel\")\n", " raise" ] @@ -861,19 +861,19 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"šŸ“± \" + \"=\"*64 + \" šŸ“±\")\n", + "print(\"\ud83d\udcf1 \" + \"=\"*64 + \" \ud83d\udcf1\")\n", "print(\" Connecting to OpenSpiel Server via HTTP\")\n", - "print(\"šŸ“± \" + \"=\"*64 + \" šŸ“±\\n\")\n", + "print(\"\ud83d\udcf1 \" + \"=\"*64 + \" \ud83d\udcf1\\n\")\n", "\n", "# Create HTTP client for OpenSpiel\n", "client = OpenSpielEnv(base_url=\"http://localhost:8000\")\n", "\n", - "print(\"āœ… Client created!\")\n", - "print(\"\\nšŸ’” What just happened:\")\n", - "print(\" • OpenSpielEnv is an HTTPEnvClient subclass\")\n", - "print(\" • It knows how to talk to OpenSpiel servers\")\n", - "print(\" • All communication is type-safe and over HTTP\")\n", - "print(\" • Same client works for ALL OpenSpiel games!\\n\")" + "print(\"\u2705 Client created!\")\n", + "print(\"\\n\ud83d\udca1 What just happened:\")\n", + "print(\" \u2022 OpenSpielEnv is an HTTPEnvClient subclass\")\n", + "print(\" \u2022 It knows how to talk to OpenSpiel servers\")\n", + "print(\" \u2022 All communication is type-safe and over HTTP\")\n", + "print(\" \u2022 Same client works for ALL OpenSpiel games!\\n\")" ] }, { @@ -883,46 +883,46 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"šŸŽ® \" + \"=\"*64 + \" šŸŽ®\")\n", + "print(\"\ud83c\udfae \" + \"=\"*64 + \" \ud83c\udfae\")\n", "print(\" Testing Connection - Playing One Step\")\n", - "print(\"šŸŽ® \" + \"=\"*64 + \" šŸŽ®\\n\")\n", + "print(\"\ud83c\udfae \" + \"=\"*64 + \" \ud83c\udfae\\n\")\n", "\n", "# Reset the environment (HTTP POST /reset)\n", - "print(\"šŸ“¤ Calling client.reset()...\")\n", + "print(\"\ud83d\udce4 Calling client.reset()...\")\n", "print(\" Under the hood: HTTP POST to http://localhost:8000/reset\\n\")\n", "\n", "result = client.reset()\n", "\n", - "print(\"šŸ“„ Received OpenSpielObservation:\")\n", - "print(f\" • info_state: {result.observation.info_state[:10]}... (first 10 values)\")\n", - "print(f\" • legal_actions: {result.observation.legal_actions}\")\n", - "print(f\" • game_phase: {result.observation.game_phase}\")\n", - "print(f\" • done: {result.done}\")\n", + "print(\"\ud83d\udce5 Received OpenSpielObservation:\")\n", + "print(f\" \u2022 info_state: {result.observation.info_state[:10]}... (first 10 values)\")\n", + "print(f\" \u2022 legal_actions: {result.observation.legal_actions}\")\n", + "print(f\" \u2022 game_phase: {result.observation.game_phase}\")\n", + "print(f\" \u2022 done: {result.done}\")\n", "\n", "# Take an action (HTTP POST /step)\n", - "print(\"\\nšŸ“¤ Calling client.step(OpenSpielAction(action_id=1, game_name=\\'catch\\'))...\")\n", + "print(\"\\n\ud83d\udce4 Calling client.step(OpenSpielAction(action_id=1, game_name=\\'catch\\'))...\")\n", "print(\" Under the hood: HTTP POST to http://localhost:8000/step\\n\")\n", "\n", "action = OpenSpielAction(action_id=1, game_name=\"catch\") # STAY\n", "result = client.step(action)\n", "\n", - "print(\"šŸ“„ Received response:\")\n", - "print(f\" • Reward: {result.reward}\")\n", - "print(f\" • Done: {result.done}\")\n", - "print(f\" • legal_actions: {result.observation.legal_actions}\")\n", + "print(\"\ud83d\udce5 Received response:\")\n", + "print(f\" \u2022 Reward: {result.reward}\")\n", + "print(f\" \u2022 Done: {result.done}\")\n", + "print(f\" \u2022 legal_actions: {result.observation.legal_actions}\")\n", "\n", "# Get state (HTTP GET /state)\n", "state = client.state()\n", - "print(f\"\\nšŸ“Š Episode state:\")\n", - "print(f\" • episode_id: {state.episode_id}\")\n", - "print(f\" • step_count: {state.step_count}\")\n", - "print(f\" • game_name: {state.game_name}\")\n", + "print(f\"\\n\ud83d\udcca Episode state:\")\n", + "print(f\" \u2022 episode_id: {state.episode_id}\")\n", + "print(f\" \u2022 step_count: {state.step_count}\")\n", + "print(f\" \u2022 game_name: {state.game_name}\")\n", "\n", "print(\"\\n\" + \"=\"*70)\n", - "print(\"\\nšŸŽ‰ IT WORKS! We\\'re using REAL OpenSpiel via HTTP!\")\n", - "print(\" āœ… Type-safe communication\")\n", - "print(\" āœ… Same interface as any OpenEnv environment\")\n", - "print(\" āœ… Production-ready architecture\\n\")" + "print(\"\\n\ud83c\udf89 IT WORKS! We\\'re using REAL OpenSpiel via HTTP!\")\n", + "print(\" \u2705 Type-safe communication\")\n", + "print(\" \u2705 Same interface as any OpenEnv environment\")\n", + "print(\" \u2705 Production-ready architecture\\n\")" ] }, { @@ -932,7 +932,7 @@ "source": [ "---\n", "\n", - "# Part 7: Four Policies šŸ¤–\n", + "# Part 7: Four Policies \ud83e\udd16\n", "\n", "
\n", "\n", @@ -945,28 +945,28 @@ "
\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", "\n", - "\n", + "\n", "\n", "\n", "\n", "
\n", "\n", "```\n", - "⬜ ⬜ šŸ”“ ⬜ ⬜\n", - "⬜ ⬜ ⬜ ⬜ ⬜ Ball\n", - "⬜ ⬜ ⬜ ⬜ ⬜ falls\n", - "⬜ ⬜ ⬜ ⬜ ⬜ down\n", - "⬜ ⬜ šŸ“ ⬜ ⬜\n", + "\u2b1c \u2b1c \ud83d\udd34 \u2b1c \u2b1c\n", + "\u2b1c \u2b1c \u2b1c \u2b1c \u2b1c Ball\n", + "\u2b1c \u2b1c \u2b1c \u2b1c \u2b1c falls\n", + "\u2b1c \u2b1c \u2b1c \u2b1c \u2b1c down\n", + "\u2b1c \u2b1c \ud83c\udfd3 \u2b1c \u2b1c\n", " Paddle\n", "```\n", "\n", @@ -702,18 +702,18 @@ "\n", "\n", "**Rules:**\n", - "- 5Ɨ5 grid\n", + "- 5\u00d75 grid\n", "- Ball falls from random column\n", "- Move paddle left/right to catch it\n", "\n", "**Actions:**\n", - "- `0` = Move LEFT ā¬…ļø\n", - "- `1` = STAY šŸ›‘\n", - "- `2` = Move RIGHT āž”ļø\n", + "- `0` = Move LEFT \u2b05\ufe0f\n", + "- `1` = STAY \ud83d\uded1\n", + "- `2` = Move RIGHT \u27a1\ufe0f\n", "\n", "**Reward:**\n", - "- `+1` if caught šŸŽ‰\n", - "- `0` if missed 😢\n", + "- `+1` if caught \ud83c\udf89\n", + "- `0` if missed \ud83d\ude22\n", "\n", "
Expected Performance
šŸŽ² Random\ud83c\udfb2 RandomPick random action every step~20% (pure luck)
šŸ›‘ Always Stay\ud83d\uded1 Always StayNever move, hope ball lands in center~20% (terrible!)
🧠 Smart\ud83e\udde0 SmartMove paddle toward ball100% (optimal!)
šŸ“ˆ Learning\ud83d\udcc8 LearningStart random, learn smart strategy~85% (improves over time)
\n", "\n", - "**šŸ’” These policies work with ANY OpenSpiel game!**\n", + "**\ud83d\udca1 These policies work with ANY OpenSpiel game!**\n", "\n", "
" ] @@ -986,7 +986,7 @@ "\n", "class RandomPolicy:\n", " \"\"\"Baseline: Pure random guessing.\"\"\"\n", - " name = \"šŸŽ² Random Guesser\"\n", + " name = \"\ud83c\udfb2 Random Guesser\"\n", "\n", " def select_action(self, obs: OpenSpielObservation) -> int:\n", " return random.choice(obs.legal_actions)\n", @@ -994,7 +994,7 @@ "\n", "class AlwaysStayPolicy:\n", " \"\"\"Bad strategy: Never moves.\"\"\"\n", - " name = \"šŸ›‘ Always Stay\"\n", + " name = \"\ud83d\uded1 Always Stay\"\n", "\n", " def select_action(self, obs: OpenSpielObservation) -> int:\n", " return 1 # STAY\n", @@ -1002,7 +1002,7 @@ "\n", "class SmartPolicy:\n", " \"\"\"Optimal: Move paddle toward ball.\"\"\"\n", - " name = \"🧠 Smart Heuristic\"\n", + " name = \"\ud83e\udde0 Smart Heuristic\"\n", "\n", " def select_action(self, obs: OpenSpielObservation) -> int:\n", " # Parse OpenSpiel observation\n", @@ -1035,7 +1035,7 @@ "\n", "class LearningPolicy:\n", " \"\"\"Simulated RL: Epsilon-greedy exploration.\"\"\"\n", - " name = \"šŸ“ˆ Learning Agent\"\n", + " name = \"\ud83d\udcc8 Learning Agent\"\n", "\n", " def __init__(self):\n", " self.steps = 0\n", @@ -1055,18 +1055,18 @@ " return self.smart_policy.select_action(obs)\n", "\n", "\n", - "print(\"šŸ¤– \" + \"=\"*64 + \" šŸ¤–\")\n", - "print(\" āœ… 4 Policies Created (Adapted for OpenSpiel)!\")\n", - "print(\"šŸ¤– \" + \"=\"*64 + \" šŸ¤–\\n\")\n", + "print(\"\ud83e\udd16 \" + \"=\"*64 + \" \ud83e\udd16\")\n", + "print(\" \u2705 4 Policies Created (Adapted for OpenSpiel)!\")\n", + "print(\"\ud83e\udd16 \" + \"=\"*64 + \" \ud83e\udd16\\n\")\n", "\n", "policies = [RandomPolicy(), AlwaysStayPolicy(), SmartPolicy(), LearningPolicy()]\n", "for i, policy in enumerate(policies, 1):\n", " print(f\" {i}. {policy.name}\")\n", "\n", - "print(\"\\nšŸ’” These policies work with OpenSpielObservation!\")\n", - "print(\" • Read info_state (flattened grid)\")\n", - "print(\" • Use legal_actions\")\n", - "print(\" • Work with ANY OpenSpiel game that exposes these!\\n\")" + "print(\"\\n\ud83d\udca1 These policies work with OpenSpielObservation!\")\n", + "print(\" \u2022 Read info_state (flattened grid)\")\n", + "print(\" \u2022 Use legal_actions\")\n", + "print(\" \u2022 Work with ANY OpenSpiel game that exposes these!\\n\")" ] }, { @@ -1095,14 +1095,14 @@ "\n", " if visualize:\n", " print(f\"\\n{'='*60}\")\n", - " print(f\" šŸŽ® {policy.name}\")\n", - " print(f\" šŸŽ² Playing against OpenSpiel Catch\")\n", + " print(f\" \ud83c\udfae {policy.name}\")\n", + " print(f\" \ud83c\udfb2 Playing against OpenSpiel Catch\")\n", " print('='*60 + '\\n')\n", " time.sleep(delay)\n", "\n", " total_reward = 0\n", " step = 0\n", - " action_names = [\"ā¬…ļø LEFT\", \"šŸ›‘ STAY\", \"āž”ļø RIGHT\"]\n", + " action_names = [\"\u2b05\ufe0f LEFT\", \"\ud83d\uded1 STAY\", \"\u27a1\ufe0f RIGHT\"]\n", "\n", " # THE RL LOOP\n", " while not obs.done:\n", @@ -1119,13 +1119,13 @@ " total_reward += result.reward\n", "\n", " if visualize:\n", - " print(f\"šŸ“ Step {step + 1}: {action_names[action_id]} → Reward: {result.reward}\")\n", + " print(f\"\ud83d\udccd Step {step + 1}: {action_names[action_id]} \u2192 Reward: {result.reward}\")\n", " time.sleep(delay)\n", "\n", " step += 1\n", "\n", " if visualize:\n", - " result_text = \"šŸŽ‰ CAUGHT!\" if total_reward > 0 else \"😢 MISSED\"\n", + " result_text = \"\ud83c\udf89 CAUGHT!\" if total_reward > 0 else \"\ud83d\ude22 MISSED\"\n", " print(f\"\\n{'='*60}\")\n", " print(f\" {result_text} Total Reward: {total_reward}\")\n", " print('='*60)\n", @@ -1133,18 +1133,18 @@ " return total_reward > 0\n", "\n", "\n", - "print(\"šŸ“ŗ \" + \"=\"*64 + \" šŸ“ŗ\")\n", + "print(\"\ud83d\udcfa \" + \"=\"*64 + \" \ud83d\udcfa\")\n", "print(\" Watch Smart Policy Play Against OpenSpiel!\")\n", - "print(\"šŸ“ŗ \" + \"=\"*64 + \" šŸ“ŗ\\n\")\n", + "print(\"\ud83d\udcfa \" + \"=\"*64 + \" \ud83d\udcfa\\n\")\n", "\n", "# Demo: Watch Smart Policy in action\n", "policy = SmartPolicy()\n", "run_episode(client, policy, visualize=True, delay=0.5)\n", "\n", - "print(\"\\nšŸ’” You just watched REAL OpenSpiel Catch being played!\")\n", - "print(\" • Every action was an HTTP call\")\n", - "print(\" • Game logic runs in the server\")\n", - "print(\" • Client only sends actions and receives observations\\n\")" + "print(\"\\n\ud83d\udca1 You just watched REAL OpenSpiel Catch being played!\")\n", + "print(\" \u2022 Every action was an HTTP call\")\n", + "print(\" \u2022 Game logic runs in the server\")\n", + "print(\" \u2022 Client only sends actions and receives observations\\n\")" ] }, { @@ -1154,7 +1154,7 @@ "source": [ "---\n", "\n", - "# Part 8: Policy Competition! šŸ†\n", + "# Part 8: Policy Competition! \ud83c\udfc6\n", "\n", "
\n", "\n", @@ -1181,49 +1181,49 @@ " LearningPolicy(),\n", " ]\n", "\n", - " print(\"\\nšŸ† \" + \"=\"*66 + \" šŸ†\")\n", + " print(\"\\n\ud83c\udfc6 \" + \"=\"*66 + \" \ud83c\udfc6\")\n", " print(f\" POLICY SHOWDOWN - {num_episodes} Episodes Each\")\n", " print(f\" Playing against REAL OpenSpiel Catch!\")\n", - " print(\"šŸ† \" + \"=\"*66 + \" šŸ†\\n\")\n", + " print(\"\ud83c\udfc6 \" + \"=\"*66 + \" \ud83c\udfc6\\n\")\n", "\n", " results = []\n", " for policy in policies:\n", - " print(f\"⚔ Testing {policy.name}...\", end=\" \")\n", + " print(f\"\u26a1 Testing {policy.name}...\", end=\" \")\n", " successes = sum(run_episode(env, policy, visualize=False)\n", " for _ in range(num_episodes))\n", " success_rate = (successes / num_episodes) * 100\n", " results.append((policy.name, success_rate, successes))\n", - " print(f\"āœ“ Done!\")\n", + " print(f\"\u2713 Done!\")\n", "\n", " print(\"\\n\" + \"=\"*70)\n", - " print(\" šŸ“Š FINAL RESULTS\")\n", + " print(\" \ud83d\udcca FINAL RESULTS\")\n", " print(\"=\"*70 + \"\\n\")\n", "\n", " # Sort by success rate (descending)\n", " results.sort(key=lambda x: x[1], reverse=True)\n", "\n", " # Award medals to top 3\n", - " medals = [\"šŸ„‡\", \"🄈\", \"šŸ„‰\", \" \"]\n", + " medals = [\"\ud83e\udd47\", \"\ud83e\udd48\", \"\ud83e\udd49\", \" \"]\n", "\n", " for i, (name, rate, successes) in enumerate(results):\n", " medal = medals[i]\n", - " bar = \"ā–ˆ\" * int(rate / 2)\n", + " bar = \"\u2588\" * int(rate / 2)\n", " print(f\"{medal} {name:25s} [{bar:<50}] {rate:5.1f}% ({successes}/{num_episodes})\")\n", "\n", " print(\"\\n\" + \"=\"*70)\n", - " print(\"\\n✨ Key Insights:\")\n", - " print(\" • Random (~20%): Baseline - pure luck šŸŽ²\")\n", - " print(\" • Always Stay (~20%): Bad strategy - stays center šŸ›‘\")\n", - " print(\" • Smart (100%): Optimal - perfect play! 🧠\")\n", - " print(\" • Learning (~85%): Improves over time šŸ“ˆ\")\n", - " print(\"\\nšŸŽ“ This is Reinforcement Learning + OpenEnv in action:\")\n", + " print(\"\\n\u2728 Key Insights:\")\n", + " print(\" \u2022 Random (~20%): Baseline - pure luck \ud83c\udfb2\")\n", + " print(\" \u2022 Always Stay (~20%): Bad strategy - stays center \ud83d\uded1\")\n", + " print(\" \u2022 Smart (100%): Optimal - perfect play! \ud83e\udde0\")\n", + " print(\" \u2022 Learning (~85%): Improves over time \ud83d\udcc8\")\n", + " print(\"\\n\ud83c\udf93 This is Reinforcement Learning + OpenEnv in action:\")\n", " print(\" 1. We USED existing OpenSpiel environment (didn\\'t build it)\")\n", " print(\" 2. Type-safe communication over HTTP\")\n", " print(\" 3. Same code works for ANY OpenSpiel game\")\n", " print(\" 4. Production-ready architecture\\n\")\n", "\n", "# Run the epic competition!\n", - "print(\"šŸŽ® Starting the showdown against REAL OpenSpiel...\\n\")\n", + "print(\"\ud83c\udfae Starting the showdown against REAL OpenSpiel...\\n\")\n", "evaluate_policies(client, num_episodes=50)" ] }, @@ -1235,11 +1235,11 @@ "---\n", "\n", "\n", - "# Part 9: Switching to Other Games šŸŽ®\n", + "# Part 9: Switching to Other Games \ud83c\udfae\n", "\n", "
\n", "\n", - "## What We Just Used: Real OpenSpiel! šŸŽ‰\n", + "## What We Just Used: Real OpenSpiel! \ud83c\udf89\n", "\n", "In Parts 6-8, we **USED** the existing OpenSpiel Catch environment:\n", "\n", @@ -1266,11 +1266,11 @@ "\n", "\n", "\n", - "**šŸŽÆ This is production code!** Every action was an HTTP call to a real OpenSpiel environment.\n", + "**\ud83c\udfaf This is production code!** Every action was an HTTP call to a real OpenSpiel environment.\n", "\n", "
\n", "\n", - "## šŸŽ® 6 Games Available - Same Interface!\n", + "## \ud83c\udfae 6 Games Available - Same Interface!\n", "\n", "The beauty of OpenEnv? **Same code, different games!**\n", "\n", @@ -1286,10 +1286,10 @@ "\n", "
\n", "\n", - "**šŸŽ® All 6 Games:**\n", + "**\ud83c\udfae All 6 Games:**\n", "\n", - "1. āœ… **`catch`** - What we just used!\n", - "2. **`tic_tac_toe`** - Classic 3Ɨ3\n", + "1. \u2705 **`catch`** - What we just used!\n", + "2. **`tic_tac_toe`** - Classic 3\u00d73\n", "3. **`kuhn_poker`** - Imperfect information poker\n", "4. **`cliff_walking`** - Grid navigation\n", "5. **`2048`** - Tile puzzle\n", @@ -1323,7 +1323,7 @@ "result = client.reset() # Now playing Tic-Tac-Toe!\n", "```\n", "\n", - "**šŸ’” Key Insight**: You don't rebuild anything - you just USE different games with the same client!\n" + "**\ud83d\udca1 Key Insight**: You don't rebuild anything - you just USE different games with the same client!\n" ] }, { @@ -1334,7 +1334,7 @@ "---\n", "\n", "\n", - "# Part 10: Create Your Own Integration šŸ› ļø\n", + "# Part 10: Create Your Own Integration \ud83d\udee0\ufe0f\n", "\n", "
\n", "\n", @@ -1438,7 +1438,7 @@ "\n", "
\n", "\n", - "### šŸŽ“ Examples to Study\n", + "### \ud83c\udf93 Examples to Study\n", "\n", "OpenEnv includes 3 complete examples:\n", "\n", @@ -1456,7 +1456,7 @@ " - Shows complex use case\n", " - Security considerations\n", "\n", - "**šŸ’” Study these to understand the patterns!**\n", + "**\ud83d\udca1 Study these to understand the patterns!**\n", "\n", "
" ] @@ -1470,7 +1470,7 @@ "\n", "
\n", "\n", - "# šŸŽ“ Summary: Your Journey\n", + "# \ud83c\udf93 Summary: Your Journey\n", "\n", "
" ] @@ -1486,19 +1486,19 @@ "\n", "\n", "\n", - "### šŸ“š Concepts\n", + "### \ud83d\udcda Concepts\n", "\n", - "āœ… **RL Fundamentals**\n", + "\u2705 **RL Fundamentals**\n", "- The observe-act-reward loop\n", "- What makes good policies\n", "- Exploration vs exploitation\n", "\n", - "āœ… **OpenEnv Architecture**\n", + "\u2705 **OpenEnv Architecture**\n", "- Client-server separation\n", "- Type-safe contracts\n", "- HTTP communication layer\n", "\n", - "āœ… **Production Patterns**\n", + "\u2705 **Production Patterns**\n", "- Docker isolation\n", "- API design\n", "- Reproducible deployments\n", @@ -1506,19 +1506,19 @@ "\n", "\n", "\n", - "### šŸ› ļø Skills\n", + "### \ud83d\udee0\ufe0f Skills\n", "\n", - "āœ… **Using Environments**\n", + "\u2705 **Using Environments**\n", "- Import OpenEnv clients\n", "- Call reset/step/state\n", "- Work with typed observations\n", "\n", - "āœ… **Building Environments**\n", + "\u2705 **Building Environments**\n", "- Define type-safe models\n", "- Implement Environment class\n", "- Create HTTPEnvClient\n", "\n", - "āœ… **Testing & Debugging**\n", + "\u2705 **Testing & Debugging**\n", "- Compare policies\n", "- Visualize episodes\n", "- Measure performance\n", @@ -1544,45 +1544,45 @@ "\n", "\n", "Type Safety\n", - "āŒ Arrays, dicts\n", - "āœ… Dataclasses\n", - "šŸ† OpenEnv\n", + "\u274c Arrays, dicts\n", + "\u2705 Dataclasses\n", + "\ud83c\udfc6 OpenEnv\n", "\n", "\n", "Isolation\n", - "āŒ Same process\n", - "āœ… Docker\n", - "šŸ† OpenEnv\n", + "\u274c Same process\n", + "\u2705 Docker\n", + "\ud83c\udfc6 OpenEnv\n", "\n", "\n", "Deployment\n", - "āŒ Manual setup\n", - "āœ… K8s-ready\n", - "šŸ† OpenEnv\n", + "\u274c Manual setup\n", + "\u2705 K8s-ready\n", + "\ud83c\udfc6 OpenEnv\n", "\n", "\n", "Language\n", - "āŒ Python only\n", - "āœ… Any (HTTP)\n", - "šŸ† OpenEnv\n", + "\u274c Python only\n", + "\u2705 Any (HTTP)\n", + "\ud83c\udfc6 OpenEnv\n", "\n", "\n", "Reproducibility\n", - "āŒ \"Works on my machine\"\n", - "āœ… Same everywhere\n", - "šŸ† OpenEnv\n", + "\u274c \"Works on my machine\"\n", + "\u2705 Same everywhere\n", + "\ud83c\udfc6 OpenEnv\n", "\n", "\n", "Community\n", - "āœ… Large ecosystem\n", - "🟔 Growing\n", - "šŸ¤ Both!\n", + "\u2705 Large ecosystem\n", + "\ud83d\udfe1 Growing\n", + "\ud83e\udd1d Both!\n", "\n", "\n", "\n", "
\n", "\n", - "**šŸŽÆ The Bottom Line**\n", + "**\ud83c\udfaf The Bottom Line**\n", "\n", "OpenEnv brings **production engineering** to RL:\n", "- Same environments work locally and in production\n", @@ -1601,33 +1601,33 @@ "metadata": {}, "source": [ "\n", - "## šŸ“š Resources\n", + "## \ud83d\udcda Resources\n", "\n", "
\n", "\n", - "### šŸ”— Essential Links\n", + "### \ud83d\udd17 Essential Links\n", "\n", - "- **šŸ  OpenEnv GitHub**: https://github.com/meta-pytorch/OpenEnv\n", - "- **šŸŽ® OpenSpiel**: https://github.com/google-deepmind/open_spiel\n", - "- **⚔ FastAPI Docs**: https://fastapi.tiangolo.com/\n", - "- **🐳 Docker Guide**: https://docs.docker.com/get-started/\n", - "- **šŸ”„ PyTorch**: https://pytorch.org/\n", + "- **\ud83c\udfe0 OpenEnv GitHub**: https://github.com/meta-pytorch/OpenEnv\n", + "- **\ud83c\udfae OpenSpiel**: https://github.com/google-deepmind/open_spiel\n", + "- **\u26a1 FastAPI Docs**: https://fastapi.tiangolo.com/\n", + "- **\ud83d\udc33 Docker Guide**: https://docs.docker.com/get-started/\n", + "- **\ud83d\udd25 PyTorch**: https://pytorch.org/\n", "\n", - "### šŸ“– Documentation Deep Dives\n", + "### \ud83d\udcd6 Documentation Deep Dives\n", "\n", "- **Environment Creation Guide**: `src/envs/README.md`\n", "- **OpenSpiel Integration**: `src/envs/openspiel_env/README.md`\n", "- **Example Scripts**: `examples/`\n", "- **RFC 001**: [Baseline API Specs](https://github.com/meta-pytorch/OpenEnv/pull/26)\n", "\n", - "### šŸŽ“ Community & Support\n", + "### \ud83c\udf93 Community & Support\n", "\n", "**Supported by amazing organizations:**\n", - "- šŸ”„ Meta PyTorch\n", - "- šŸ¤— Hugging Face\n", - "- ⚔ Unsloth AI\n", - "- 🌟 Reflection AI\n", - "- šŸš€ And many more!\n", + "- \ud83d\udd25 Meta PyTorch\n", + "- \ud83e\udd17 Hugging Face\n", + "- \u26a1 Unsloth AI\n", + "- \ud83c\udf1f Reflection AI\n", + "- \ud83d\ude80 And many more!\n", "\n", "**License**: BSD 3-Clause (very permissive!)\n", "\n", @@ -1637,13 +1637,13 @@ "\n", "---\n", "\n", - "### 🌈 What's Next?\n", + "### \ud83c\udf08 What's Next?\n", "\n", - "1. ⭐ **Star the repo** to show support and stay updated\n", - "2. šŸ”„ **Try modifying** the Catch game (make it harder? bigger grid?)\n", - "3. šŸŽ® **Explore** other OpenSpiel games\n", - "4. šŸ› ļø **Build** your own environment integration\n", - "5. šŸ’¬ **Share** what you build with the community!" + "1. \u2b50 **Star the repo** to show support and stay updated\n", + "2. \ud83d\udd04 **Try modifying** the Catch game (make it harder? bigger grid?)\n", + "3. \ud83c\udfae **Explore** other OpenSpiel games\n", + "4. \ud83d\udee0\ufe0f **Build** your own environment integration\n", + "5. \ud83d\udcac **Share** what you build with the community!" ] } ], @@ -1668,4 +1668,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file