Export remaining integrations and differences

openai · May 25, 2018 · ba95d9b · ba95d9b
1 parent d15a741
commit ba95d9b
Show file tree

Hide file tree

Showing 6,155 changed files with 72,010 additions and 11,186 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,7 @@ __pycache__/
 # C extensions
 *.so
 *.dylib
+*.dll
 *.o
 
 # Distribution / packaging
@@ -125,9 +126,14 @@ CPack*Config.cmake
 # Build products
 tests/test-*
 lib*.a
+*.app
+*_autogen
+*.capnp.*
 retro/cores/*-version
+retro/cores/*.json
 retro/VERSION.txt
 
 # Test stuff
 Temporary
 *.mkv
+*.search
diff --git a/.gitmodules b/.gitmodules
@@ -1,7 +1,15 @@
+[submodule "nes"]
+	path = cores/nes
+	url = https://github.com/libretro/libretro-fceumm.git
+        ignore = dirty
 [submodule "genesis"]
 	path = cores/genesis
 	url = https://github.com/libretro/Genesis-Plus-GX.git
         ignore = dirty
+[submodule "snes"]
+	path = cores/snes
+	url = https://github.com/libretro/snes9x.git
+        ignore = dirty
 [submodule "gtest"]
 	path = third-party/gtest
 	url = https://github.com/google/googletest.git
@@ -15,10 +23,28 @@
 	url = https://github.com/pybind/pybind11.git
 	branch = v2.2.1
         ignore = dirty
+[submodule "gb"]
+	path = cores/gb
+	url = https://github.com/libretro/gambatte-libretro.git
+        ignore = dirty
+[submodule "pce"]
+	path = cores/pce
+	url = https://github.com/libretro/beetle-pce-fast-libretro.git
+        ignore = dirty
 [submodule "third-party/luajit"]
 	path = third-party/luajit
 	url = http://luajit.org/git/luajit-2.0.git
         ignore = dirty
+[submodule "gba"]
+	path = cores/gba
+	url = https://github.com/libretro/mgba.git
+	branch = libretro
+        ignore = dirty
 [submodule "third-party/libzip"]
 	path = third-party/libzip
 	url = https://github.com/nih-at/libzip.git
+        ignore = dirty
+[submodule "third-party/capnproto"]
+	path = third-party/capnproto
+	url = https://github.com/capnproto/capnproto.git
+        ignore = dirty
diff --git a/.travis.yml b/.travis.yml
@@ -53,6 +53,8 @@ script:
     fi
   - |
     if [ "$TRAVIS_OS_NAME" == "osx" ]; then
+    brew update > /dev/null
+    brew install qt5 capnp lua@5.1 ccache
     curl -o miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
     bash miniconda.sh -b
     export PATH=/usr/local/opt/ccache/libexec:$HOME/miniconda3/bin:$PATH

diff --git a/CHANGES.md b/CHANGES.md
@@ -1,6 +1,22 @@
 # Changelog
 
-## 0.5.5 (unreleased)
+## 0.6.0
+
+* add cores for GB/C, GBA, GG, NES, SMS, SNES, TurboGrafx
+* add integration UI and searching
+* add basic scenario access to Lua
+* improve testing tooling
+* multi-agent support
+* cleaned up API:
+  * everything involving data, e.g. game and state listing, file lookup and data path handling, has been moved into retro.data
+  * importing retro.data.experiment or retro.data.contrib includes additional games and data that may not be as well-tested
+  * retro.ACTIONS_* and retro.STATE_* have been replaced with retro.Actions.* and retro.State.* enums
+  * retro.data.GameData objects no longer need an associated RetroEmulator object, though some functionality will not work
+* added RetroEnv.get_action_meaning to describe the correlation between actions and buttons
+* fixed d-pad action filtering so e.g. UP+DOWN+LEFT reduces to LEFT instead of NOOP
+* add parallelism, lossless videos, info dict and numpy action dumping to playback_movies
+
+## 0.5.5
 
 * allow Atari height to be different per game
 * update pybind11 dependency

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -33,6 +33,7 @@ if(NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE Release)
 endif()
 option(BUILD_TESTS "Should tests be built" ON)
+option(BUILD_UI "Should integration UI be built" OFF)
 option(BUILD_MANYLINUX "Should use static libraries compatible with manylinux1" OFF)
 
 set(BUILD_PYTHON ON CACHE BOOL "Build Python module")
@@ -46,6 +47,10 @@ if(WIN32 OR BUILD_MANYLINUX)
 endif()
 find_package(ZLIB REQUIRED)
 find_package(PkgConfig)
+if(NOT BUILD_MANYLINUX)
+    # CapnProto requires a newer kernel than manylinux1 provides
+    find_package(CapnProto QUIET)
+endif()
 
 pkg_search_module(LIBZIP QUIET libzip)
 
@@ -82,6 +87,7 @@ set(PYLIB_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" CACHE PATH "Build directory th
 file(MAKE_DIRECTORY "${PYLIB_DIRECTORY}/retro/cores")
 
 set(CORES)
+set(COREINFO)
 set(CORE_TARGETS)
 
 function(add_core PLATFORM CORE_NAME)
@@ -93,7 +99,9 @@ function(add_core PLATFORM CORE_NAME)
     set(TARGET_NAME ${CORE_NAME}_libretro${CMAKE_SHARED_LIBRARY_SUFFIX})
     get_filename_component(TARGET_PATH "${PYLIB_DIRECTORY}/retro/cores/${TARGET_NAME}" ABSOLUTE)
     list(APPEND CORES "${TARGET_PATH}")
+    list(APPEND COREINFO "${CMAKE_CURRENT_SOURCE_DIR}/retro/cores/${CORE_NAME}.json")
     set(CORES "${CORES}" PARENT_SCOPE)
+    set(COREINFO "${COREINFO}" PARENT_SCOPE)
     set(SUBDIR)
     if(EXISTS "cores/${PLATFORM}/Makefile.libretro")
         set(MAKEFILE Makefile.libretro)
@@ -128,8 +136,11 @@ function(add_core PLATFORM CORE_NAME)
         WORKING_DIRECTORY "cores/${PLATFORM}/${SUBDIR}"
         DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/retro/cores/${CORE_NAME}-version")
     unset(CORE_LDFLAGS)
+    add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/retro/cores/${CORE_NAME}.json"
+        COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/cores/${PLATFORM}.json" "${CMAKE_CURRENT_SOURCE_DIR}/retro/cores/${CORE_NAME}.json"
+        DEPENDS ${PLATFORM}-submodule)
 
-    add_custom_target(${PLATFORM} ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/retro/cores/${CORE_NAME}-version" "${TARGET_PATH}")
+    add_custom_target(${PLATFORM} ALL DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/retro/cores/${CORE_NAME}-version" "${CMAKE_CURRENT_SOURCE_DIR}/retro/cores/${CORE_NAME}.json" "${TARGET_PATH}")
     list(APPEND CORE_TARGETS "${PLATFORM}")
     set(CORE_TARGETS "${CORE_TARGETS}" PARENT_SCOPE)
 
@@ -139,8 +150,13 @@ function(add_core PLATFORM CORE_NAME)
         WORKING_DIRECTORY "cores/${PLATFORM}/${SUBDIR}")
 endfunction()
 
+add_core(snes snes9x)
 add_core(genesis genesis_plus_gx)
+add_core(nes fceumm)
 add_core(atari2600 stella)
+add_core(gb gambatte)
+add_core(gba mgba)
+add_core(pce mednafen_pce_fast)
 
 set(CLEAN_CORES)
 foreach(CORE IN LISTS CORE_TARGETS)
@@ -151,6 +167,44 @@ add_custom_target(clean-cores DEPENDS ${CLEAN_CORES})
 add_custom_target(cores DEPENDS ${CORE_TARGETS})
 unset(CLEAN_CORES)
 
+if(CMAKE_CROSSCOMPILING)  # cross builds: look up capnp / capnpc-c++ programs with find_program
+    find_program(CAPNP_EXECUTABLE capnp)
+    find_program(CAPNPC_CXX_EXECUTABLE capnpc-c++)
+endif()
+
+if(NOT CapnProto_FOUND AND NOT BUILD_MANYLINUX)  # find_package(CapnProto) did not succeed: fall back to the third-party/capnproto submodule
+    if (NOT CMAKE_CROSSCOMPILING OR CAPNP_EXECUTABLE)  # in a cross build the fallback is only used when a capnp program was located
+        set(CapnProto_FOUND ON)  # mark as found so the USE_CAPNP branch below is taken
+        set(BUILD_TESTING OFF CACHE BOOL "" FORCE)  # forced into the cache before add_subdirectory; presumably to skip capnproto's own tests -- confirm
+        if(CAPNP_EXECUTABLE)
+            set(CAPNP_LITE ON)  # NOTE(review): consumed by the capnproto subproject, not by this file -- confirm its effect there
+        endif()
+        set(CAPNP_INCLUDE_DIRECTORY third-party/capnproto/c++/src)
+        add_subdirectory(third-party/capnproto/c++)
+    endif()
+endif()
+if(CapnProto_FOUND)  # true for either a found package or the bundled fallback above
+    add_definitions(-DUSE_CAPNP)  # directory-scoped define, applies to targets created after this point
+    if(CAPNP_LIB_CAPNP)  # library path variables are set: wrap them in imported targets so CapnProto::capnp can be linked by name
+        add_library(CapnProto::capnp SHARED IMPORTED)
+        add_library(CapnProto::kj SHARED IMPORTED)
+        set_target_properties(CapnProto::kj PROPERTIES IMPORTED_LOCATION "${CAPNP_LIB_KJ}")
+        set_target_properties(CapnProto::capnp PROPERTIES
+            IMPORTED_LOCATION "${CAPNP_LIB_CAPNP}"
+            INTERFACE_LINK_LIBRARIES CapnProto::kj)
+    endif()
+    if(CMAKE_CROSSCOMPILING)  # point the tool targets, when they exist, at the programs located by find_program above
+        if(TARGET CapnProto::capnp_tool)
+            set_target_properties(CapnProto::capnp_tool PROPERTIES IMPORTED_LOCATION "${CAPNP_EXECUTABLE}")
+        endif()
+        if(TARGET CapnProto::capnpc_cpp)
+            set_target_properties(CapnProto::capnpc_cpp PROPERTIES IMPORTED_LOCATION "${CAPNPC_CXX_EXECUTABLE}")
+        endif()
+    endif()
+else()  # neither a system package nor the bundled copy is usable
+    message(WARNING "Could not find CapnProto, disabling search save/load")
+endif()
+
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)
 set(BUILD_STATIC_LIBS ON CACHE BOOL "" FORCE)
 
@@ -173,8 +227,10 @@ add_library(retro-base STATIC
     src/memory.cpp
     src/movie.cpp
     src/movie-bk2.cpp
+    src/movie-fm2.cpp
     src/script.cpp
     src/script-lua.cpp
+    src/search.cpp
     src/utils.cpp
     src/zipfile.cpp
     ${LUA_LIBRARY})
@@ -184,6 +240,12 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
     target_link_libraries(retro-base dl)
 endif()
 
+if(CapnProto_FOUND)  # the serialization library is only defined when Cap'n Proto is available
+    capnp_generate_cpp(CAPNP_SRC CAPNP_H src/serialize.capnp)  # presumably fills CAPNP_SRC / CAPNP_H with the generated sources / headers -- confirm against the CapnProto CMake module
+    add_library(retro-capnp STATIC ${CAPNP_SRC} src/data-capnp.cpp)  # generated schema code plus the hand-written glue source
+    target_link_libraries(retro-capnp retro-base CapnProto::capnp)
+endif()
+
 include_directories(src retro third-party/pybind11/include third-party third-party/gtest/googletest/include ${PYTHON_INCLUDE_DIRS})
 if(BUILD_PYTHON)
     add_library(retro SHARED src/retro.cpp)
@@ -206,3 +268,18 @@ if(BUILD_TESTS)
     add_subdirectory(third-party/gtest/googlemock)
     add_subdirectory(tests)
 endif()
+
+if(BUILD_UI)  # the integration UI is opt-in; the BUILD_UI option defaults to OFF
+    add_subdirectory(src/ui)
+endif()
+
+execute_process(COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/setup.py" --version OUTPUT_VARIABLE CPACK_PACKAGE_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)  # package version comes from setup.py; resolved relative to this CMakeLists.txt so it still works when this tree is a subproject
+set(CPACK_PACKAGE_VENDOR OpenAI)
+set(CPACK_ZIP_COMPONENT_INSTALL ON)  # enable component-based packaging for the ZIP generator
+if(APPLE)  # only macOS and Windows pick a generator here; other platforms keep CPack's default
+    set(CPACK_GENERATOR DragNDrop)
+elseif(WIN32)
+    set(CPACK_GENERATOR ZIP)
+endif()
+set(CPACK_COMPONENTS_ALL gym-retro-integration)  # package only this install component; presumably declared under src/ui -- confirm
+include(CPack)  # included last so the CPACK_* variables set above are picked up
diff --git a/IntegratorsGuide.md b/IntegratorsGuide.md
@@ -0,0 +1,74 @@
+# Integrator's Guide
+
+## How to integrate a game
+
+1. Open Gym Retro Integration after setting up the UI.
+2. Load a new game — `Command-Shift-O`
+3. Select the ROM of the game you'd like to integrate in the menu.
+4. Name the game.
+5. The game will open. To see what keys correspond to what controls in-game, go to Window > Control.
+6. Using the available controls, select a level, option, mode, character, etc. and take note of these options.
+7. When you are finally at the first playable moment of the game, pause the game (in the integrator, not within the actual game) (`Command-P`), and save the state (`Command-S`). This moment can be hard to find, and you might have to go back through and restart the game (`Command-R`) to find and save that exact state.
+8. Save the state — include the options you chose in the previous menus — e.g. `SailorMoon.QueenBerylsCastle.Easy.Level1.state`
+
+## Integrating
+
+To integrate a game you need to define a done condition and a reward function.  The done condition lets `gym-retro` know when to end a game session, while the reward function provides a simple numeric goal for machine learning agents to maximize.
+
+To define these, you find variables from the game's memory, such as the player's current score and lives remaining, and use those to create the done condition and reward function.  An example done condition is when the `lives` variable is equal to `0`, an example reward function is the change in the `score` variable.
+
+Note: if the game requires that you hit the `Start` button to play, for instance after dying, then you need to modify the scenario file to allow this as `Start` is disallowed by default.  See `data/KidChameleon-Genesis/scenario.json` for an example of this.
+
+### Done Condition
+
+This is usually the easier of the two.  The best done condition to use is the Game Over or Continue screen after you run out of lives.  For some games this is when you have zero lives left, for others it is `-1` lives, and for others it can be pretty hard to detect.
+
+It's better to have a simple and reliable but slightly incorrect done condition (e.g. ending the game when you still have 1 life left because it's hard to detect the 0 life case) than to have a done condition that is unreliable, such as a `gameover` variable that detects when the gameover screen is present.
+
+If you create a `gameover` variable, make sure to test it with a replay that plays multiple levels in a row to make sure it doesn't fire accidentally.
+
+### Reward Function
+
+Reinforcement learning agents try to maximize the reward function.  The ideal reward function would be that you get 1 point for beating the game.  There's no way to maximize that besides beating the game.
+
+That reward is impractical though, because existing reinforcement learning algorithms are unable to make progress with a reward that is so hard to get.  Instead we can specify some easier to get reward that, if you maximize it, should result in beating the game.
+
+If the game has a score, this is often a good choice.  In some games however, you can get as much score as you want by standing in one place and attacking the same enemy over and over as it respawns.  Because that is so different from beating the game, it's best to have an alternative reward, though these are often very game specific.  Be careful with non-score variables though, they can be tricky to get right, make sure to play multiple levels using the reward to see if it makes sense.
+
+If you use the score, define a `score` variable and set the reward such that the reward the agent receives matches the score displayed on the screen.  Make sure to check that you're not off by a factor of 10 or 100 by comparing to the Cumulative value displayed in the Scenario Information pane.
+
+### What to look for in a game — is a game integratable?
+
+1. Does the game have a score?
+2. Is the score sufficiently dense? (Nice to have, not a need to have.)
+3. Does maximizing the `score` variable chosen accurately represent how a human might play the game?
+4. Would a progress variable be more appropriate?
+5. Does the game end or can you restart in-game ad infinitum?
+
+#### Finding Variables
+
+It's best to keep a consistent pattern for the different types of variables you might add to a game's `data` file.  Here are some tips:
+
+1. It's pretty common for multiple different variables to group themselves together. When narrowing down the search for a particular variable, consider whether a candidate address is near the locations of other variables you have already found.
+2. Score occasionally is stored in individual locations — e.g. if the score displayed is 123400, 1, 2, 3, 4, 0, 0 all will update separately.  If the score is broken into multiple variables, make sure you have penalties set for the individual digits.
+3. Check for uncommon lengths of 3, 5, etc. Games don't always store score in nice neat lengths of multiples of 2, and making sure the variable is the appropriate length is key — if you go too short, then no progress over a certain score is tracked, if you go too long, then the score can suddenly jump between levels, etc.  If you can't decide between two possible lengths, the shorter length is the safer bet.
+4. The score variable doesn't always include the trailing 0s shown on screen — while the screen might say 2400, the score variable might only store 24.
+5. It's very uncommon, but occasionally, scores can be transposed by a value of 1 — e.g., while the screen says '123456', the variable is '012345'. Some of these scores start at -1 rather than 0. This can be fixed with lua.
+6. It's very uncommon, but some games track health symbolically rather than with a single number. For example, the starting health bar could be represented by '9999999', which displays as a full health bar, but becomes '99999' after losing two health units.
+7. In defining a game over variable, look for a binary value that switches between 0 and 1 – 0 when the game is in play, 1 when the game is over.
+
+Once you've found a variable, making sure the address and type are correct is important for avoiding issues later.  One of the best ways to do this is to change the value in memory, then change it in the game and make sure it updates correctly.  For instance if you have a variable called "score" and you want to see if it is >d2 or >d4, set the type to >d4 and set the value to the maximum for >d2, 9999, and then increase the score by playing the game.  If the score increases by 1, and the value in the memory viewer is 10000 and the value in the game is 10000, then >d4 is correct.  If the value in the memory viewer or game is 0 or 9999, then it's likely that >d2 is the correct type or that the address is wrong. You may also want to check if >d3 is the correct type by changing the score to 999999 and playing for a bit.
+
+You can also check to see if the data type is correct by watching how it increments and decrements in the search window as you play the game. For example, if the value of the variable jumps from `0` to `255`, it's likely that this is really a signed value (represented by `i`) that went from 0 to -1 and is being shown with an unsigned type -- unsigned values (represented by `u`) are either positive or zero. When you search for a variable, different formats at the same location will appear next to each other in the search window. For example, at address `16769105`, you might see `>u2`, `>i2`, as well as `>d2` return as search results. Play the game for a little bit, and you might notice that one of the search results increments/decrements in uneven or unusual ways in relation to the other search results at the same address.
+
+For example, if `|u1` at `7e094d` goes from 144 -> 137 while `|d1` at `7e094d` decrements from 90 -> 89, the type is probably `|d1`.
+
+If you update the value of a variable but it doesn't have any effect on the game, it's likely that you've found a copy of the variable, not the correct address.  An example would be a lives variable where setting it to some higher value and then dying in the game reveals that you didn't actually increase the number of lives.  It's often the case that you have to change the value in the game to get things to update (such as losing a life in the previous example).  Ideally you can find the original since it's more likely to be correct, so if you can, find a variable that, when updated, updates the corresponding value in the game.  The most common source of this is a high score variable which will have the same value as the normal score variable, but updating it will have no effect on your score.
+
+#### Common Errors
+
+* Wrong type for variable: if your score variable is actually >d2 and you put >d4, you may not notice until you get to some later level and the memory address next to the score is used for something, suddenly giving you a very large score.
+
+* Incorrect done condition: it might be that if you run out of time or die in some unusual way that the done condition is not detected correctly.  Make sure to test unusual ways of ending the game, and make sure that your done condition doesn't fire upon completing a level (unless it's the final level of the game).  If you're able to hit continue after dying, make sure that the game ends before the agent can hit continue.
+
+* Score is used as reward, but it's different from the score displayed in the game: this could happen if you forgot a factor of 10 in the reward, or if you're calculating the score based on some other variables (e.g. the upper and lower digits of the score, or some variable like number of enemies killed * 100) and there is a bug.  If you play the game for a while and the reward diverges slightly from the in-game score, it's possible that the score digits are not always updated at the same time.  In this case, you can use the change in maximum score as the reward, see `data/GuardianLegend-Nes/script.lua` for an example of this.