From 236abb0db2f67f7124fb2f58bf0f6afe4117e6d0 Mon Sep 17 00:00:00 2001 From: Sam Date: Sun, 30 Dec 2018 18:04:20 -0500 Subject: [PATCH] reward function was too jagged --- gym_micropolis/envs/env.py | 4 ++-- notes/engine.txt | 2 ++ notes/notes | 10 ++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 notes/engine.txt create mode 100644 notes/notes diff --git a/gym_micropolis/envs/env.py b/gym_micropolis/envs/env.py index 5c84084d..8e684a42 100644 --- a/gym_micropolis/envs/env.py +++ b/gym_micropolis/envs/env.py @@ -230,8 +230,8 @@ def getPopReward(self): zone_variety += 1 if indPop > 0: zone_variety += 1 - zone_bonus = (zone_variety - 1) * curr_pop - curr_pop += zone_bonus + #zone_bonus = (zone_variety - 1) * curr_pop + #curr_pop += zone_bonus #curr_pop = np.log(resPop + 1) + np.log(comPop + 1) + np.log(indPop + 1) diff --git a/notes/engine.txt b/notes/engine.txt new file mode 100644 index 00000000..89058c18 --- /dev/null +++ b/notes/engine.txt @@ -0,0 +1,2 @@ +['__class__', '__del__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattr__', '__getattribute__', '__hash__', '__init__', '__module__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__swig_destroy__', '__swig_getmethods__', '__swig_setmethods__', '__weakref__', '_invokeCallback', 'addRobot', 'addView', 'addZone', 'airportPop', 'animateTiles', 'autoBudget', 'autoBulldoze', 'autoGoto', 'blinkFlag', 'callback', 'callbackData', 'callbackHook', 'canDriveOn', 'cashFlow', 'categoryLast', 'censusChanged', 'changeCensus', 'changeEval', 'checkGrowth', 'checkSpriteCollision', 'checkWet', 'churchPop', 'cityAssessedValue', 'cityClass', 'cityEvaluation', 'cityFileName', 'cityMonth', 'cityMonthLast', 'cityName', 'cityPop', 'cityPopDelta', 'cityPopLast', 'cityScore', 'cityScoreDelta', 'cityTax', 'cityTaxAverage', 'cityTime', 'cityTimeLast', 'cityYear', 'cityYearLast', 'cityYes', 'clearMap', 'clearRobots', 
'clearUnnatural', 'clearZones', 'coalPowerPop', 'comCap', 'comHist', 'comHist10Max', 'comHist120Max', 'comLast', 'comPop', 'comRateMap', 'comZonePop', 'commercialImage', 'countProblems', 'crimeAverage', 'crimeHist', 'crimeRamp', 'crimeRateImage', 'crimeRateMap', 'currentYear', 'dataColorMap', 'dataTileEngine', 'description', 'destroyAllSprites', 'destroyMapTile', 'destroySprite', 'didLoadCity', 'didLoadScenario', 'didSaveCity', 'didTool', 'didntLoadCity', 'didntSaveCity', 'disasterEvent', 'disasterWait', 'doAirplaneSprite', 'doAnimation', 'doAutoGoto', 'doBudget', 'doBudgetFromMenu', 'doBudgetNow', 'doBusSprite', 'doCopterSprite', 'doEarthquake', 'doExplosionSprite', 'doInitialEval', 'doLoseGame', 'doMakeSound', 'doMessages', 'doMonsterSprite', 'doNewGame', 'doNotices', 'doPlayNewCity', 'doReallyStartGame', 'doSaveCityAs', 'doScenarioScore', 'doScoreCard', 'doShipSprite', 'doStartLoad', 'doStartScenario', 'doTimeStuff', 'doTool', 'doTornadoSprite', 'doTrainSprite', 'doUpdateHeads', 'doWinGame', 'drawMonth', 'drawValve', 'dumpRobots', 'enableDisasters', 'enableSound', 'evalChanged', 'evalInit', 'explodeSprite', 'expressInterest', 'externalMarket', 'faith', 'fireBomb', 'fireCoverageImage', 'fireEffect', 'fireFund', 'firePercent', 'firePop', 'fireSpend', 'fireStationEffectMap', 'fireStationMap', 'fireStationPop', 'fireValue', 'floodCount', 'formatDelta', 'formatMoney', 'formatNumber', 'formatPercent', 'freePtr', 'gameLevel', 'gameMode', 'generateBus', 'generateCopter', 'generateMap', 'generateNewMetaCity', 'generatePlane', 'generateShip', 'generateSomeCity', 'generateTrain', 'generatedCitySeed', 'generic_engine', 'getAllImageAlphaSize', 'getChar', 'getCommercialImageAlphaSize', 'getCrimeRate', 'getCrimeRateImageAlphaSize', 'getCrimeRateMapBuffer', 'getDataImageAlphaSize', 'getDemands', 'getDir', 'getDistance', 'getFireCoverage', 'getFireCoverageImageAlphaSize', 'getFireCoverageMapBuffer', 'getHistory', 'getHistoryRange', 'getIndString', 'getIndustrialImageAlphaSize', 
'getLandValue', 'getLandValueImageAlphaSize', 'getLandValueMapBuffer', 'getMapBuffer', 'getMapImage', 'getMetaData', 'getMicropolisVersion', 'getNextAnimatedTile', 'getPoliceCoverage', 'getPoliceCoverageImageAlphaSize', 'getPoliceCoverageMapBuffer', 'getPollutionDensity', 'getPollutionDensityImageAlphaSize', 'getPollutionDensityMapBuffer', 'getPopulationDensity', 'getPopulationDensityImageAlphaSize', 'getPopulationDensityMapBuffer', 'getPowerGrid', 'getPowerGridImageAlphaSize', 'getPowerGridMapBuffer', 'getProblemNumber', 'getProblemVotes', 'getRateOfGrowth', 'getRateOfGrowthImageAlphaSize', 'getRateOfGrowthMapBuffer', 'getResidentialImageAlphaSize', 'getResource', 'getRobot', 'getSprite', 'getTile', 'getTrafficDensity', 'getTrafficDensityImageAlphaSize', 'getTrafficDensityMapBuffer', 'getTransportationImageAlphaSize', 'graph10Max', 'graph120Max', 'graphDoer', 'handle_autoGoto', 'handle_didGenerateMap', 'handle_didLoadCity', 'handle_didLoadScenario', 'handle_didSaveCity', 'handle_didTool', 'handle_didntLoadCity', 'handle_didntSaveCity', 'handle_invalidateEditors', 'handle_invalidateMaps', 'handle_loseGame', 'handle_makeSound', 'handle_newGame', 'handle_playNewCity', 'handle_reallyStartGame', 'handle_saveCityAs', 'handle_showBudgetAndWait', 'handle_showPicture', 'handle_showZoneStatus', 'handle_simRobots', 'handle_simulateChurch', 'handle_startEarthquake', 'handle_startLoad', 'handle_startScenario', 'handle_update', 'handle_winGame', 'historyInitialized', 'homeDir', 'hospitalPop', 'indCap', 'indHist', 'indHist10Max', 'indHist120Max', 'indLast', 'indPop', 'indZonePop', 'industrialImage', 'initFundingLevel', 'initGame', 'initGamePython', 'initGraphMax', 'initGraphs', 'initSimLoad', 'initSprite', 'interests', 'invalidateMaps', 'invokeCallback', 'isRoad', 'landValueAverage', 'landValueImage', 'landValueMap', 'loadCity', 'loadFile', 'loadFileDir', 'loadMetaCity', 'loadMetaScenario', 'loadScenario', 'makeEarthquake', 'makeExplosion', 'makeExplosionAt', 'makeFire', 
'makeFireBombs', 'makeFlood', 'makeImage', 'makeIsland', 'makeMeltdown', 'makeMonster', 'makeMonsterAt', 'makeMutable', 'makeShipHere', 'makeSingleLake', 'makeSound', 'makeSprite', 'makeTornado', 'makeTraffic', 'makeTrafficAt', 'map', 'mapSerial', 'messageNumber', 'messageTimeLast', 'messages', 'metaFileName', 'miscHist', 'moneyHist', 'moveObjects', 'mustDrawBudget', 'mustUpdateFunds', 'mustUpdateOptions', 'newPower', 'newPtr', 'newSprite', 'notices', 'nuclearPowerPop', 'pause', 'phaseCycle', 'plopBRiver', 'plopSRiver', 'policeCoverageImage', 'policeEffect', 'policeFund', 'policePercent', 'policeSpend', 'policeStationEffectMap', 'policeStationMap', 'policeStationPop', 'policeValue', 'pollutionAverage', 'pollutionDensityImage', 'pollutionDensityMap', 'pollutionHist', 'pollutionRamp', 'populationDensityImage', 'populationDensityMap', 'powerGridColorMap', 'powerGridImage', 'powerGridMap', 'poweredZoneCount', 'problemOrder', 'problemVotes', 'railTotal', 'rateColorMap', 'rateOfGrowthImage', 'rateOfGrowthMap', 'readOnly', 'reallyUpdateFunds', 'removeRobot', 'removeView', 'removeZone', 'resCap', 'resHist', 'resHist10Max', 'resHist120Max', 'resLast', 'resPop', 'resZonePop', 'resetCity', 'resetRealTime', 'residentialImage', 'resourceDir', 'resources', 'resume', 'revokeInterest', 'roadEffect', 'roadFund', 'roadPercent', 'roadSpend', 'roadTotal', 'roadValue', 'robotDict', 'robots', 'running', 'saveCity', 'saveCityAs', 'saveFile', 'saveFileDir', 'saveFileName', 'saveMetaCity', 'scenario', 'scenarios', 'scoreDoer', 'scoreType', 'scoreWait', 'seaportPop', 'sendMessage', 'sendMessages', 'sendUpdate', 'setAutoBudget', 'setAutoBulldoze', 'setAutoGoto', 'setCityName', 'setCityTax', 'setCleanCityName', 'setCrimeRate', 'setDemand', 'setDoAnimation', 'setDoMessages', 'setDoNotices', 'setEnableDisasters', 'setEnableSound', 'setFire', 'setFireCoverage', 'setFunds', 'setGameLevel', 'setGameLevelFunds', 'setGameMode', 'setHistory', 'setLandValue', 'setPasses', 'setPoliceCoverage', 
'setPollutionDensity', 'setPopulationDensity', 'setPowerGrid', 'setRateOfGrowth', 'setSpeed', 'setTile', 'setTrafficDensity', 'setYear', 'showBudgetWindowAndStartWaiting', 'showValves', 'simCycle', 'simLoops', 'simPass', 'simPasses', 'simPaused', 'simPausedSpeed', 'simRobots', 'simSpeed', 'simSpeedMeta', 'simTick', 'simUpdate', 'simZones', 'speedCycle', 'spend', 'spriteList', 'spriteNotInBounds', 'stadiumPop', 'startFire', 'startFireInZone', 'startTimer', 'startingYear', 'stopTimer', 'stringTables', 'taxFlag', 'taxFund', 'tempMap1', 'tempMap2', 'tempMap3', 'terrainColorMap', 'terrainCreateIsland', 'terrainCurveLevel', 'terrainDensityMap', 'terrainLakeLevel', 'terrainTreeLevel', 'testBounds', 'this', 'tickCount', 'tickEngine', 'tickTimer', 'tilesAnimated', 'timeDelay', 'timerActive', 'timerId', 'title', 'toolDown', 'toolDrag', 'totalFunds', 'totalFundsLast', 'totalPop', 'totalPopLast', 'totalZonePop', 'trafficAverage', 'trafficDensityImage', 'trafficDensityMap', 'transportationImage', 'treeSplash', 'tryOther', 'turnTo', 'unpoweredZoneCount', 'update', 'updateBudget', 'updateDate', 'updateEvaluation', 'updateFundEffects', 'updateFunds', 'updateGameLevel', 'updateGraphs', 'updateHeads', 'updateMaps', 'updateOptions', 'updateUserInterface', 'userData', 'valveFlag', 'views', 'zoneMap'] + diff --git a/notes/notes b/notes/notes new file mode 100644 index 00000000..1f5e2ce9 --- /dev/null +++ b/notes/notes @@ -0,0 +1,10 @@ +- for now: initialize tilemap with padding, corecontrol pads bot actions itself + +- is there any point at which continuous strips of road become worthwhile? Or will the agent be able to effectively maximize population without roads? + - increase difficulty level + - limit funds (and terminate, negatively reward when funds are depleted) + - start off with few enough funds that rushing directly to the optimal city layout is not feasible, so that the agent has to grow its funds (without explicit reward to do so). 
+ - try not to explicitly reward for roads + +1) achieve success with the current architecture on the largest possible map size +2) tweak and compare architectures