-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Synchronize rollout with proper replay buffer, reorganize
Improvement on #353 and setup for #354. Rewrite training algorithm (again) to remove the concept of episodes and instead focus on pure learning steps according to the DQN algorithm. Also add a proper replay buffer implementation. Add/rewrite some configs/metrics code to mesh with above. Also reorganize source tree, general housekeeping.
- Loading branch information
1 parent
4c252d4
commit a979442
Showing
62 changed files
with
1,989 additions
and
1,556 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import {Choice} from "../../psbot/handlers/battle/agent"; | ||
import {ReadonlyBattleState} from "../../psbot/handlers/battle/state"; | ||
import {Rng, shuffle} from "../../util/random"; | ||
|
||
/**
 * BattleAgent that chooses actions randomly.
 *
 * Hands `choices` to `shuffle` and ignores its return value, so the new
 * ordering is presumably applied to the caller's array in place (confirm
 * against `shuffle`'s definition).
 *
 * @param state Current battle state. Not consulted by this agent.
 * @param choices Candidate choices to be reordered.
 * @param random Optional random number generator forwarded to `shuffle`.
 * @returns A promise that resolves with no value once shuffling is done.
 */
export async function randomAgent(
    state: ReadonlyBattleState,
    choices: Choice[],
    random?: Rng,
): Promise<void> {
    shuffle(choices, random);
    // Keep one await so the function still yields before resolving.
    await Promise.resolve();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import * as tf from "@tensorflow/tfjs"; | ||
import {Experience} from "./Experience"; | ||
|
||
/**
 * Mirror of {@link Experience} whose fields hold {@link tf.Tensor tensors}
 * rather than raw values.
 *
 * Field mapping:
 * - `number` or `boolean` fields become {@link tf.Scalar}.
 * - `Float32Array[]` fields become `tf.Tensor[]`.
 * - Any other field type resolves to `never`.
 */
export type TensorExperience = {
    [K in keyof Experience]: Experience[K] extends boolean | number
        ? tf.Scalar
        : Experience[K] extends Float32Array[]
          ? tf.Tensor[]
          : never;
};
|
||
/**
 * Batch of {@link Experience}s whose values are stacked into
 * {@link tf.Tensor tensors}.
 *
 * Conceptually a list of {@link TensorExperience}s, except that each field
 * holds the stacked values of the whole batch:
 * - `number` or `boolean` fields become {@link tf.Tensor1D}.
 * - `Float32Array[]` fields become `tf.Tensor[]`.
 * - Any other field type resolves to `never`.
 */
export type BatchTensorExperience = {
    [K in keyof Experience]: Experience[K] extends boolean | number
        ? tf.Tensor1D
        : Experience[K] extends Float32Array[]
          ? tf.Tensor[]
          : never;
};
Oops, something went wrong.