This repository has been archived by the owner on Jun 13, 2020. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
second pass at ddpg; is learning now
- Loading branch information
Showing
7 changed files
with
389 additions
and
183 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/usr/bin/env bash
# Create the plot output directory and feed plots.R to R on stdin.
# -p: do not fail when /tmp/plots already exists from a previous run.
mkdir -p /tmp/plots
R --vanilla < plots.R
|
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
#!/usr/bin/env python | ||
import argparse, sys, re, json | ||
import numpy as np | ||
from collections import Counter | ||
|
||
def _open_output(path, header):
    """Open `path` for writing, write the `header` row, return the handle."""
    out = open(path, "w")
    out.write(header)
    return out


# One space-separated table per metric. Each file starts with a header row
# naming its columns; data rows are appended while stdin is parsed.
f_actions = _open_output("/tmp/actions", "time type x y\n")
f_q_values = _open_output("/tmp/q_values", "time net_type q_value\n")
f_episode_len = _open_output("/tmp/episode_stats", "episode len total_reward\n")
f_eval = _open_output("/tmp/eval", "time steps total_reward\n")
f_batch_num_terminal = _open_output("/tmp/batch_num_terminal", "time batch_num_terminals\n")
f_gradient_l2_norms = _open_output("/tmp/gradient_l2_norms", "time source l2_norm\n")
f_q_loss = _open_output("/tmp/q_loss", "time q_loss\n")
|
||
# How many times each tag has been passed to should_emit().
freq = Counter()
# Sampling period per tag: emit every N-th occurrence. Tags that are not
# listed here are emitted every 100th occurrence.
emit_freq = {"EVAL": 1, "ACTOR_L2_NORM": 20, "CRITIC_L2_NORM": 20, "Q LOSS": 10}


def should_emit(tag):
    """Count one occurrence of `tag` and say whether it should be emitted.

    Every call increments freq[tag], so the call itself has a side effect.
    Returns True on each emit_freq[tag]-th call for that tag (every 100th
    call when the tag has no entry in emit_freq), otherwise False.
    """
    freq[tag] += 1
    return freq[tag] % emit_freq.get(tag, 100) == 0
|
||
# Number of STATS lines whose JSON payload could not be decoded.
n_parse_errors = 0

# Compiled once outside the loop. Raw strings keep `\[` and `\t` as regex
# escapes instead of relying on (invalid) string escapes.
_UP_TO_LAST_OPEN_BRACKET = re.compile(r".*\[")
_ACTIONS_LINE = re.compile(r"ACTIONS\t\[(.*), (.*)\]\t\[(.*), (.*)\]")

# Value of the most recent "TIME ..." line; written as the time column of
# every row emitted after it.
time = None
for line in sys.stdin:
    if line.startswith("TIME"):
        time = line.strip().replace("TIME ", "")
        continue
    if time is None:
        # Ignore everything that precedes the first TIME line.
        continue

    line = line.strip()
    if line.startswith("STATS"):
        cols = line.split("\t")
        assert len(cols) == 2
        try:
            d = json.loads(cols[1])
            if should_emit("EPISODE_LEN"):
                episode = d["episode"]
                total_reward = d["total_reward"]
                # Records without episode_len reuse total_reward as the length.
                episode_len = d.get("episode_len", total_reward)
                f_episode_len.write("%s %s %s\n" % (episode, episode_len, total_reward))
        except ValueError:
            # interleaving output :/
            n_parse_errors += 1

    elif "actor gradient l2_norm" in line and should_emit("ACTOR_L2_NORM"):
        # Keep what follows the last "[" and drop the closing "]".
        norm = _UP_TO_LAST_OPEN_BRACKET.sub("", line).replace("]", "")
        f_gradient_l2_norms.write("%s actor %s\n" % (time, norm))

    elif "critic gradient l2_norm" in line and should_emit("CRITIC_L2_NORM"):
        norm = _UP_TO_LAST_OPEN_BRACKET.sub("", line).replace("]", "")
        f_gradient_l2_norms.write("%s critic %s\n" % (time, norm))

    elif line.startswith("ACTIONS") and should_emit("ACTIONS"):
        m = _ACTIONS_LINE.match(line)
        if m:
            pre_x, pre_y, post_x, post_y = m.groups()
            f_actions.write("%s pre %s %s\n" % (time, pre_x, pre_y))
            f_actions.write("%s post %s %s\n" % (time, post_x, post_y))

    elif line.startswith("EXPECTED_Q_VALUES") and should_emit("EXPECTED_Q_VALUES"):
        cols = line.split(" ")
        assert len(cols) == 3
        assert cols[0] == "EXPECTED_Q_VALUES"
        f_q_values.write("%s main %f\n" % (time, float(cols[1])))
        f_q_values.write("%s target %f\n" % (time, float(cols[2])))

    elif line.startswith("EVAL") and should_emit("EVAL"):
        cols = line.split(" ")
        if len(cols) == 2:  # OLD FORMAT
            tag, steps = cols
            assert tag == "EVAL"
            total_reward = steps
        elif len(cols) == 3:
            tag, steps, total_reward = cols
            assert tag == "EVAL"
        else:
            assert False
        # Compare as numbers: the fields are strings, and a str-vs-int
        # comparison is always true on Python 2 and a TypeError on Python 3.
        assert float(steps) >= 0
        assert float(total_reward) >= 0
        f_eval.write("%s %s %s\n" % (time, steps, total_reward))

    elif line.startswith("NUM_TERMINALS_IN_BATCH") and should_emit("NUM_TERMINALS_IN_BATCH"):
        cols = line.split(" ")
        assert len(cols) == 2
        assert cols[0] == "NUM_TERMINALS_IN_BATCH"
        f_batch_num_terminal.write("%s %f\n" % (time, float(cols[1])))

    elif line.startswith("Q LOSS") and should_emit("Q LOSS"):
        cols = line.split(" ")
        assert len(cols) == 3
        assert cols[0] == "Q"
        assert cols[1] == "LOSS"  # o_O
        f_q_loss.write("%s %f\n" % (time, float(cols[2])))

# Close explicitly so every table is flushed before the summary is printed.
for _f_out in (f_actions, f_q_values, f_episode_len, f_eval,
               f_batch_num_terminal, f_gradient_l2_norms, f_q_loss):
    _f_out.close()

# Single-argument print(...) produces the same text on Python 2 and 3.
print("n_parse_errors %s" % n_parse_errors)
print(freq)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,51 +1,63 @@ | ||
library(ggplot2) | ||
library(grid) | ||
library(gridExtra) | ||
# Actions table (header row; the columns time, type, x and y are used below).
# Rows are split on type == 'pre' / 'post' for the two 2-d histograms and
# faceted by type for the over-time plots.
df = read.delim("/tmp/actions", h=T, sep=" ")
png("/tmp/plots/00a_pre_noise_x_y_scatter.png", width=300, height=300)
ggplot(df[df$type=='pre',], aes(x, y)) + geom_bin2d() + labs(title="x pre noise")
dev.off()
png("/tmp/plots/00b_post_noise_x_y_scatter.png", width=300, height=300)
ggplot(df[df$type=='post',], aes(x, y)) + geom_bin2d() + labs(title="x post noise")
dev.off()
png("/tmp/plots/00c_x_over_time.png", width=640, height=400)
ggplot(df, aes(time, x)) + geom_point(alpha=0.1) + geom_smooth() + facet_grid(type~.) + labs(title="x over time")
dev.off()
png("/tmp/plots/00d_y_over_time.png", width=640, height=400)
ggplot(df, aes(time, y)) + geom_point(alpha=0.1) + geom_smooth() + facet_grid(type~.) + labs(title="y over time")
dev.off()
|
||
# run parse_foo.sh first | ||
# Q-value table (header row; columns time, q_value and net_type are used):
# points and the smoothed trend are both colored by net_type.
df = read.delim("/tmp/q_values", h=T, sep=" ") | ||
png("/tmp/plots/05a_action_q_values.png", width=640, height=320) | ||
ggplot(df, aes(time, q_value)) + geom_point(alpha=0.2, aes(color=net_type)) + geom_smooth(aes(color=net_type)) + labs(title="q values over time") | ||
dev.off() | ||
|
||
# NOTE(review): this is a diff hunk; old and new lines are interleaved without +/- markers.
# The /tmp/actions_pre_noise lines (plots 01-03, which index rows by n = 1:nrow(df)) look like
# the removed version and the /tmp/episode_stats lines (plots 06a-06c) like the added one.
# TODO confirm against the repository before running this as a script.
df = read.delim("/tmp/actions_pre_noise", h=F, sep=" ", col.names=c("x", "y")) | ||
png("/tmp/plots/01_x_y_scatter.png") | ||
ggplot(df, aes(x, y)) + geom_point() | ||
# Episode stats table (header row; columns episode, len and total_reward are used below).
df = read.delim("/tmp/episode_stats", h=T, sep=" ") | ||
png("/tmp/plots/06a_episode_len.png", width=640, height=320) | ||
ggplot(df, aes(episode, len)) + geom_point(alpha=0.2) + geom_smooth() + labs(title="episode len") | ||
dev.off() | ||
df$n = 1:nrow(df) | ||
png("/tmp/plots/02_x_pre_noise.png") | ||
ggplot(df, aes(n, x)) + geom_point() + labs(title="x pre noise") | ||
png("/tmp/plots/06b_episode_rewards.png", width=640, height=320) | ||
ggplot(df, aes(episode, total_reward)) + geom_point(alpha=0.2) + geom_smooth() + labs(title="episode total reward") | ||
dev.off() | ||
png("/tmp/plots/03_y_pre_noise.png") | ||
ggplot(df, aes(n, y)) + geom_point() + labs(title="y pre noise") | ||
png("/tmp/plots/06c_episode_stats.png", width=320, height=320) | ||
ggplot(df, aes(len, total_reward)) + geom_point(alpha=0.2) + labs(title="episode step vs reward") | ||
dev.off() | ||
|
||
# NOTE(review): this is a diff hunk; old and new lines are interleaved without +/- markers.
# The /tmp/actions_post_noise lines (the 03_* plots, indexed by row number n) look like the
# removed version and the /tmp/eval lines (plots 07a/07b) like the added one. TODO confirm.
df = read.delim("/tmp/actions_post_noise", h=F, sep=" ", col.names=c("x", "y")) | ||
png("/tmp/plots/03_x_y_scatter.png") | ||
ggplot(df, aes(x, y)) + geom_point() | ||
# Eval table (header row; columns time, steps and total_reward are used below).
df = read.delim("/tmp/eval", h=T, sep=" ") | ||
png("/tmp/plots/07a_eval_episode_len.png", width=640, height=320) | ||
ggplot(df, aes(time, steps)) + geom_point(alpha=0.2) + geom_smooth() + labs(title="eval episode len") | ||
dev.off() | ||
df$n = 1:nrow(df) | ||
png("/tmp/plots/03_x_post_noise.png") | ||
ggplot(df, aes(n, x)) + geom_point() + labs(title="x post noise") | ||
dev.off() | ||
png("/tmp/plots/03_y_post_noise.png") | ||
ggplot(df, aes(n, y)) + geom_point() + labs(title="y post noise") | ||
|
||
png("/tmp/plots/07b_eval_total_reward.png", width=640, height=320) | ||
ggplot(df, aes(time, total_reward)) + geom_point(alpha=0.2) + geom_smooth() + labs(title="eval total reward") | ||
dev.off() | ||
|
||
# NOTE(review): this is a diff hunk; old and new lines are interleaved without +/- markers.
# The headerless /tmp/q_loss read (plot 04, column V1 against row number n) looks like the
# removed version and the /tmp/batch_num_terminal lines (plot 08) like the added one. TODO confirm.
df = read.delim("/tmp/q_loss", h=F) | ||
df$n = 1:nrow(df) | ||
png("/tmp/plots/04_q_loss.png") | ||
ggplot(df, aes(n, V1)) + geom_point() + | ||
geom_smooth() + labs(title="q loss") | ||
# Batch terminal-count table (header row; columns time and batch_num_terminals are used).
df = read.delim("/tmp/batch_num_terminal", h=T, sep=" ") | ||
png("/tmp/plots/08_batch_num_terminal.png", width=640, height=320) | ||
ggplot(df, aes(time, batch_num_terminals)) + geom_point(alpha=0.2) + geom_smooth() + labs(title="batch num terminal") | ||
dev.off() | ||
|
||
# NOTE(review): this is a diff hunk; old and new lines are interleaved without +/- markers.
# The headerless /tmp/action_q_values read (plot 05, column V1 against row number n) looks like
# the removed version and the /tmp/gradient_l2_norms lines (plots 09a/09b) like the added one.
# TODO confirm.
df = read.delim("/tmp/action_q_values", h=F) | ||
summary(df) | ||
df$n = 1:nrow(df) | ||
png("/tmp/plots/05_action_q_values.png") | ||
ggplot(df, aes(n, V1)) + geom_point() + | ||
geom_smooth() + labs(title="q values over time") | ||
# Gradient-norm table (header row; columns time, source and l2_norm are used).
# Rows are filtered on source == "actor" / "critic", one plot each.
df = read.delim("/tmp/gradient_l2_norms", h=T, sep=" ") | ||
png("/tmp/plots/09a_actor_l2_norms.png", width=640, height=320) | ||
ggplot(df[df$source=="actor",], aes(time, l2_norm)) + geom_point(alpha=0.1) + geom_smooth() + labs(title="actor gradient l2 norms") | ||
dev.off() | ||
png("/tmp/plots/09b_critic_l2_norms.png", width=640, height=320) | ||
ggplot(df[df$source=="critic",], aes(time, l2_norm)) + geom_point(alpha=0.1) + geom_smooth() + labs(title="critic gradient l2 norms") | ||
dev.off() | ||
|
||
# NOTE(review): this is a diff hunk; old and new lines are interleaved without +/- markers.
# The headerless /tmp/episode_len read (plot 06, column V1 against row number n) looks like the
# removed version and the /tmp/q_loss read with a header (plot 10) like the added one. TODO confirm.
df = read.delim("/tmp/episode_len", h=F) | ||
df$n = 1:nrow(df) | ||
png("/tmp/plots/06_episode_len.png") | ||
ggplot(df, aes(n, V1)) + geom_point() + | ||
geom_smooth() + labs(title="episode len") | ||
# Q-loss table (header row; columns time and q_loss are used).
df = read.delim("/tmp/q_loss", h=T, sep=" ") | ||
png("/tmp/plots/10_q_loss.png", width=640, height=320) | ||
ggplot(df, aes(time, q_loss)) + geom_point(alpha=0.1) + geom_smooth() + labs(title="critic training q loss") | ||
dev.off() | ||
|
||
# df = read.delim("/tmp/replay_memory_size", h=F) | ||
# df$n = 1:nrow(df) | ||
# png("/tmp/plots/09_replay_memory_size.png", width=640, height=320) | ||
# ggplot(df, aes(n, V1)) + geom_point() + labs(title="replay memory size") | ||
# dev.off() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters