Skip to content

Commit

Permalink
more documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
lorepozo committed Mar 8, 2017
1 parent c40c2ae commit a47ffdc
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 8 deletions.
31 changes: 28 additions & 3 deletions src/ec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ static PRIMS_ARR: [&'static str; 30] = ["B", "C", "S", "K", "I",
"fnth",
"feach"];

/// course is for loading inputs for use with ec.
mod course {
extern crate serde_json;
extern crate tempdir;
Expand Down Expand Up @@ -96,10 +97,12 @@ mod course {
grammar: Vec<Comb>,
}
impl Course {
/// Load the course for iteration `i` from the curriculum.
///
/// The file name is `course_NN.json`, where `NN` is `i` zero-padded to at
/// least two digits. Panics if the contents cannot be deserialized into a
/// `Course`.
pub fn load(i: u64) -> Course {
    let file_name = format!("course_{:02}.json", i);
    let contents = read_curriculum(file_name);
    serde_json::from_str(&contents).expect("parsing course file")
}
/// merge a given Course with the grammar of combinators given in the Context.
pub fn merge(&mut self, ctx: &Context) {
let raw_items = ctx.get()
.into_iter()
Expand All @@ -114,6 +117,7 @@ mod course {
self.grammar.append(&mut grammar);
}
}
/// save a Course to a temporary file
pub fn save(&self, i: u64) -> (TempDir, String) {
let tmp_dir = TempDir::new("ec").expect("make temp dir");
let path = tmp_dir.path().join(format!("ec_input_{}", i));
Expand All @@ -128,6 +132,7 @@ mod course {
use self::course::{Course, read_curriculum};


/// results is for parsing output from ec.
mod results {
extern crate serde_json;

Expand Down Expand Up @@ -175,16 +180,20 @@ fn ec_bin() -> String {
}
}

/// embryo builds the embryo (embryo.json in the curriculum/ec directory)
/// that seeds an Skn which uses ec. The result is a single
/// (mechanism name, data) pair under the mechanism name "ec".
pub fn embryo() -> Vec<(&'static str, String)> {
    let data = read_curriculum("embryo.json".to_string());
    let mut seed = Vec::with_capacity(1);
    seed.push(("ec", data));
    seed
}

/// primitives collects the names of the expressions that are primitive to
/// ec: every entry of PRIMS_ARR, copied into an owned String.
fn primitives() -> HashSet<String> {
    let mut names = HashSet::new();
    for prim in PRIMS_ARR.iter() {
        names.insert(String::from(*prim));
    }
    names
}


/// run_ec is the lower-level function that produces the ec results for a
/// given context and course iteration.
fn run_ec(ctx: &Context, i: u64) -> Results {
let mut c = Course::load(i);
c.merge(ctx);
Expand All @@ -193,7 +202,7 @@ fn run_ec(ctx: &Context, i: u64) -> Results {
.arg(path)
.output()
.expect("run ec");
drop(tmp_dir);
drop(tmp_dir); // we can delete the temporary directory after ec has run
if !output.status.success() {
let err = String::from_utf8(output.stderr).unwrap();
panic!("ec failed in iteration {}: {}", i, err)
Expand All @@ -203,6 +212,9 @@ fn run_ec(ctx: &Context, i: u64) -> Results {
Results::from_string(raw_results)
}

/// exprs_in_context takes a set of items in the context as given by
/// Context::get() or Context::explore() and returns the combinators
/// contained in those that are readable by ec.
fn exprs_in_context(ctx: Vec<(usize, &'static str, Rc<String>)>) -> HashMap<String, usize> {
ctx.into_iter()
.filter(|&(_, mech, _)| mech == "ec")
Expand All @@ -215,6 +227,10 @@ fn exprs_in_context(ctx: Vec<(usize, &'static str, Rc<String>)>) -> HashMap<Stri
.collect()
}

/// find_exprs_in_context takes a set of items in the context as given by
/// Context::get() or Context::explore() and a vector of combinators.
/// It returns a vector of the same size as exprs, with Some(id) if a match
/// was found or None otherwise.
fn find_exprs_in_context(ctx: Vec<(usize, &'static str, Rc<String>)>,
exprs: &Vec<&String>)
-> Vec<Option<usize>> {
Expand All @@ -227,12 +243,17 @@ fn find_exprs_in_context(ctx: Vec<(usize, &'static str, Rc<String>)>,
.collect()
}

/// find_expr_in_context is the single-combinator form of
/// find_exprs_in_context: it yields Some(id) if a match for `expr` was
/// found among the given context items, or None otherwise.
fn find_expr_in_context(ctx: Vec<(usize, &'static str, Rc<String>)>,
                        expr: String)
                        -> Option<usize> {
    // wrap the lone expression so the multi-expression lookup can be reused
    let wanted = vec![&expr];
    let found = find_exprs_in_context(ctx, &wanted);
    found[0]
}

/// mech is the ec mechanism as it should be registered/used by an Skn
/// object. It wraps running ec with updating item access counts and adding
/// a new item where appropriate.
pub fn mech(ctx: Context, i: u64) {
// run ec
let results = run_ec(&ctx, i);
Expand All @@ -246,6 +267,7 @@ pub fn mech(ctx: Context, i: u64) {
results.hit_rate,
results.programs.len(),
failures);

// retrieve learned combs
let mut learned: Vec<(String, f64)> = results.grammar
.iter()
Expand All @@ -261,6 +283,7 @@ pub fn mech(ctx: Context, i: u64) {
(r.expr, r.log_probability)
}));
}

// orient to most probable comb
let mut ctx = ctx;
let most_probable = learned.iter()
Expand All @@ -273,6 +296,7 @@ pub fn mech(ctx: Context, i: u64) {
ctx.orient(id);
ctx = ctx.update();
}

// make accesses ~ usage
learned.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap()); // reversed sort
let exprs = &learned.iter().map(|&(ref s, _)| s).collect();
Expand All @@ -294,7 +318,8 @@ pub fn mech(ctx: Context, i: u64) {
for comb in &access_info {
ctx.add_item_count(comb.2, comb.1 as u64);
}
// get probable combs, exclude primitives and combs in context

// add item with probable combs, excluding primitives and combs in context
let prims = primitives();
let exprs_in_ctx = exprs_in_context(ctx.explore());
let new_combs: Vec<String> = learned // already sorted by prob
Expand Down
61 changes: 58 additions & 3 deletions src/knowledge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,19 @@ use rand::distributions::{IndependentSample, Range};
// NOTE(review): presumably the network size below which the context spans
// every item (cf. `context_min_size` in Network::orient) — confirm.
const CTX_MIN_SIZE: usize = 5;
// NOTE(review): presumably the cap on the number of items in a network
// (cf. the `max_size` assertion in Network::grow) — confirm.
const NET_MAX_SIZE: usize = 128;

/// Item maintains the data and metadata for a single knowledge artifact,
/// as stored in the network's graph.
#[derive(Debug)]
struct Item {
/// mech is the name of the mechanism for this artifact.
mech: &'static str,
/// data is the artifact's arbitrary payload.
data: Rc<String>,
/// counts maps an epoch to the number of accesses made to this artifact
/// during that epoch (accumulated by add_count).
counts: HashMap<usize, u64>,
/// adj is the set of ids of the items adjacent to this one.
adj: HashSet<usize>,
/// id is this item's unique identifier.
id: usize,
}

Expand All @@ -30,23 +37,34 @@ impl Item {
id: id,
}
}
/// increases this item's access count for the given epoch by `count`.
/// An epoch with no recorded accesses starts from zero.
fn add_count(&mut self, epoch: usize, count: u64) {
    // the entry API finds (or creates) the epoch's slot with a single
    // lookup instead of a get followed by an insert
    *self.counts.entry(epoch).or_insert(0) += count;
}
/// totals the accesses made to this item during the given epoch and every
/// later epoch.
fn recent_count(&self, epoch: usize) -> u64 {
    let mut total = 0;
    for (&when, &hits) in &self.counts {
        if when >= epoch {
            total += hits;
        }
    }
    total
}
}

/// Context is the interface for mechanisms to utilize the knowledge
/// network.
pub struct Context {
/// net is the network that this context corresponds to.
net: Network,
/// mech is the name of the mechanism that's using this particular
/// Context object.
mech: &'static str,
/// items is the set of item ids in the immediate context.
items: HashSet<usize>,
/// frontier is the set of item ids within a small boundary over the
/// immediate context; explore() returns items and frontier together.
frontier: HashSet<usize>,
/// initial_epoch is the epoch that this Context object was created in;
/// update() carries it over to the refreshed Context.
initial_epoch: usize,
/// current_epoch is the epoch that this Context currently corresponds to.
current_epoch: usize,
}

Expand All @@ -60,6 +78,9 @@ impl Context {
pub fn explore(&self) -> Vec<(usize, &'static str, Rc<String>)> {
    // widen the view: every id in the immediate context plus its frontier
    let reachable = self.items.union(&self.frontier).cloned();
    self.net.ids_to_contents(reachable)
}
/// update will give a new Context object that accounts for any changes
/// that may have happened (such as from .orient() or .grow()) since
/// this Context object was created.
pub fn update(&self) -> Context {
Context { initial_epoch: self.initial_epoch, ..self.net.context(self.mech) }
}
Expand Down Expand Up @@ -95,7 +116,7 @@ struct Network {

impl Network {
/// embryo is a collection of starting items to form an initial clique
/// graph, of the form (mechanism name, data). Must be non-empty.
pub fn new<U>(embryo: U) -> Network
where U: IntoIterator<Item = (&'static str, String)>
{
Expand All @@ -107,7 +128,7 @@ impl Network {
epochs: Vec::new(),
})),
};
{
{ // scope in for this mutable borrow
let mut net = network.net.borrow_mut();
// clique of embryo as base
let mut id = 0;
Expand All @@ -122,6 +143,7 @@ impl Network {
item
})
.collect();
// initial epoch has no accesses and context of entire embryo
net.epochs.push((0, edges, HashSet::new())); // edges ~ embryo ids
// initial size
let size = id;
Expand All @@ -132,6 +154,8 @@ impl Network {
}
network
}
/// item_count increases the count of a given item corresponding to the
/// given epoch.
fn item_count(&self, epoch: usize, id: usize, count: u64) {
let mut net = self.net.borrow_mut();
{
Expand All @@ -140,6 +164,8 @@ impl Network {
}
net.epochs[epoch].2.insert(id);
}
/// ids_to_contents takes an iterable of item ids and returns a vector
/// of (id, mechanism name, data) corresponding to each given id.
fn ids_to_contents<U>(&self, items: U) -> Vec<(usize, &'static str, Rc<String>)>
where U: IntoIterator<Item = usize>
{
Expand All @@ -151,11 +177,15 @@ impl Network {
})
.collect()
}
/// orient creates a new epoch, centering the context around the given
/// item and using items' access counts since the given epoch to
/// determine where to grow the context.
fn orient(&self, epoch: usize, id: usize) {
let mut net = self.net.borrow_mut();
let mut ctx = HashSet::new();
let n = net.graph.len();
if n < net.context_min_size {
// use the entire network
ctx = (0..n).collect();
} else {
// the context should be sized according to the expected max
Expand Down Expand Up @@ -193,12 +223,16 @@ impl Network {
}
net.epochs.push((id, ctx, HashSet::new()))
}
/// grow adds a new knowledge artifact (Item) to the network, and
/// creates a new epoch with an implicit call to .orient() on the new
/// item.
fn grow(&self, mech: &'static str, data: String, epoch: usize) -> usize {
let id: usize;
{
let mut net = self.net.borrow_mut();
id = net.graph.len();
assert!(id <= net.max_size);

// compute counts for antecedent artifacts
let ids: HashSet<usize> = net.epochs
.iter()
Expand All @@ -219,6 +253,7 @@ impl Network {
counts = ids.iter().map(|&id| (id, 1)).collect();
sum = counts.len() as u64
}

// convert counts to probabilities
let antecedents: HashMap<usize, f64> = counts.iter()
.map(|&(id, cnt)| {
Expand All @@ -227,6 +262,7 @@ impl Network {
})
.collect();
let mut edges = HashSet::new();

// popularity-based subset selection
let uniform = Range::new(0f64, 1.);
let mut rng = rand::thread_rng();
Expand All @@ -248,18 +284,22 @@ impl Network {
}
}
}
// update other end of new edges

// update other end of new edges (undirected network)
for oid in &edges {
let ref mut item = net.graph[*oid];
item.adj.insert(id);
}

// actually add the item
let item = Item::new(mech, edges, data, id);
net.graph.push(item);
}
self.orient(epoch, id);
id
}
/// frontier_of takes a set of item ids and returns the set of item ids
/// corresponding to all adjacent items.
fn frontier_of(&self, items: &HashSet<usize>) -> HashSet<usize> {
let net = self.net.borrow();
let frontier: HashSet<usize> = items.iter()
Expand All @@ -270,6 +310,8 @@ impl Network {
.collect();
frontier.difference(items).cloned().collect()
}
/// context creates a new Context object corresponding to the network's
/// latest epoch.
fn context(&self, mech: &'static str) -> Context {
let net = self.net.borrow();
let epoch = net.epochs.len() - 1;
Expand All @@ -286,6 +328,9 @@ impl Network {
}
}

/// MechanismRegistry maintains the set of mechanisms used by the knowledge
/// network. A mechanism is a function which takes a Context and an
/// iteration number.
struct MechanismRegistry<'a> {
/// reg holds one (mechanism name, mechanism function) pair per registered
/// mechanism; the functions are borrowed for the lifetime 'a.
reg: Vec<(&'static str, &'a Fn(Context, u64))>,
}
Expand All @@ -299,6 +344,7 @@ impl<'a> MechanismRegistry<'a> {
}
}

/// Skn maintains a knowledge network and the mechanisms interacting with it.
pub struct Skn<'a> {
network: Network,
reg: MechanismRegistry<'a>,
Expand All @@ -312,6 +358,9 @@ impl<'a> fmt::Debug for Skn<'a> {
}

impl<'a> Skn<'a> {
/// embryo is a non-empty collection of initial knowledge artifacts of
/// the form (mechanism name, data), and iterations is the number of
/// iterations to run each mechanism.
pub fn new<U>(embryo: U, iterations: u64) -> Skn<'a>
where U: IntoIterator<Item = (&'static str, String)>
{
Expand All @@ -321,9 +370,15 @@ impl<'a> Skn<'a> {
t: iterations,
}
}
/// register adds a new mechanism for use with the knowledge network.
/// `name` is the mechanism's name and `mech` is a function which takes a
/// Context and an iteration number. The call is forwarded to this Skn's
/// MechanismRegistry.
pub fn register(&mut self, name: &'static str, mech: &'a Fn(Context, u64)) {
self.reg.register(name, mech);
}
/// run calls each mechanism `iteration` number of times (set when this
/// Skn was created) with a refreshed context on each iteration
/// (according to the latest epoch of the knowledge network).
pub fn run(&self) {
for t in 1..self.t + 1 {
for &(name, mech) in &self.reg.reg {
Expand Down
6 changes: 4 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ mod tests {
use knowledge::{Context, Skn};
use rand::distributions::{IndependentSample, Gamma};

/// a very basic mechanism, great for understanding what a mechanism
/// could look like.
fn basic_mech(ctx: Context, i: u64) {
let items = ctx.get();
let front = ctx.explore();
Expand All @@ -49,10 +51,10 @@ mod tests {
/// Smoke test: an Skn seeded with a single embryo item runs one registered
/// mechanism for `t` iterations without panicking.
#[test]
fn it_works() {
    let t = 20;
    // the embryo item and the registered mechanism share the same name
    let embryo = vec![("basic_mech_name", String::from("some data"))];
    let mech = basic_mech;
    let mut skn = Skn::new(embryo, t);
    skn.register("basic_mech_name", &mech);
    skn.run();
}
}

0 comments on commit a47ffdc

Please sign in to comment.