
This is the first commit! The Makefile recursion used to build the simulator
should probably be revised so that making in subdirectories is automatic.
Presently, the subdirectory makefiles are invoked explicitly.
1 parent 301a334 commit 47e5cd709d7cf7c1c6c844a2a1672dc6afab1f9c Jonathan Eastep committed Apr 17, 2008
Showing with 4,756 additions and 1 deletion.
  1. +26 −0 Makefile
  2. +1 −1 README
  3. +40 −0 common/Makefile
  4. +4 −0 common/README
  5. +23 −0 common/core/Makefile
  6. +2 −0 common/core/README
  7. +77 −0 common/core/cache.cc
  8. +468 −0 common/core/cache.h
  9. +98 −0 common/core/core.cc
  10. +105 −0 common/core/core.h
  11. +60 −0 common/core/license.info
  12. +495 −0 common/core/ocache.cc
  13. +193 −0 common/core/ocache.h
  14. +152 −0 common/core/perfmdl.cc
  15. +159 −0 common/core/perfmdl.h
  16. +10 −0 common/doc/README
  17. +378 −0 common/makefile.gnu.config
  18. +23 −0 common/misc/Makefile
  19. +2 −0 common/misc/README
  20. +151 −0 common/misc/utils.cc
  21. +86 −0 common/misc/utils.h
  22. +23 −0 common/network/Makefile
  23. +2 −0 common/network/README
  24. +171 −0 common/network/network.cc
  25. +89 −0 common/network/network.h
  26. +23 −0 common/phys_trans/Makefile
  27. +3 −0 common/phys_trans/README
  28. +76 −0 common/phys_trans/transport.cc
  29. +48 −0 common/phys_trans/transport.h
  30. +15 −0 common/tests/Makefile
  31. +9 −0 common/tests/README
  32. +22 −0 common/tests/pthreads_matmult/Makefile
  33. +289 −0 common/tests/pthreads_matmult/cannon.cc
  34. +2 −0 common/tests/pthreads_matmult/run_via_pin_sim
  35. +23 −0 common/user/Makefile
  36. +1 −0 common/user/README
  37. +29 −0 common/user/capi.cc
  38. +27 −0 common/user/capi.h
  39. +13 −0 pin/Makefile
  40. +1 −0 pin/README
  41. +9 −0 pin/doc/README
  42. +5 −0 pin/doc/optimizations
  43. +56 −0 pin/memos/2008-02-01.txt
  44. +35 −0 pin/memos/2008-02-05.txt
  45. +38 −0 pin/src/Makefile
  46. +6 −0 pin/src/README
  47. +157 −0 pin/src/chip.cc
  48. +139 −0 pin/src/chip.h
  49. +83 −0 pin/src/knobs.h
  50. +527 −0 pin/src/pin_sim.cc
  51. +11 −0 pin/tests/README
  52. +3 −0 pin/tests/perf_cache_models/README
  53. +8 −0 pin/tests/perf_cache_models/runtest.sh
  54. +240 −0 pin/tests/perf_cache_models/test.cc
  55. +11 −0 qemu/Makefile
  56. +9 −0 qemu/doc/README
26 Makefile
@@ -0,0 +1,26 @@
+include common/makefile.gnu.config
+
+all:
+ $(MAKE) -C common
+ $(MAKE) -C pin
+ $(MAKE) -C qemu
+
+pinbin:
+ $(MAKE) -C common
+ $(MAKE) -C pin
+
+qemubin:
+ $(MAKE) -C common
+ $(MAKE) -C qemu
+
+clean:
+ $(MAKE) -C common clean
+ $(MAKE) -C pin clean
+ $(MAKE) -C qemu clean
+ -rm -f *.o *.d *.rpo
+
+squeaky: clean
+ $(MAKE) -C common squeaky
+ $(MAKE) -C pin squeaky
+ $(MAKE) -C qemu squeaky
+ -rm -f *~
2 README
@@ -1 +1 @@
-This is the carbon simulator project
+This is the Carbon simulator project
40 common/Makefile
@@ -0,0 +1,40 @@
+include ./makefile.gnu.config
+
+all: userobjs coreobjs networkobjs phystransobjs miscobjs testobjs
+
+
+userobjs:
+ $(MAKE) -C user
+
+coreobjs:
+ $(MAKE) -C core
+
+networkobjs:
+ $(MAKE) -C network
+
+phystransobjs:
+ $(MAKE) -C phys_trans
+
+miscobjs:
+ $(MAKE) -C misc
+
+testobjs:
+ $(MAKE) -C tests
+
+clean:
+ $(MAKE) -C user clean
+ $(MAKE) -C core clean
+ $(MAKE) -C network clean
+ $(MAKE) -C phys_trans clean
+ $(MAKE) -C misc clean
+ $(MAKE) -C tests clean
+ -rm -f *.o *.d *.rpo
+
+squeaky: clean
+ $(MAKE) -C user squeaky
+ $(MAKE) -C core squeaky
+ $(MAKE) -C network squeaky
+ $(MAKE) -C phys_trans squeaky
+ $(MAKE) -C misc squeaky
+ $(MAKE) -C tests squeaky
+ -rm -f *~
4 common/README
@@ -0,0 +1,4 @@
+This folder contains code common to the various simulator implementations.
+
+Style note:
+The files in this folder should not depend upon Pin or QEMU if possible; otherwise, they should be made portable.
23 common/core/Makefile
@@ -0,0 +1,23 @@
+include ../makefile.gnu.config
+
+DBG=-g
+OPT=-O2
+CFLAGS=-c -I$(PIN_HOME)/InstLib -I../../pin/src -I../user -I../network -I../phys_trans -I../misc -I../ -I./ -fomit-frame-pointer -Wall -Werror -Wno-unknown-pragmas $(DBG) $(OPT) -MMD
+LDFLAGS=
+
+SOURCES = cache.cc ocache.cc perfmdl.cc core.cc
+OBJECTS = $(SOURCES:%.cc=%.o)
+
+
+all: $(OBJECTS)
+
+
+## build rules
+
+%.o : %.cc
+ $(CXX) $(CFLAGS) $(PIN_CXXFLAGS) -o $@ $<
+
+clean:
+ -rm -f *.o *.d *.rpo
+squeaky: clean
+ -rm -f *~
2 common/core/README
@@ -0,0 +1,2 @@
+This directory is for the functional and performance models
+for the processing core.
77 common/core/cache.cc
@@ -0,0 +1,77 @@
+#include "cache.h"
+
+
+/* ================================================================================================ */
+/* CacheBase methods */
+/* ================================================================================================ */
+
+CacheBase::CacheBase(std::string name, UINT32 size, UINT32 line_bytes, UINT32 assoc) :
+ name(name), cache_size(size), line_size(line_bytes), associativity(assoc),
+ line_shift(floorLog2(line_bytes)), set_index_mask((size / (assoc * line_bytes)) - 1)
+{
+
+ ASSERTX(isPower2(line_size));
+ ASSERTX(isPower2(set_index_mask + 1));
+
+ for (UINT32 access_type = 0; access_type < k_ACCESS_TYPE_NUM; access_type++)
+ {
+ access[access_type][false] = 0;
+ access[access_type][true] = 0;
+ }
+}
+
+
+// Stats output method
+
+string CacheBase::statsLong(string prefix, CacheType cache_type) const
+{
+ const UINT32 header_width = 19;
+ const UINT32 number_width = 12;
+
+ string out;
+
+ out += prefix + name + ":" + "\n";
+
+ if (cache_type != k_CACHE_TYPE_ICACHE) {
+ for (UINT32 i = 0; i < k_ACCESS_TYPE_NUM; i++)
+ {
+ const AccessType access_type = AccessType(i);
+
+ std::string type(access_type == k_ACCESS_TYPE_LOAD ? "Load" : "Store");
+
+ out += prefix + ljstr(type + "-Hits: ", header_width)
+ + myDecStr(getHits(access_type), number_width)
+ + " " +fltstr(100.0 * getHits(access_type) / safeFDiv(getAccesses(access_type)), 2, 6)
+ + "%\n";
+
+ out += prefix + ljstr(type + "-Misses: ", header_width)
+ + myDecStr(getMisses(access_type), number_width)
+ + " " +fltstr(100.0 * getMisses(access_type) / safeFDiv(getAccesses(access_type)), 2, 6)
+ + "%\n";
+
+ out += prefix + ljstr(type + "-Accesses: ", header_width)
+ + myDecStr(getAccesses(access_type), number_width)
+ + " " +fltstr(100.0 * getAccesses(access_type) / safeFDiv(getAccesses(access_type)), 2, 6)
+ + "%\n";
+
+ out += prefix + "\n";
+ }
+ }
+
+ out += prefix + ljstr("Total-Hits: ", header_width)
+ + myDecStr(getHits(), number_width)
+ + " " +fltstr(100.0 * getHits() / getAccesses(), 2, 6) + "%\n";
+
+ out += prefix + ljstr("Total-Misses: ", header_width)
+ + myDecStr(getMisses(), number_width)
+ + " " +fltstr(100.0 * getMisses() / getAccesses(), 2, 6) + "%\n";
+
+ out += prefix + ljstr("Total-Accesses: ", header_width)
+ + myDecStr(getAccesses(), number_width)
+ + " " +fltstr(100.0 * getAccesses() / getAccesses(), 2, 6) + "%\n";
+
+ out += "\n";
+
+ return out;
+}
+
468 common/core/cache.h
@@ -0,0 +1,468 @@
+// Jonathan Eastep (eastep@mit.edu)
+// 04.07.08
+//
+// This is a significantly modified version of code originally written by
+// Artur Klauser of Intel and later modified by Rodric Rabbah. My changes
+// enable the cache to be dynamically resized (both size and associativity)
+// and add some new statistics tracking.
+//
+// RMR (rodric@gmail.com) {
+// - temporary work around because decstr()
+// casts 64 bit ints to 32 bit ones
+// - use safe_fdiv to avoid NaNs in output
+
+
+#ifndef CACHE_H
+#define CACHE_H
+
+#include <iostream>
+
+#include "pin.H"
+#include "utils.h"
+
+#define k_KILO 1024
+#define k_MEGA (k_KILO*k_KILO)
+#define k_GIGA (k_KILO*k_MEGA)
+
+// type of cache hit/miss counters
+typedef UINT64 CacheStats;
+
+
+
+// Cache tag - self clearing on creation
+
+class CacheTag
+{
+ private:
+ ADDRINT the_tag;
+
+ public:
+ CacheTag(ADDRINT tag = ~0) { the_tag = tag; }
+
+ bool operator==(const CacheTag &right) const { return the_tag == right.the_tag; }
+
+ operator ADDRINT() const { return the_tag; }
+};
+
+
+
+// Everything related to cache sets
+
+namespace CACHE_SET
+{
+
+ // Cache set direct mapped
+ class DirectMapped
+ {
+ private:
+ CacheTag the_tag;
+
+ public:
+ DirectMapped(UINT32 assoc = 1)
+ {
+ ASSERTX(assoc == 1);
+ the_tag = CacheTag(~0);
+ }
+
+ VOID setAssociativity(UINT32 assoc) { ASSERTX(assoc == 1); }
+
+ UINT32 getAssociativity(UINT32 assoc) { return 1; }
+
+ UINT32 find(CacheTag tag) { return(the_tag == tag); }
+
+ VOID replace(CacheTag tag) { the_tag = tag; }
+
+ VOID modifyAssociativity(UINT32 assoc) { ASSERTX(assoc == 1); }
+
+ VOID print() { cout << the_tag << endl; }
+ };
+
+
+ // Cache set with round robin replacement
+ template <UINT32 k_MAX_ASSOCIATIVITY = 8>
+ class RoundRobin
+ {
+ private:
+ CacheTag the_tags[k_MAX_ASSOCIATIVITY];
+ UINT32 tags_last_index;
+ UINT32 next_replace_index;
+
+ public:
+
+ RoundRobin(UINT32 assoc = k_MAX_ASSOCIATIVITY):
+ tags_last_index(assoc - 1)
+ {
+ ASSERTX(assoc <= k_MAX_ASSOCIATIVITY);
+ next_replace_index = tags_last_index;
+
+ for (INT32 index = tags_last_index; index >= 0; index--)
+ {
+ the_tags[index] = CacheTag(~0);
+ }
+ }
+
+ VOID setAssociativity(UINT32 assoc)
+ {
+ ASSERTX(assoc <= k_MAX_ASSOCIATIVITY);
+ tags_last_index = assoc - 1;
+ next_replace_index = tags_last_index;
+ }
+
+ UINT32 getAssociativity() { return tags_last_index + 1; }
+
+ UINT32 find(CacheTag tag)
+ {
+ bool result = true;
+
+ for (INT32 index = tags_last_index; index >= 0; index--)
+ {
+ // this is an ugly micro-optimization, but it does cause a
+ // tighter assembly loop for ARM that way ...
+ if(the_tags[index] == tag) goto end;
+ }
+ result = false;
+
+ end:
+ return result;
+ }
+
+ VOID replace(CacheTag tag)
+ {
+ // g++ -O3 too dumb to do CSE on following lines?!
+ const UINT32 index = next_replace_index;
+
+ the_tags[index] = tag;
+ // condition typically faster than modulo
+ next_replace_index = (index == 0 ? tags_last_index : index - 1);
+ }
+
+ VOID modifyAssociativity(UINT32 assoc)
+ {
+ ASSERTX(assoc != 0 && assoc <= k_MAX_ASSOCIATIVITY);
+ UINT32 associativity = getAssociativity();
+
+ if ( assoc > associativity ) {
+ for (UINT32 i = tags_last_index + 1; i < assoc; i++)
+ {
+ the_tags[i] = CacheTag(~0);
+ }
+ tags_last_index = assoc - 1;
+ next_replace_index = tags_last_index;
+ }
+ else
+ {
+ if ( assoc < associativity )
+ {
+ // this is where evictions happen in the real world
+ for (UINT32 i = tags_last_index; i >= assoc; i--)
+ {
+ the_tags[i] = CacheTag(~0);
+ }
+
+ tags_last_index = assoc - 1;
+ if ( next_replace_index > tags_last_index )
+ {
+ next_replace_index = tags_last_index;
+ }
+ }
+ }
+ }
+
+ VOID print()
+ {
+ cout << tags_last_index + 1 << " " << next_replace_index << " ";
+ for (UINT32 i = 0; i < getAssociativity(); i++)
+ {
+ cout << hex << the_tags[i] << " ";
+ }
+ cout << endl;
+ }
+
+ };
+
+};
+// end namespace CACHE_SET
+
+namespace CACHE_ALLOC
+{
+ typedef enum
+ {
+ k_STORE_ALLOCATE,
+ k_STORE_NO_ALLOCATE
+ } StoreAllocation;
+};
+
+
+// Generic cache base class; no allocate specialization, no cache set specialization
+
+class CacheBase
+{
+ public:
+ // types, constants
+ typedef enum
+ {
+ k_ACCESS_TYPE_LOAD,
+ k_ACCESS_TYPE_STORE,
+ k_ACCESS_TYPE_NUM
+ } AccessType;
+
+ typedef enum
+ {
+ k_CACHE_TYPE_ICACHE,
+ k_CACHE_TYPE_DCACHE,
+ k_CACHE_TYPE_NUM
+ } CacheType;
+
+ protected:
+ static const UINT32 k_HIT_MISS_NUM = 2;
+ CacheStats access[k_ACCESS_TYPE_NUM][k_HIT_MISS_NUM];
+
+ protected:
+ // input params
+ const std::string name;
+ UINT32 cache_size;
+ const UINT32 line_size;
+ UINT32 associativity;
+
+ // computed params
+ const UINT32 line_shift;
+ const UINT32 set_index_mask;
+
+ private:
+ CacheStats sumAccess(bool hit) const
+ {
+ CacheStats sum = 0;
+
+ for (UINT32 access_type = 0; access_type < k_ACCESS_TYPE_NUM; access_type++)
+ {
+ sum += access[access_type][hit];
+ }
+
+ return sum;
+ }
+
+ public:
+ // constructors/destructors
+ CacheBase(std::string name, UINT32 size, UINT32 line_bytes, UINT32 assoc);
+
+ // accessors
+ UINT32 getCacheSize() const { return cache_size; }
+ UINT32 getLineSize() const { return line_size; }
+ UINT32 getNumWays() const { return associativity; }
+ UINT32 getNumSets() const { return set_index_mask + 1; }
+
+ // stats
+ CacheStats getHits(AccessType access_type) const { return access[access_type][true]; }
+ CacheStats getMisses(AccessType access_type) const { return access[access_type][false]; }
+ CacheStats getAccesses(AccessType access_type) const
+ { return getHits(access_type) + getMisses(access_type); }
+ CacheStats getHits() const { return sumAccess(true); }
+ CacheStats getMisses() const { return sumAccess(false); }
+ CacheStats getAccesses() const { return getHits() + getMisses(); }
+
+ // utilities
+ VOID splitAddress(const ADDRINT addr, CacheTag& tag, UINT32& set_index) const
+ {
+ tag = addr >> line_shift;
+ set_index = tag & set_index_mask;
+ }
+
+ VOID splitAddress(const ADDRINT addr, CacheTag& tag, UINT32& set_index,
+ UINT32& line_index) const
+ {
+ const UINT32 line_mask = line_size - 1;
+ line_index = addr & line_mask;
+ splitAddress(addr, tag, set_index);
+ }
+
+ string statsLong(string prefix = "",
+ CacheType cache_type = k_CACHE_TYPE_DCACHE) const;
+};
+
+
+
+// Templated cache class with specific cache set allocation policies
+// All that remains to be done here is allocate and deallocate the right
+// type of cache sets.
+
+template <class SET_t, UINT32 k_MAX_SETS, UINT32 k_MAX_SEARCH, UINT32 k_STORE_ALLOCATION>
+class Cache : public CacheBase
+{
+ private:
+ SET_t sets[k_MAX_SETS];
+ UINT64 accesses[k_MAX_SETS];
+ UINT64 misses[k_MAX_SETS];
+ UINT64 total_accesses[k_MAX_SETS];
+ UINT64 total_misses[k_MAX_SETS];
+ UINT32 set_ptrs[k_MAX_SETS+1];
+ UINT32 max_search;
+
+ public:
+ VOID resetCounters()
+ {
+ for(UINT32 i = 0; i < getNumSets(); i++) {
+ accesses[i] = misses[i] = 0;
+ }
+ }
+
+ UINT32 getSearchDepth() const { return max_search; }
+ UINT32 getSetPtr(UINT32 set_index)
+ {
+ ASSERTX( set_index < getNumSets() );
+ return set_ptrs[set_index];
+ }
+ void setSetPtr(UINT32 set_index, UINT32 value)
+ {
+ ASSERTX( set_index < k_MAX_SETS );
+ ASSERTX( (value < getNumSets()) || (value == k_MAX_SETS) );
+ set_ptrs[set_index] = value;
+ }
+
+ // constructors/destructors
+ Cache(std::string name, UINT32 size, UINT32 line_bytes,
+ UINT32 assoc, UINT32 max_search_depth) :
+ CacheBase(name, size, line_bytes, assoc)
+ {
+ ASSERTX(getNumSets() <= k_MAX_SETS);
+ ASSERTX(max_search_depth < k_MAX_SEARCH);
+
+ max_search = max_search_depth;
+
+ //initialization for cache hashing
+ srand( time(NULL) );
+
+ for (UINT32 i = 0; i < getNumSets(); i++)
+ {
+ total_accesses[i] = total_misses[i] = 0;
+ sets[i].setAssociativity(assoc);
+ set_ptrs[i] = k_MAX_SETS;
+ }
+ resetCounters();
+ }
+
+
+ //JME: added for dynamically resizing a cache
+ VOID resize(UINT32 assoc)
+ {
+ // new configuration written out overly explicitly; basically nothing
+ // but the cache size changes
+ // _newNumSets = getNumSets();
+ // _newLineSize = line_size;
+ // _newLineShift = line_shift;
+ // _newSetIndexMask = set_index_mask;
+
+ cache_size = getNumSets() * assoc * line_size;
+ associativity = assoc;
+
+ // since the number of sets stays the same, no lines need to be relocated
+ // internally; instead space for blocks within each set needs to be added
+ // or removed (possibly causing evictions in the real world)
+
+ for (UINT32 i = 0; i < getNumSets(); i++)
+ {
+ sets[i].modifyAssociativity(assoc);
+ }
+ }
+
+
+ // functions for accessing the cache
+
+ // Multi-line cache access from addr to addr+size-1
+ bool accessMultiLine(ADDRINT addr, UINT32 size, AccessType access_type)
+ {
+
+ const ADDRINT high_addr = addr + size;
+ bool all_hit = true;
+
+ const ADDRINT line_bytes = getLineSize();
+ const ADDRINT not_line_mask = ~(line_bytes - 1);
+
+ UINT32 history[k_MAX_SEARCH];
+
+ do
+ {
+ CacheTag tag;
+ UINT32 set_index;
+
+ splitAddress(addr, tag, set_index);
+
+ UINT32 index = set_index;
+ UINT32 depth = 0;
+ bool local_hit;
+
+ do
+ {
+ //if ( depth > 0)
+ //cout << "index = " << index << endl;
+ history[depth] = index;
+ SET_t &set = sets[index];
+ local_hit = set.find(tag);
+ index = set_ptrs[index];
+ } while ( !local_hit && ((++depth) < max_search) && (index < k_MAX_SETS));
+
+ all_hit &= local_hit;
+
+ // on miss, loads always allocate, stores optionally
+ if ( (! local_hit) && ((access_type == k_ACCESS_TYPE_LOAD) ||
+ (k_STORE_ALLOCATION == CACHE_ALLOC::k_STORE_ALLOCATE)) )
+ {
+ UINT32 r_num = rand() % depth;
+ UINT32 which = history[r_num];
+ sets[which].replace(tag);
+ //if ( depth > 1 )
+ //cout << "which = " << which << endl;
+ }
+
+ // start of next cache line
+ addr = (addr & not_line_mask) + line_bytes;
+ }
+ while (addr < high_addr);
+
+ access[access_type][all_hit]++;
+
+ return all_hit;
+ }
+
+ // Single line cache access at addr
+ bool accessSingleLine(ADDRINT addr, AccessType access_type)
+ {
+ UINT32 history[k_MAX_SEARCH];
+
+ CacheTag tag;
+ UINT32 set_index;
+
+ splitAddress(addr, tag, set_index);
+
+ UINT32 index = set_index;
+ UINT32 depth = 0;
+ bool hit;
+
+ do
+ {
+ //cout << "index = " << index << endl;
+ history[depth] = index;
+ SET_t &set = sets[index];
+ //set.print();
+ hit = set.find(tag);
+ index = set_ptrs[index];
+ } while( !hit && ((++depth) < max_search ) && (index < k_MAX_SETS));
+
+ // on miss, loads always allocate, stores optionally
+ if ( (! hit) && ((access_type == k_ACCESS_TYPE_LOAD) ||
+ (k_STORE_ALLOCATION == CACHE_ALLOC::k_STORE_ALLOCATE)) )
+ {
+ UINT32 r_num = rand() % depth;
+ UINT32 which = history[r_num];
+ sets[which].replace(tag);
+ if ( depth > 1 )
+ cout << "which = " << dec << which << endl;
+ }
+
+ access[access_type][hit]++;
+
+ return hit;
+ }
+
+};
+
+#endif
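For reference, the set-index arithmetic that cache.h relies on in CacheBase::splitAddress() can be reproduced outside of Pin. The following is a minimal, illustrative sketch (not part of this commit) that substitutes plain stdint types for Pin's ADDRINT/UINT32 and reimplements floorLog2 inline; the cache geometry constants are arbitrary example values.

// Standalone sketch: mirrors the CacheBase constructor and splitAddress()
// arithmetic from cache.h using plain stdint types.
#include <cstdint>
#include <cstdio>

int main()
{
    const uint32_t cache_size = 32 * 1024;   // 32 KB
    const uint32_t line_bytes = 64;
    const uint32_t assoc      = 8;

    // computed as in the CacheBase constructor
    uint32_t line_shift = 0;                 // floorLog2(line_bytes)
    while ((1u << (line_shift + 1)) <= line_bytes) line_shift++;
    const uint32_t set_index_mask = (cache_size / (assoc * line_bytes)) - 1;

    const uint64_t addr = 0x7fffd1234abcULL; // arbitrary example address

    // splitAddress(): tag, set index, and byte offset within the line
    const uint64_t tag       = addr >> line_shift;
    const uint32_t set_index = (uint32_t)(tag & set_index_mask);
    const uint32_t line_idx  = (uint32_t)(addr & (line_bytes - 1));

    std::printf("tag=0x%llx set=%u offset=%u\n",
                (unsigned long long)tag, set_index, line_idx);
    return 0;
}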
98 common/core/core.cc
@@ -0,0 +1,98 @@
+#include "core.h"
+
+using namespace std;
+
+int Core::coreInit(Chip *chip, int tid, int num_mod)
+{
+ the_chip = chip;
+ core_tid = tid;
+ core_num_mod = num_mod;
+
+ network = new Network;
+ network->netInit(chip, tid, num_mod);
+
+ if ( g_knob_enable_performance_modeling )
+ {
+ perf_model = new PerfModel("performance modeler");
+ cout << "Core[" << tid << "]: instantiated performance model" << endl;
+ } else
+ {
+ perf_model = (PerfModel *) NULL;
+ }
+
+ if ( g_knob_enable_dcache_modeling || g_knob_enable_icache_modeling )
+ {
+ ocache = new OCache("organic cache",
+ g_knob_cache_size.Value() * k_KILO,
+ g_knob_line_size.Value(),
+ g_knob_associativity.Value(),
+ g_knob_mutation_interval.Value(),
+ g_knob_dcache_threshold_hit.Value(),
+ g_knob_dcache_threshold_miss.Value(),
+ g_knob_dcache_size.Value() * k_KILO,
+ g_knob_dcache_associativity.Value(),
+ g_knob_dcache_max_search_depth.Value(),
+ g_knob_icache_threshold_hit.Value(),
+ g_knob_icache_threshold_miss.Value(),
+ g_knob_icache_size.Value() * k_KILO,
+ g_knob_icache_associativity.Value(),
+ g_knob_icache_max_search_depth.Value());
+
+ cout << "Core[" << tid << "]: instantiated organic cache model" << endl;
+ cout << ocache->statsLong() << endl;
+ } else
+ {
+ ocache = (OCache *) NULL;
+ }
+
+ return 0;
+}
+
+int Core::coreSendW(int sender, int receiver, char *buffer, int size)
+{
+ // Create a net packet
+ NetPacket packet;
+ packet.sender= sender;
+ packet.receiver= receiver;
+ packet.type = USER;
+ packet.length = size;
+ packet.data = new char[size];
+ for(int i = 0; i < size; i++)
+ packet.data[i] = buffer[i];
+
+ network->netSend(packet);
+ return 0;
+}
+
+int Core::coreRecvW(int sender, int receiver, char *buffer, int size)
+{
+ NetPacket packet;
+ NetMatch match;
+
+ match.sender = sender;
+ match.sender_flag = true;
+ match.type = USER;
+ match.type_flag = true;
+
+ packet = network->netRecv(match);
+
+ if((unsigned)size != packet.length){
+ cout << "ERROR:" << endl
+ << "Received packet length is not as expected" << endl;
+ exit(-1);
+ }
+
+ for(int i = 0; i < size; i++)
+ buffer[i] = packet.data[i];
+
+ return 0;
+}
+
+VOID Core::fini(int code, VOID *v, ofstream& out)
+{
+ if ( g_knob_enable_performance_modeling )
+ perf_model->fini(code, v, out);
+
+ if ( g_knob_enable_dcache_modeling || g_knob_enable_icache_modeling )
+ ocache->fini(code,v,out);
+}
105 common/core/core.h
@@ -0,0 +1,105 @@
+// Harshad Kasture
+//
+
+#ifndef CORE_H
+#define CORE_H
+
+#include <iostream>
+#include <fstream>
+#include <string.h>
+
+// JME: not entirely sure why this is needed...
+class Network;
+
+#include "pin.H"
+#include "chip.h"
+#include "network.h"
+#include "perfmdl.h"
+#include "ocache.h"
+
+
+// externally defined vars
+
+extern LEVEL_BASE::KNOB<bool> g_knob_enable_performance_modeling;
+extern LEVEL_BASE::KNOB<bool> g_knob_enable_dcache_modeling;
+extern LEVEL_BASE::KNOB<bool> g_knob_enable_icache_modeling;
+
+extern LEVEL_BASE::KNOB<UINT32> g_knob_cache_size;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_line_size;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_associativity;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_mutation_interval;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_dcache_threshold_hit;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_dcache_threshold_miss;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_dcache_size;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_dcache_associativity;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_dcache_max_search_depth;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_icache_threshold_hit;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_icache_threshold_miss;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_icache_size;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_icache_associativity;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_icache_max_search_depth;
+
+
+class Core
+{
+ private:
+ Chip *the_chip;
+ int core_tid;
+ int core_num_mod;
+ Network *network;
+ PerfModel *perf_model;
+ OCache *ocache;
+
+ public:
+
+ int coreInit(Chip *chip, int tid, int num_mod);
+
+ int coreSendW(int sender, int receiver, char *buffer, int size);
+
+ int coreRecvW(int sender, int receiver, char *buffer, int size);
+
+ VOID fini(int code, VOID *v, ofstream& out);
+
+
+ //performance model wrappers
+
+ inline VOID perfModelRun(PerfModelIntervalStat *interval_stats)
+ { perf_model->run(interval_stats); }
+
+ inline VOID perfModelRun(PerfModelIntervalStat *interval_stats, REG *reads,
+ UINT32 num_reads)
+ { perf_model->run(interval_stats, reads, num_reads); }
+
+ inline VOID perfModelRun(PerfModelIntervalStat *interval_stats, bool dcache_load_hit,
+ REG *writes, UINT32 num_writes)
+ { perf_model->run(interval_stats, dcache_load_hit, writes, num_writes); }
+
+ inline PerfModelIntervalStat* perfModelAnalyzeInterval(const string& parent_routine,
+ const INS& start_ins,
+ const INS& end_ins)
+ { return perf_model->analyzeInterval(parent_routine, start_ins, end_ins); }
+
+ inline VOID perfModelLogICacheLoadAccess(PerfModelIntervalStat *stats, bool hit)
+ { perf_model->logICacheLoadAccess(stats, hit); }
+
+ inline VOID perfModelLogDCacheStoreAccess(PerfModelIntervalStat *stats, bool hit)
+ { perf_model->logDCacheStoreAccess(stats, hit); }
+
+ inline VOID perfModelLogBranchPrediction(PerfModelIntervalStat *stats, bool correct)
+ { perf_model->logBranchPrediction(stats, correct); }
+
+
+ // organic cache wrappers
+
+ inline bool icacheRunLoadModel(ADDRINT i_addr, UINT32 size)
+ { return ocache->runICacheLoadModel(i_addr, size); }
+
+ inline bool dcacheRunLoadModel(ADDRINT d_addr, UINT32 size)
+ { return ocache->runDCacheLoadModel(d_addr, size); }
+
+ inline bool dcacheRunStoreModel(ADDRINT d_addr, UINT32 size)
+ { return ocache->runDCacheStoreModel(d_addr, size); }
+
+};
+
+#endif
60 common/core/license.info
@@ -0,0 +1,60 @@
+Below is the license info that was originally included in the code for
+the portable cache simulator available on Rodric Rabbah's webpage. This
+refers to the code in cache.h
+
+http://web.mit.edu/rabbah/www/download/cache-pintools/
+
+The portable organic cache simulator extends that functionality. The
+rest of the simulation code is totally unrelated.
+
+
+
+/*BEGIN_LEGAL
+Intel Open Source License
+
+Copyright (c) 2002-2005 Intel Corporation
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer. Redistributions
+in binary form must reproduce the above copyright notice, this list of
+conditions and the following disclaimer in the documentation and/or
+other materials provided with the distribution. Neither the name of
+the Intel Corporation nor the names of its contributors may be used to
+endorse or promote products derived from this software without
+specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE INTEL OR
+ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+END_LEGAL */
+//
+// @ORIGINAL_AUTHOR: Artur Klauser
+//
+
+/*! @file
+ * This file contains an ISA-portable cache simulator
+ * data cache hierarchies
+ */
+
+// RMR (rodric@gmail.com) {
+// - added command line switches -c -b and -a to
+// configure cache size, block size, and associativity
+// - added command line switch -z to ignore size of all
+// references
+// - added command line switch -ns to ignore stores
+// - renamed some knobs since the previously used "-h"
+// switch conflicts with pin's -h switch for help
+// - print out cache configuration in addition to statistics
+// } RMR
495 common/core/ocache.cc
@@ -0,0 +1,495 @@
+#include "ocache.h"
+
+
+/* =================================================== */
+/* OCache method definitions */
+/* =================================================== */
+
+
+// cache evolution related
+
+VOID OCache::evolveNaive()
+{
+ // gives more associativity (and thus cachesize) to the cache with more misses
+ if ( dcache_misses > (icache_misses * 1) )
+ {
+ //cout << dec << "dcacheMisses = " << dcache_misses << " icacheMisses = "
+ // << icache_misses << endl;
+ if ( il1->getNumWays() >= 2 )
+ {
+ il1->resize(il1->getNumWays() - 1);
+ dl1->resize(dl1->getNumWays() + 1);
+ //assume we don't have self-modifying code so no need to flush
+ }
+ }
+ else
+ {
+ if ( icache_misses > (dcache_misses * 1) )
+ {
+ //cout << dec << "dcacheMisses = " << dcache_misses << " icacheMisses = "
+ // << icache_misses << endl;
+ if ( dl1->getNumWays() >= 2 )
+ {
+ dl1->resize(dl1->getNumWays() - 1);
+ il1->resize(il1->getNumWays() + 1);
+ total_resize_evictions += 1 * dl1->getNumSets();
+ }
+ }
+ }
+}
+
+VOID OCache::evolveDataIntensive()
+{
+ //shrink icache so long as shrinking saves more dcache misses than icache misses it adds
+ if ( (last_dcache_misses == 0 && last_icache_misses == 0) )
+ {
+ // initial action
+ if ( il1->getNumWays() >= 2 ) {
+ il1->resize(il1->getNumWays() - 1);
+ dl1->resize(dl1->getNumWays() + 1);
+ }
+ }
+ else
+ {
+ if ( (dcache_misses + icache_misses) < (last_dcache_misses + last_icache_misses) )
+ {
+ //we got improvement
+ if ( il1->getNumWays() >= 2 ) {
+ il1->resize(il1->getNumWays() - 1);
+ dl1->resize(dl1->getNumWays() + 1);
+ }
+ }
+ }
+
+ last_dcache_misses = dcache_misses;
+ last_icache_misses = icache_misses;
+}
+
+VOID OCache::mutationRuntime()
+{
+ //if ( mutation_interval && ((icache_accesses + dcacheAccesses) >= mutation_interval) )
+ //cout << dec << icache_misses << " " << dcache_misses << " " << endl;
+
+ if ( mutation_interval && ((icache_misses + dcache_misses) >= mutation_interval) )
+ {
+ cout << "Mutation Interval Elapsed" << endl
+ << "config before mutation:" << endl
+ << statsLong() << endl;
+ evolveNaive();
+ //evolveDataIntensive();
+
+ resetIntervalCounters();
+ }
+ else
+ {
+ if ( mutation_interval==0 && ((icache_misses + dcache_misses) >= 1000) ) {
+ cout << "Mutation Interval Elapsed" << endl
+ << "config before mutation:" << endl
+ << statsLong() << endl;
+ resetIntervalCounters();
+ }
+ }
+}
+
+
+// cache access related
+
+bool OCache::dCacheLoadMulti(ADDRINT addr, UINT32 size, UINT32 inst_id)
+{
+ // first level D-cache
+ const BOOL dl1_hit = dl1->accessMultiLine(addr, size, CacheBase::k_ACCESS_TYPE_LOAD);
+ const Counter counter = dl1_hit ? k_COUNTER_HIT : k_COUNTER_MISS;
+
+ dcache_profile[inst_id][counter]++;
+ dcacheAccesses++;
+ dcache_total_accesses++;
+
+ if ( !dl1_hit )
+ {
+ dcache_misses++;
+ dcache_total_misses++;
+ }
+ mutationRuntime();
+
+ return dl1_hit;
+}
+
+bool OCache::dCacheLoadMultiFast(ADDRINT addr, UINT32 size)
+{
+ const BOOL dl1_hit = dl1->accessMultiLine(addr, size, CacheBase::k_ACCESS_TYPE_LOAD);
+
+ dcacheAccesses++;
+ dcache_total_accesses++;
+
+ if ( !dl1_hit )
+ {
+ dcache_misses++;
+ dcache_total_misses++;
+ }
+ mutationRuntime();
+
+ return dl1_hit;
+}
+
+bool OCache::dCacheLoadSingle(ADDRINT addr, UINT32 inst_id)
+{
+ // @todo we may access several cache lines for
+ // first level D-cache
+ const BOOL dl1_hit = dl1->accessSingleLine(addr, CacheBase::k_ACCESS_TYPE_LOAD);
+ const Counter counter = dl1_hit ? k_COUNTER_HIT : k_COUNTER_MISS;
+
+ dcache_profile[inst_id][counter]++;
+ dcacheAccesses++;
+ dcache_total_accesses++;
+
+ if ( !dl1_hit )
+ {
+ dcache_misses++;
+ dcache_total_misses++;
+ }
+ mutationRuntime();
+
+ return dl1_hit;
+}
+
+bool OCache::dCacheLoadSingleFast(ADDRINT addr)
+{
+ const BOOL dl1_hit = dl1->accessSingleLine(addr, CacheBase::k_ACCESS_TYPE_LOAD);
+
+ dcacheAccesses++;
+ dcache_total_accesses++;
+
+ if ( !dl1_hit )
+ {
+ dcache_misses++;
+ dcache_total_misses++;
+ }
+ mutationRuntime();
+
+ return dl1_hit;
+}
+
+bool OCache::dCacheStoreMulti(ADDRINT addr, UINT32 size, UINT32 inst_id)
+{
+ // first level D-cache; we only model stores to dcache
+ const BOOL dl1_hit = dl1->accessMultiLine(addr, size, CacheBase::k_ACCESS_TYPE_STORE);
+ const Counter counter = dl1_hit ? k_COUNTER_HIT : k_COUNTER_MISS;
+
+ dcache_profile[inst_id][counter]++;
+ dcacheAccesses++;
+ dcache_total_accesses++;
+
+ if ( !dl1_hit )
+ {
+ dcache_misses++;
+ dcache_total_misses++;
+ }
+ mutationRuntime();
+
+ return dl1_hit;
+}
+
+bool OCache::dCacheStoreMultiFast(ADDRINT addr, UINT32 size)
+{
+ // we only model stores for dcache
+ const BOOL dl1_hit = dl1->accessMultiLine(addr, size, CacheBase::k_ACCESS_TYPE_STORE);
+
+ dcacheAccesses++;
+ dcache_total_accesses++;
+
+ if ( !dl1_hit )
+ {
+ dcache_misses++;
+ dcache_total_misses++;
+ }
+ mutationRuntime();
+
+ return dl1_hit;
+}
+
+bool OCache::dCacheStoreSingle(ADDRINT addr, UINT32 inst_id)
+{
+ // @todo we may access several cache lines for
+ // first level D-cache; we only model stores to dcache
+ const BOOL dl1_hit = dl1->accessSingleLine(addr, CacheBase::k_ACCESS_TYPE_STORE);
+ const Counter counter = dl1_hit ? k_COUNTER_HIT : k_COUNTER_MISS;
+
+ dcache_profile[inst_id][counter]++;
+ dcacheAccesses++;
+ dcache_total_accesses++;
+
+ if ( !dl1_hit )
+ {
+ dcache_misses++;
+ dcache_total_misses++;
+ }
+ mutationRuntime();
+
+ return dl1_hit;
+}
+
+bool OCache::dCacheStoreSingleFast(ADDRINT addr)
+{
+ // we only model stores for dcache
+ const BOOL dl1_hit = dl1->accessSingleLine(addr, CacheBase::k_ACCESS_TYPE_STORE);
+
+ dcacheAccesses++;
+ dcache_total_accesses++;
+
+ if ( !dl1_hit )
+ {
+ dcache_misses++;
+ dcache_total_misses++;
+ }
+ mutationRuntime();
+
+ return dl1_hit;
+}
+
+bool OCache::iCacheLoadMulti(ADDRINT addr, UINT32 size, UINT32 inst_id)
+{
+ // first level I-cache
+ const BOOL il1_hit = il1->accessMultiLine(addr, size, CacheBase::k_ACCESS_TYPE_LOAD);
+ const Counter counter = il1_hit ? k_COUNTER_HIT : k_COUNTER_MISS;
+
+ icache_profile[inst_id][counter]++;
+ icache_accesses++;
+ icache_total_accesses++;
+
+ if ( !il1_hit )
+ {
+ icache_misses++;
+ icache_total_misses++;
+ }
+ mutationRuntime();
+
+ return il1_hit;
+}
+
+bool OCache::iCacheLoadMultiFast(ADDRINT addr, UINT32 size)
+{
+ const BOOL il1_hit = il1->accessMultiLine(addr, size, CacheBase::k_ACCESS_TYPE_LOAD);
+
+ icache_accesses++;
+ icache_total_accesses++;
+
+ if ( !il1_hit )
+ {
+ icache_misses++;
+ icache_total_misses++;
+ }
+ mutationRuntime();
+
+ return il1_hit;
+}
+
+bool OCache::iCacheLoadSingle(ADDRINT addr, UINT32 inst_id)
+{
+ // @todo we may access several cache lines for
+ // first level I-cache
+ const BOOL il1_hit = il1->accessSingleLine(addr, CacheBase::k_ACCESS_TYPE_LOAD);
+ const Counter counter = il1_hit ? k_COUNTER_HIT : k_COUNTER_MISS;
+
+ icache_profile[inst_id][counter]++;
+ icache_accesses++;
+ icache_total_accesses++;
+
+ if ( !il1_hit )
+ {
+ icache_misses++;
+ icache_total_misses++;
+ }
+ mutationRuntime();
+
+ return il1_hit;
+}
+
+bool OCache::iCacheLoadSingleFast(ADDRINT addr)
+{
+ const BOOL il1_hit = il1->accessSingleLine(addr, CacheBase::k_ACCESS_TYPE_LOAD);
+
+ icache_accesses++;
+ icache_total_accesses++;
+
+ if ( !il1_hit )
+ {
+ icache_misses++;
+ icache_total_misses++;
+ }
+ mutationRuntime();
+
+ return il1_hit;
+}
+
+bool OCache::runICacheLoadModel(ADDRINT i_addr, UINT32 size)
+{
+ const BOOL single = (size <= 4) | g_knob_icache_ignore_size;
+
+ if (single) {
+ return iCacheLoadSingleFast(i_addr);
+ }
+ else
+ {
+ return iCacheLoadMultiFast(i_addr, size);
+ }
+}
+
+bool OCache::runDCacheLoadModel(ADDRINT d_addr, UINT32 size)
+{
+ const BOOL single = (size <= 4) | g_knob_dcache_ignore_size;
+
+ if( single )
+ {
+ return dCacheLoadSingleFast(d_addr);
+ }
+ else
+ {
+ return dCacheLoadMultiFast(d_addr, size);
+ }
+}
+
+bool OCache::runDCacheStoreModel(ADDRINT d_addr, UINT32 size)
+{
+ const BOOL single = (size <= 4) | g_knob_dcache_ignore_size;
+
+ if( single )
+ {
+ return dCacheStoreSingleFast(d_addr);
+ }
+ else
+ {
+ return dCacheStoreMultiFast(d_addr,size);
+ }
+}
+
+
+// constructor
+
+OCache::OCache(std::string name, UINT32 size, UINT32 line_bytes, UINT32 assoc, UINT32 mutate_interval,
+ UINT32 dcache_threshold_hit_value, UINT32 dcache_threshold_miss_value, UINT32 dcache_size,
+ UINT32 dcache_associativity, UINT32 dcache_max_search_depth, UINT32 icache_threshold_hit_value,
+ UINT32 icache_threshold_miss_value, UINT32 icache_size, UINT32 icache_associativity,
+ UINT32 icache_max_search_depth):
+ dl1(new RRSACache(name + "_dl1", dcache_size, line_bytes, dcache_associativity, dcache_max_search_depth)),
+ il1(new RRSACache(name + "_il1", icache_size, line_bytes, icache_associativity, icache_max_search_depth)),
+ cache_size(size), line_size(line_bytes), associativity(assoc),
+ mutation_interval(mutate_interval),
+ dcache_total_accesses(0), dcache_total_misses(0),
+ icache_total_accesses(0), icache_total_misses(0),
+ total_resize_evictions(0),
+ last_dcache_misses(0), last_icache_misses(0), name(name)
+{
+ ASSERTX( (size & 1) == 0 );
+ ASSERTX( (assoc & 1) == 0 );
+ ASSERTX( (dcache_size + icache_size) == size );
+ ASSERTX( (dcache_associativity + icache_associativity) == assoc );
+
+ resetIntervalCounters();
+
+ dcache_profile.SetKeyName("d_addr ");
+ dcache_profile.SetCounterName("dcache:miss dcache:hit");
+
+ icache_profile.SetKeyName("i_addr ");
+ icache_profile.SetCounterName("icache:miss icache:hit");
+
+ CounterArray dcache_threshold;
+ dcache_threshold[k_COUNTER_HIT] = dcache_threshold_hit_value;
+ dcache_threshold[k_COUNTER_MISS] = dcache_threshold_miss_value;
+ dcache_profile.SetThreshold( dcache_threshold );
+
+ CounterArray icache_threshold;
+ icache_threshold[k_COUNTER_HIT] = icache_threshold_hit_value;
+ icache_threshold[k_COUNTER_MISS] = icache_threshold_miss_value;
+ icache_profile.SetThreshold( icache_threshold );
+}
+
+
+// miscellaneous
+
+string OCache::statsLong()
+{
+ ostringstream out;
+ out << dec
+ << name << ":" << endl
+ << " cacheSize = " << cache_size << endl
+ << " associativity = " << associativity << endl
+ << " lineSize = " << line_size << endl
+ << " mutationInterval = " << mutation_interval << endl
+ << " totalResizeEvictions = " << total_resize_evictions << endl
+ << endl
+ << " dcache:" << endl
+ << " cacheSize = " << dl1->getCacheSize() << endl
+ << " associativity = " << dl1->getNumWays() << endl
+ << " searchDepth = " << dl1->getSearchDepth() << endl
+ << " lineSize = " << dl1->getLineSize() << endl
+ << " dcacheTotalMisses = " << dcache_total_misses << endl
+ << " dcacheIntervalMisses = " << dcache_misses << endl
+ << " dcacheTotalAccesses = " << dcache_total_accesses << endl
+ << " dcacheIntervalAccesses = " << dcacheAccesses << endl
+ << endl
+ << " icache:" << endl
+ << " cacheSize = " << il1->getCacheSize() << endl
+ << " associativity = " << il1->getNumWays() << endl
+ << " searchDepth = " << il1->getSearchDepth() << endl
+ << " lineSize = " << il1->getLineSize() << endl
+ << " icacheTotalMisses = " << icache_total_misses << endl
+ << " icacheIntervalMisses = " << icache_misses << endl
+ << " icacheTotalAccesses = " << icache_total_accesses << endl
+ << " icacheIntervalAccesses = " << icache_accesses << endl;
+
+ return out.str();
+}
+
+void OCache::fini(int code, VOID *v, ofstream& out)
+{
+
+ // print D-cache profile
+ // @todo what does this print
+ // out << "PIN:MEMLATENCIES 1.0. 0x0" << endl;
+
+ out << "# DCACHE configuration ["
+ << "c = " << dCacheSize() / 1024 << "KB, "
+ << "b = " << dCacheLineSize() << "B, "
+ << "a = " << dCacheAssociativity() << "]" << endl;
+
+ out << "#" << endl
+ << "# DCACHE stats" << endl
+ << "#" << endl;
+
+ out << dCacheStatsLong("# ", CacheBase::k_CACHE_TYPE_DCACHE);
+
+ if( g_knob_dcache_track_loads || g_knob_dcache_track_stores ) {
+ out << "#" << endl
+ << "# LOAD stats" << endl
+ << "#" << endl;
+
+ out << dcache_profile.StringLong();
+ }
+
+ out << endl << endl;
+
+ // print I-cache profile
+ // @todo what does this print
+ // out << "PIN:MEMLATENCIES 1.0. 0x0" << endl;
+
+ out << "# ICACHE configuration ["
+ << "c = " << iCacheSize() / 1024 << "KB, "
+ << "b = " << iCacheLineSize() << "B, "
+ << "a = " << iCacheAssociativity() << "]" << endl;
+
+ out << "#" << endl
+ << "# ICACHE stats" << endl
+ << "#" << endl;
+
+ out << iCacheStatsLong("# ", CacheBase::k_CACHE_TYPE_ICACHE);
+
+ if (g_knob_icache_track_insts) {
+ out << "#" << endl
+ << "# INST stats" << endl
+ << "#" << endl;
+
+ out << icache_profile.StringLong();
+ }
+
+}
+
193 common/core/ocache.h
@@ -0,0 +1,193 @@
+// Jonathan Eastep (eastep@mit.edu)
+// 04.09.08
+//
+// This file implements a dynamically adaptive cache. The Organic Cache
+// (OCache) consists of an instruction L1 and data L1 which share a pool
+// of cache banks. Each bank implements a "way" in terms of associativity,
+// so there are n ways split arbitrarily between the icache and dcache.
+// Note: the number of physical sets in the OCache is fixed, so reapportioning
+// the banks simultaneously adjusts both cache sizes.
+// The OCache monitors cache access statistics to automatically manage
+// bank reapportioning.
+
+
+#ifndef OCACHE_H
+#define OCACHE_H
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+#include "pin.H"
+#include "cache.h"
+#include "pin_profile.H"
+
+
+/* ===================================================================== */
+/* Externally defined variables */
+/* ===================================================================== */
+
+extern LEVEL_BASE::KNOB<bool> g_knob_icache_ignore_size;
+extern LEVEL_BASE::KNOB<bool> g_knob_dcache_ignore_size;
+extern LEVEL_BASE::KNOB<bool> g_knob_dcache_track_loads;
+extern LEVEL_BASE::KNOB<bool> g_knob_dcache_track_stores;
+extern LEVEL_BASE::KNOB<bool> g_knob_icache_track_insts;
+extern LEVEL_BASE::KNOB<bool> g_knob_enable_dcache_modeling;
+extern LEVEL_BASE::KNOB<bool> g_knob_enable_icache_modeling;
+
+extern LEVEL_BASE::KNOB<UINT32> g_knob_cache_size;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_line_size;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_associativity;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_mutation_interval;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_dcache_threshold_hit;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_dcache_threshold_miss;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_dcache_size;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_dcache_associativity;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_dcache_max_search_depth;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_icache_threshold_hit;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_icache_threshold_miss;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_icache_size;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_icache_associativity;
+extern LEVEL_BASE::KNOB<UINT32> g_knob_icache_max_search_depth;
+
+/* ===================================================================== */
+/* Organic Cache Class */
+/* ===================================================================== */
+
+
+
+// organic cache
+
+class OCache
+{
+ private:
+ typedef Cache<CACHE_SET::RoundRobin<16>, k_MEGA, 64, CACHE_ALLOC::k_STORE_ALLOCATE>
+ RRSACache;
+
+ public:
+ typedef enum
+ {
+ k_COUNTER_MISS = 0,
+ k_COUNTER_HIT = 1,
+ k_COUNTER_NUM
+ } Counter;
+
+ typedef COUNTER_ARRAY<UINT64, k_COUNTER_NUM> CounterArray;
+
+ // holds the counters with misses and hits
+ // conceptually this is an array indexed by instruction address
+ COMPRESSOR_COUNTER<ADDRINT, UINT32, CounterArray> dcache_profile;
+ COMPRESSOR_COUNTER<ADDRINT, UINT32, CounterArray> icache_profile;
+
+ private:
+ RRSACache *dl1;
+ RRSACache *il1;
+
+ UINT32 cache_size;
+ UINT32 line_size;
+ UINT32 associativity;
+
+ UINT32 mutation_interval;
+ UINT64 dcache_total_accesses;
+ UINT32 dcacheAccesses;
+ UINT64 dcache_total_misses;
+ UINT32 dcache_misses;
+ UINT64 icache_total_accesses;
+ UINT32 icache_accesses;
+ UINT64 icache_total_misses;
+ UINT32 icache_misses;
+ UINT64 total_resize_evictions;
+
+ UINT32 last_dcache_misses;
+ UINT32 last_icache_misses;
+
+ string name;
+
+ private:
+
+ inline VOID resetIntervalCounters()
+ {
+ dcache_misses = 0;
+ dcacheAccesses = 0;
+ icache_misses = 0;
+ icache_accesses = 0;
+ }
+
+ // These functions define the concurrent evolution heuristic of the dcache and icache
+ VOID evolveNaive();
+ VOID evolveDataIntensive();
+
+ // This function increments an interval counter (triggered by some cache event)
+ // When the counter reaches a threshold, it fires an evolution function
+ VOID mutationRuntime();
+
+ // These functions access the dcache and icache
+ // You can access either a single line or multi lines
+ // Fast accesses skip adding the access to the profiler
+ bool dCacheLoadMulti(ADDRINT addr, UINT32 size, UINT32 inst_id);
+ bool dCacheLoadMultiFast(ADDRINT addr, UINT32 size);
+ bool dCacheLoadSingle(ADDRINT addr, UINT32 inst_id);
+ bool dCacheLoadSingleFast(ADDRINT addr);
+ bool dCacheStoreMulti(ADDRINT addr, UINT32 size, UINT32 inst_id);
+ bool dCacheStoreMultiFast(ADDRINT addr, UINT32 size);
+ bool dCacheStoreSingle(ADDRINT addr, UINT32 inst_id);
+ bool dCacheStoreSingleFast(ADDRINT addr);
+ bool iCacheLoadMulti(ADDRINT addr, UINT32 size, UINT32 inst_id);
+ bool iCacheLoadMultiFast(ADDRINT addr, UINT32 size);
+ bool iCacheLoadSingle(ADDRINT addr, UINT32 inst_id);
+ bool iCacheLoadSingleFast(ADDRINT addr);
+
+ public:
+
+ // These are just wrappers around the Cache class equivalents for the OCache dcache field
+ UINT32 dCacheSize() { return dl1->getCacheSize(); }
+ UINT32 dCacheLineSize() { return dl1->getLineSize(); }
+ UINT32 dCacheAssociativity() { return dl1->getNumWays(); }
+ UINT32 dCacheGetSetPtr(UINT32 set_index) { return dl1->getSetPtr(set_index); }
+ VOID dCacheSetSetPtr(UINT32 set_index, UINT32 value) { dl1->setSetPtr(set_index, value); }
+ string dCacheStatsLong(string prefix, CacheBase::CacheType type)
+ { return dl1->statsLong(prefix,type); }
+
+ // These are just wrappers around the Cache class equivalents for the OCache icache field
+ UINT32 iCacheSize() { return il1->getCacheSize(); }
+ UINT32 iCacheLineSize() { return il1->getLineSize(); }
+ UINT32 iCacheAssociativity() { return il1->getNumWays(); }
+ UINT32 iCacheGetSetPtr(UINT32 set_index) { return il1->getSetPtr(set_index); }
+ VOID iCacheSetSetPtr(UINT32 set_index, UINT32 value) { il1->setSetPtr(set_index, value); }
+ string iCacheStatsLong(string prefix, CacheBase::CacheType type) { return il1->statsLong(prefix,type); }
+
+ string statsLong();
+
+ // Constructor
+ OCache(std::string name, UINT32 size, UINT32 line_bytes, UINT32 assoc, UINT32 mutate_interval,
+ UINT32 dcache_threshold_hit_value, UINT32 dcache_threshold_miss_value, UINT32 dcache_size,
+ UINT32 dcache_associativity, UINT32 dcache_max_search_depth, UINT32 icache_threshold_hit_value,
+ UINT32 icache_threshold_miss_value, UINT32 icache_size, UINT32 icache_associativity,
+ UINT32 icache_max_search_depth);
+
+ // These functions provide the public interface to accessing the caches
+ bool runICacheLoadModel(ADDRINT i_addr, UINT32 size);
+ bool runDCacheLoadModel(ADDRINT d_addr, UINT32 size);
+ bool runDCacheStoreModel(ADDRINT d_addr, UINT32 size);
+
+ // This function is called at the end of simulation
+ void fini(int code, VOID *v, ofstream& out);
+
+};
+
+
+
+/* ===================================================================== */
+/* Global interface and wrappers: definitions */
+/* ===================================================================== */
+
+
+bool runICacheLoadModel(ADDRINT i_addr, UINT32 size);
+bool runDCacheLoadModel(ADDRINT d_addr, UINT32 size);
+bool runDCacheStoreModel(ADDRINT d_addr, UINT32 size);
+
+VOID oCacheModelInit();
+VOID oCacheModelFini(int code, VOID *v, ofstream& out);
+
+
+#endif
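The bank-reapportioning policy described in the header comment above boils down to OCache::evolveNaive() in ocache.cc: whichever L1 missed more during the interval takes one way from the other, as long as the donor keeps at least one way. Below is a minimal standalone sketch of that heuristic (not part of this commit), using plain integer counters in place of the OCache members and omitting the eviction accounting.

// Illustrative sketch of the evolveNaive() reapportioning policy.
#include <cstdint>
#include <cstdio>

struct OCacheSketch
{
    uint32_t icache_ways;
    uint32_t dcache_ways;

    void evolveNaive(uint64_t icache_misses, uint64_t dcache_misses)
    {
        if (dcache_misses > icache_misses && icache_ways >= 2) {
            icache_ways -= 1;            // shrink the I-cache by one way
            dcache_ways += 1;            // grow the D-cache by one way
        } else if (icache_misses > dcache_misses && dcache_ways >= 2) {
            dcache_ways -= 1;
            icache_ways += 1;
        }
        // equal miss counts: leave the apportioning unchanged
    }
};

int main()
{
    OCacheSketch oc = { 4, 4 };          // start with an even 4/4 split
    oc.evolveNaive(100, 900);            // the D-cache is missing more
    std::printf("icache ways = %u, dcache ways = %u\n",
                oc.icache_ways, oc.dcache_ways);   // prints 3 and 5
    return 0;
}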
152 common/core/perfmdl.cc
@@ -0,0 +1,152 @@
+#include "perfmdl.h"
+
+
+UINT32 PerfModel::getInsMicroOpsCount(const INS& ins)
+{
+ // FIXME: assumes that stack is not supported by special hardware; load and
+ // store microops are assumed to be required.
+
+ bool does_read = INS_IsMemoryRead(ins);
+ bool does_read2 = INS_HasMemoryRead2(ins);
+ bool does_write = INS_IsMemoryWrite(ins);
+
+ UINT32 count = 0;
+
+ // potentially load first operand from mem
+ count += does_read ? 1 : 0;
+
+ // potentially load second operand from mem
+ count += does_read2 ? 1 : 0;
+
+ // perform the op on the operands
+ count += 1;
+
+ // potentially store the result to mem
+ count += does_write ? 1 : 0;
+
+ return count;
+}
+
+
+PerfModelIntervalStat* PerfModel::analyzeInterval(const string& parent_routine,
+ const INS& start_ins, const INS& end_ins)
+{
+ vector< pair<ADDRINT, UINT32> > inst_trace;
+ UINT32 microop_count = 0;
+ UINT32 cycles_subtotal = 0;
+
+ // do some analysis to get the number of cycles (before mem, branch stalls)
+ // fixme: for now we approximate with approx # x86 microops;
+ // need to account for pipeline depth / instruction latencies
+
+ for(INS ins = start_ins; ins!=end_ins; ins = INS_Next(ins))
+ {
+ // debug info
+ // cout << hex << "0x" << INS_Address(BBL_InsTail(bbl)) << dec << ": "
+ // << INS_Mnemonic(BBL_InsTail(bbl)) << endl;
+
+ inst_trace.push_back( pair<ADDRINT, UINT32>(INS_Address(ins), INS_Size(ins)) );
+ UINT32 micro_ops = getInsMicroOpsCount(ins);
+ microop_count += micro_ops;
+ // FIXME
+ cycles_subtotal += micro_ops;
+ }
+
+ // allocate struct for instructs in the basic block to write stats into.
+ // NOTE: if a basic block gets split, this data may become redundant
+ PerfModelIntervalStat *stats = new PerfModelIntervalStat(parent_routine,
+ inst_trace,
+ microop_count,
+ cycles_subtotal);
+ return stats;
+}
+
+
+VOID PerfModel::run(PerfModelIntervalStat *interval_stats)
+{
+ // NOTE: must function such that it can be called more than once per
+ // interval and still work
+
+ UINT32 interval_cycle_count = 0;
+
+ interval_cycle_count += interval_stats->cycles_subtotal;
+ interval_cycle_count += (interval_stats->branch_mispredict ? 10 : 0);
+
+ // Note: dcache load miss penalty is already
+ // accounted for by dependency stalling
+
+ // Note: perfect dcache store queue assumed.
+ // store miss penalty assumed to be zero.
+
+ // icache miss penalty
+ for (list<bool>::iterator it = interval_stats->icache_load_miss_history.begin();
+ it != interval_stats->icache_load_miss_history.end();
+ it++)
+ {
+ // FIXME: this is not a constant. at minimum it should be a
+ // constant exposed to outside world
+ interval_cycle_count += ((*it) ? 10 : 0);
+ }
+
+ cycle_count += interval_cycle_count;
+ microop_issue_count += interval_stats->microops_count;
+
+ // clear out values in case Run gets called again this interval
+ interval_stats->reset();
+
+ //cout << "made it here" << endl;
+}
+
+
+// run method which accounts for load data dependency stalls
+VOID PerfModel::run(PerfModelIntervalStat *interval_stats, REG *reads,
+ UINT32 numReads)
+{
+
+ run(interval_stats);
+
+ UINT64 max = cycle_count;
+ REG max_reg = LEVEL_BASE::REG_LAST;
+
+ for ( UINT32 i = 0; i < numReads; i++ )
+ {
+ REG r = reads[i];
+ UINT64 cycle = scoreboard[r];
+
+ if ( cycle != k_PERFMDL_CYCLE_INVALID ) {
+ if ( cycle > max ) {
+ max = cycle;
+ max_reg = r;
+ }
+
+ // first use encountered so release scoreboard slot
+ scoreboard[r] = k_PERFMDL_CYCLE_INVALID;
+ cout << "removed " << REG_StringShort(r) << " from scoreboard: "
+ << cycle << endl;
+ }
+ }
+
+ if ( max != cycle_count ) {
+ cout << "stalled from " << cycle_count << " to " << max << " on "
+ << REG_StringShort(max_reg) << endl;
+ }
+
+ // stall for latest load dependency if needed
+ cycle_count = max;
+}
+
+
+VOID PerfModel::run(PerfModelIntervalStat *interval_stats, bool dcache_load_hit, REG *writes, UINT32 numWrites)
+{
+ run(interval_stats);
+
+ interval_stats->dcache_load_miss_history.push_back( !dcache_load_hit );
+ if ( g_knob_enable_performance_modeling && !dcache_load_hit ) {
+ for (UINT32 i = 0; i < numWrites; i++) {
+ REG w = writes[i];
+ scoreboard[w] = cycle_count + 100; //FIXME: make this parameterizable
+ cout << "added " << REG_StringShort(w) << " to scoreboard: "
+ << cycle_count << " + 100 = " << scoreboard[w] << endl;
+ }
+ }
+}
159 common/core/perfmdl.h
@@ -0,0 +1,159 @@
+// Jonathan Eastep (eastep@mit.edu)
+// 04.07.08
+//
+// This file contains classes and structs for modeling performance in the
+// simulator. It collects stats from the various other models that are
+// running (e.g. the cache model) and loosely models microarchitectural
+// execution to keep a clock updated. To minimize overhead in Pin, as few
+// additional instructions as possible are instrumented exclusively for
+// performance modeling; instead, the performance model piggy-backs on the
+// instrumentation the other models already require. Because the models
+// don't end up instrumenting every instruction, the performance model
+// time-warps and batches up as much of its modeling work as possible.
+
+
+#ifndef PERFMDL_H
+#define PERFMDL_H
+
+#include <list>
+#include <vector>
+#include <utility>
+#include <iostream>
+
+#include "pin.H"
+
+
+/* ===================================================================== */
+/* External References */
+/* ===================================================================== */
+
+extern LEVEL_BASE::KNOB<bool> g_knob_enable_performance_modeling;
+
+
+/* ===================================================================== */
+/* Performance Modeler Classes */
+/* ===================================================================== */
+
+#define k_PERFMDL_CYCLE_INVALID (~ ((UINT64) 0) )
+
+
+// JME. I feel like this class is exempt from some of the style guidelines
+// since it's a class that's more or less used like a struct
+
+class PerfModelIntervalStat {
+ public:
+ // holds instruction addresses and sizes
+ vector< pair<ADDRINT, UINT32> > inst_trace;
+
+ // keeps track of miss status for icache and dcache loads and stores
+ // set true for miss
+ list<bool> icache_load_miss_history;
+ list<bool> dcache_load_miss_history;
+ list<bool> dcache_store_miss_history;
+
+ // set when instrumenting the code to add calls to analysis
+ UINT32 microops_count;
+ UINT32 cycles_subtotal;
+
+ // set true if interval had branch mispredict
+ bool branch_mispredict;
+
+ // set for use in debugging
+ string parent_routine;
+
+
+ // methods
+ PerfModelIntervalStat(const string& parent, const vector< pair<ADDRINT, UINT32> >& trace,
+ UINT32 uops, UINT32 cyc_subtotal):
+ inst_trace(trace), microops_count(uops), cycles_subtotal(cyc_subtotal),
+ branch_mispredict(false), parent_routine(parent)
+ {
+ }
+
+ VOID reset()
+ {
+ // resets everything but inst_trace and parent
+ dcache_load_miss_history.resize(0);
+ dcache_store_miss_history.resize(0);
+ icache_load_miss_history.resize(0);
+ branch_mispredict = false;
+ microops_count = 0;
+ cycles_subtotal = 0;
+ }
+};
+
+
+class PerfModel {
+
+ private:
+ // does not include stalls
+ UINT64 microop_issue_count;
+
+ // this is the local clock for the core
+ UINT64 cycle_count;
+
+ // this is used for finding dependencies on loaded data
+ vector<UINT64> scoreboard;
+
+ // set for debugging purposes
+ string name;
+
+
+ // methods
+
+ UINT32 getInsMicroOpsCount(const INS& ins);
+
+ public:
+
+ PerfModel(string n):
+ microop_issue_count(0), cycle_count(0),
+ scoreboard(LEVEL_BASE::REG_LAST, k_PERFMDL_CYCLE_INVALID), name(n)
+ { }
+
+ UINT64 getCycleCount() { return cycle_count; }
+ UINT64 getMicroOpCount() { return microop_issue_count; }
+
+
+ // These functions are for logging modeling events for which the performance impact
+ // may be lazily evaluated later when the performance model is next run.
+
+ VOID logICacheLoadAccess(PerfModelIntervalStat *stats, bool hit)
+ { stats->icache_load_miss_history.push_back( !hit ); }
+
+ VOID logDCacheStoreAccess(PerfModelIntervalStat *stats, bool hit)
+ { stats->dcache_store_miss_history.push_back( !hit ); }
+
+ VOID logBranchPrediction(PerfModelIntervalStat *stats, bool correct)
+ { stats->branch_mispredict = !correct; }
+
+
+ // Called at first encounter of an interval. Fills out stats for the interval
+
+ PerfModelIntervalStat* analyzeInterval(const string& parent_routine,
+ const INS& start_ins, const INS& end_ins);
+
+
+ // Pin inserts a call to one of the following functions when instrumenting
+ // instructions.
+
+ // the vanilla run method.
+ VOID run(PerfModelIntervalStat *interval_stats);
+
+ // run method which accounts for load data dependency stalls
+ VOID run(PerfModelIntervalStat *interval_stats, REG *reads, UINT32 num_reads);
+
+ // run method which registers destination registers in the scoreboard
+ VOID run(PerfModelIntervalStat *interval_stats, bool dcache_load_hit,
+ REG *writes, UINT32 num_writes);
+
+
+ // this method is called at the end of simulation
+ //FIXME: implement this function
+ VOID fini(int code, VOID *v, ofstream& out)
+ { }
+
+};
+
+
+#endif
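The scoreboard mentioned above is driven by the PerfModel::run() overloads in perfmdl.cc: a dcache load miss marks the instruction's destination registers as ready at cycle_count plus a fixed penalty, and a later reader of one of those registers stalls the local clock forward to that ready time, releasing the scoreboard slot on first use. The following standalone sketch (not part of this commit) illustrates that behavior with plain integer register ids and the 100-cycle penalty noted as a FIXME in the commit.

// Illustrative sketch of the load-use scoreboard in PerfModel.
#include <cstdint>
#include <vector>
#include <cstdio>

static const uint64_t k_CYCLE_INVALID = ~uint64_t(0);

struct ScoreboardSketch
{
    uint64_t cycle_count = 0;
    std::vector<uint64_t> ready;                  // indexed by register id

    explicit ScoreboardSketch(size_t num_regs)
        : ready(num_regs, k_CYCLE_INVALID) {}

    // destination registers of a missed load become ready after 'penalty'
    void noteLoadMiss(const std::vector<int> &writes, uint64_t penalty)
    {
        for (int r : writes)
            ready[r] = cycle_count + penalty;
    }

    // a consumer of these registers stalls to the latest outstanding load
    void consume(const std::vector<int> &reads)
    {
        uint64_t max_cycle = cycle_count;
        for (int r : reads) {
            if (ready[r] != k_CYCLE_INVALID) {
                if (ready[r] > max_cycle) max_cycle = ready[r];
                ready[r] = k_CYCLE_INVALID;       // first use releases the slot
            }
        }
        cycle_count = max_cycle;
    }
};

int main()
{
    ScoreboardSketch sb(16);
    sb.noteLoadMiss({3}, 100);    // load into r3 missed: ready at cycle 100
    sb.cycle_count += 20;         // 20 cycles of unrelated work
    sb.consume({3});              // reader of r3 stalls to cycle 100
    std::printf("cycle_count = %llu\n",
                (unsigned long long)sb.cycle_count);   // prints 100
    return 0;
}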
10 common/doc/README
@@ -0,0 +1,10 @@
+This directory is for simulator documentation. This may include:
+
+ - How to use the simulator
+ - The internal operation of some piece of the simulator
+ - How to extend or modify the simulator
+ - Anything else that we want to write down and remember
+ related to the operation or use of the simulator
+
+Pin- or QEMU-implementation-specific docs should be directed to
+../pin/doc and ../qemu/doc
378 common/makefile.gnu.config
@@ -0,0 +1,378 @@
+# These definitions are generated by the kit builder
+KIT=1#
+TARGET=ia32
+TARGET_OS=l
+
+##############################################################
+#
+# Here are some things you might want to configure
+#
+##############################################################
+
+# 0 means off; 1 means on
+DEBUG = 1
+
+# if your tool is not in the kit directory
+# then set this to the pin-2.0-X-Y directory
+PIN_HOME ?= /home/carbon/pin-2.3-16358-gcc.3.2.3-ia32-linux
+
+# Select static or dynamic linking for tool
+# only applies to unix
+#PIN_DYNAMIC = -static
+PIN_DYNAMIC = -ldl
+
+#### Local CAG customizations ####
+
+# Manually specify correct version of GCC
+CC = /usr/bin/gcc
+CXX = /usr/bin/g++
+
+export LD_LIBRARY_PATH = /usr/lib
+
+##############################################################
+#
+# Typical users will not need to change the stuff below here
+#
+##############################################################
+
+##############################################################
+# Set things for all architectures and all OS
+##############################################################
+ifeq ($(DEBUG),1)
+ DBG = -g
+else
+ DBG =
+endif
+
+PIN_CXXFLAGS = -DBIGARRAY_MULTIPLIER=1 -DUSING_XED $(DBG)
+PIN_CXXFLAGS += -fno-strict-aliasing -I$(PIN_HOME)/Include -I$(PIN_HOME)/Include/gen -I$(PIN_HOME)/InstLib
+PIN_LPATHS = -L$(PIN_HOME)/Lib/ -L$(PIN_HOME)/ExtLib/
+PIN_BASE_LIBS :=
+PIN_LDFLAGS = $(DBG)
+NO_LOGO =
+SSE2 = -msse2
+ENABLE_VS = 0
+
+ifeq ($(CCOV),1)
+ # code coverage is on
+ PIN_LDFLAGS += -prof-genx
+ ifneq ($(CCOVDIR),)
+ PIN_LDFLAGS += -prof-dir $(CCOVDIR)
+ endif
+endif
+
+ifeq ($(ENABLE_VS),1)
+ VIRT_SEG_FLAG = -xyzzy -241runtime 0 -xyzzy -virtual_segments 1
+else
+ VIRT_SEG_FLAG =
+endif
+
+
+##############################################################
+# Set the kit versus source tree stuff
+##############################################################
+ifndef KIT
+ KIT = 0
+endif
+
+ifeq (${KIT},0)
+
+ #
+ # Building out of a source tree
+ #
+
+ OS=$(shell uname -s)
+
+ ifeq ($(findstring CYGWIN,$(OS)),CYGWIN)
+ BUILD_OS = w
+ TARGET_OS = w
+ else
+ ifeq ($(OS),Darwin)
+ BUILD_OS = m
+ TARGET_OS = m
+ else
+ BUILD_OS = l
+ TARGET_OS = l
+ endif
+ endif
+
+ # default to building for the host you are on. You can override this on the cmd line.
+ HOST=$(shell uname -m)
+ ifeq (${HOST},x86_64)
+ TARGET=ia32e
+ endif
+ ifeq (${HOST},amd64)
+ TARGET=ia32e
+ endif
+ ifeq (${HOST},i686)
+ TARGET=ia32
+ endif
+ ifeq (${HOST},x86)
+ TARGET=ia32
+ endif
+ ifeq (${HOST},i386)
+ TARGET=ia32
+ endif
+ ifeq (${HOST},ia64)
+ TARGET=ipf
+ endif
+ ifndef TARGET
+ $(error you must define TARGET on the command line)
+ endif
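+
+  # For example (illustrative): `make TARGET=ia32` forces a 32-bit build
+  # even when `uname -m` reports x86_64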
+
+ # If you are building out of a source tree and not a kit
+ # point this to the charm directory
+ PIN_ROOT ?= ../..
+
+ XEDKIT = $(PIN_ROOT)/Source/obj/xed/$(TARGET)_$(TARGET)_$(TARGET_OS)/xed2-kit-$(TARGET)-$(TARGET_OS)
+ PIN_CXXFLAGS += -I$(XEDKIT)/include
+
+ # We override CXX only if it is the default one from Make.
+  # Environment overrides of CXX take precedence.
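+  # For example (illustrative): running `CXX=/usr/bin/g++ make` or
+  # `make CXX=/usr/bin/g++` keeps that value, because $(origin CXX) is
+  # then "environment" or "command line" rather than "default"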
+
+ ifeq ($(TARGET),ipf)
+ ifeq ($(origin CXX), default)
+ CXX = /usr/intel/pkgs/gcc/3.4/bin/g++
+ endif
+ ifeq ($(origin CC), default)
+ CC = /usr/intel/pkgs/gcc/3.4/bin/gcc
+ endif
+ endif
+
+ ifeq ($(TARGET),ia32)
+ ifeq ($(TARGET_OS),l)
+ ifeq ($(origin CC), default)
+ CC = /usr/intel/pkgs/gcc/4.2.0/bin/gcc
+ endif
+ ifeq ($(origin CXX), default)
+ CXX = /usr/intel/pkgs/gcc/4.2.0/bin/g++
+ endif
+ endif
+ endif
+
+ ifeq ($(TARGET),ia32e)
+ ifeq ($(TARGET_OS),l)
+ ifeq ($(origin CXX), default)
+ CXX = /usr/intel/pkgs/gcc/4.2.0/bin/g++
+ endif
+ ifeq ($(origin CC), default)
+ CC = /usr/intel/pkgs/gcc/4.2.0/bin/gcc
+ endif
+ endif
+ endif
+
+ ifeq ($(TARGET_OS),w)
+ ifneq ($(BUILD_OS),w)
+ CXX = /proj/vssad1/users/swallace/cygwin/bin/i686-pc-mingw32-g++
+ CC = /proj/vssad1/users/swallace/cygwin/bin/i686-pc-mingw32-gcc
+ endif
+ endif
+
+ ifeq ($(TARGET_OS),m)
+ ifneq ($(BUILD_OS),m)
+ CXX = /proj/vssad1/users/swallace/mac/bin/i686-apple-darwin8-g++
+ CC = /proj/vssad1/users/swallace/mac/bin/i686-apple-darwin8-gcc
+ endif
+ endif
+
+ # TODO: add option to pass only ICCVER and find the path to icc from it
+ ifneq ($(ICCDIR),)
+ CXX = $(ICCDIR)/icpc
+ CC = $(ICCDIR)/icc
+ APP_CXXLINK_FLAGS = -i_static -Wl,-rpath=/usr/intel/pkgs/gcc/4.2.0/lib
+ PIN_LDFLAGS += -i_static -Wl,-rpath=/usr/intel/pkgs/gcc/4.2.0/lib
+ PIN_CXXFLAGS += -i_static -gcc-name=/usr/intel/pkgs/gcc/4.2.0/bin/g++
+ PIN_CXXFLAGS += -wd1418 -wd1419 -wd981 -wd383 -wd869 -wd593 -wd266 -wd279 -wd444 -wd168 -wd810 -wd810 -wd181
+ endif
+
+
+ TARGET_SPEC = ${TARGET}_${TARGET_OS}${TARGET_OFORMAT}
+ PIN_LPATHS += -L$(PIN_ROOT)/Source/obj/${TARGET}_${TARGET_SPEC} \
+ -L$(PIN_ROOT)/External/Libdwarf/Lib_${TARGET_SPEC} \
+ -L$(PIN_ROOT)/External/Libelf/Lib_${TARGET_SPEC} \
+ -L$(XEDKIT)/lib
+ PIN_BIN = $(PIN_ROOT)/Source/obj/${TARGET}_${TARGET_SPEC}/pin
+ PIN = ${PIN_BIN} -slow_asserts $(VIRT_SEG_FLAG) $(PIN_FLAGS) $(PIN_USERFLAGS)
+ PIN_LIBNAMES = $(PIN_ROOT)/Source/obj/${TARGET}_${TARGET_SPEC}/libpin.a
+
+
+else
+
+ #
+ # Building out of a kit
+ #
+
+ PIN = $(PIN_HOME)/Bin/pin -slow_asserts $(VIRT_SEG_FLAG) $(PIN_FLAGS) $(PIN_USERFLAGS)
+
+ XEDKIT = $(PIN_HOME)/XED2
+ PIN_LPATHS += -L$(XEDKIT)/lib
+ PIN_CXXFLAGS += -I$(XEDKIT)/include
+
+endif
+
+##############################################################
+# Set the architecture specific stuff
+##############################################################
+
+ifeq ($(TARGET),ia32)
+
+ PIN_CXXFLAGS += -DTARGET_IA32
+ #TOOLADDR=--section-start,.interp=0x70008400
+ # The 400 in the address leaves room for the program headers
+
+ ifeq ($(TARGET_OS),m)
+ ### TOOLADDR setting for Mac
+ # old value that works with MacOS 10.4.1: 0x50048000
+ # old value that works for SPEC but not gui program: 0x16048000
+ TOOLADDR = -Wl,-seg1addr -Wl,0x84048000
+ PIN_BASE_LIBS += -lxed
+ PIN_PTHREAD_LIBS = -lpinpthread
+ ### FIXMAC: __pthread_mutex_init is not yet redefined
+ #PIN_PTHREAD_LIBS_FLAGS = -Wl,-u,__pthread_mutex_init
+ else
+ ### TOOLADDR setting for Linux and Windows
+ TOOLADDR = -Wl,--section-start,.interp=0x05048400
+ PIN_BASE_LIBS += -lxed
+ PIN_PTHREAD_LIBS = -lpinpthread
+ PIN_PTHREAD_LIBS_FLAGS = -Wl,-u,__pthread_mutex_init
+ endif
+endif
+
+ifeq ($(TARGET),ia32e)
+ PIN_CXXFLAGS += -DTARGET_IA32E -DHOST_IA32E
+ PIN_BASE_LIBS += -lxed
+ PIN_PTHREAD_LIBS = -lpinpthread
+ PIN_PTHREAD_LIBS_FLAGS = -Wl,-u,__pthread_mutex_init
+ TOOLADDR = -Wl,--section-start,.interp=0x20048000
+endif
+
+ifeq ($(TARGET),ipf)
+ PIN_CXXFLAGS += -DTARGET_IPF -DHOST_IPF
+ TOOLADDR = -Wl,--section-start,.interp=0x00000c0000000400,--section-start,.init_array=0x00000e0000000400,-defsym,__init_array_start=0x00000e0000000400,-defsym,__preinit_array_start=__init_array_start,-defsym,__preinit_array_end=__preinit_array_start
+ PIN_PTHREAD_LIBS = -lpinpthread
+ PIN_PTHREAD_LIBS_FLAGS = -Wl,-u,__pthread_mutex_init
+endif
+
+##############################################################
+# Set the OS specific variables
+# Some of this refers to architecture-dependent variables,
+# so this must come second
+##############################################################
+
+ifeq ($(TARGET_OS),w)
+### Windows
+
+ PIN_CXXFLAGS += -DTARGET_WINDOWS -mno-cygwin
+
+ #FIXME: make this conditional based on the compiler
+ PIN_BASE_LIBS += -lpinvm -lntdll
+ PIN_LDFLAGS += -Wl,--export-all-symbols
+ PIN_LDFLAGS += -shared -Wl,-wrap,atexit,-wrap,_onexit,-e,_Ptrace_DllMainCRTStartup@12 -mno-cygwin
+ PIN_LDFLAGS += -Wl,--image-base -Wl,0x55000000
+ PINTOOL_SUFFIX = .dll
+ #TESTAPP = C:/cygwin/bin/cp.exe
+ TESTAPP = $(PIN_HOME)/Tests/cp-win.exe
+ APP_CXXFLAGS += -DTARGET_WINDOWS -mno-cygwin
+ PIN_CMP = cmp
+ PIN_DIFF = diff -w
+ APP_CXXFLAGS2 += -mno-cygwin
+ EXEEXT = .exe
+ OBJEXT = obj
+else
+### Linux or Mac
+
+   # This enables the thread-safe libc by pulling in pthread.o from libpinpthread.a.
+   # Otherwise, you will get the non-thread-safe version from libc.
+   # It also pulls in malloc_st.o by forcing a reference to malloc.
+ ifeq ($(TARGET_OS),m)
+ PIN_CMP = ../mac-cmp
+ else
+ PIN_CMP = cmp
+ endif
+ PIN_DIFF = ${PIN_CMP}
+ ifeq ($(TARGET_OS),l)
+ ### Linux
+ PIN_BASE_LIBS += -ldwarf -lelf ${PIN_DYNAMIC}
+ PIN_CXXFLAGS += -DTARGET_LINUX
+ PIN_LDFLAGS += -Wl,-u,malloc
+ APP_CXXFLAGS += -DTARGET_LINUX
+ else
+ ### Mac
+ PIN_BASE_LIBS += $(PIN_PTHREAD_LIBS) ${PIN_DYNAMIC}
+ PIN_LDFLAGS += $(PIN_PTHREAD_LIBS_FLAGS)
+ # Suppress linker warnings
+ PIN_LDFLAGS += -w -Wl,-multiply_defined -Wl,suppress
+ PIN_CXXFLAGS += -DTARGET_MAC
+ APP_CXXFLAGS += -DTARGET_MAC
+ endif
+
+
+ PIN_LDFLAGS += ${TOOLADDR}
+ PINTOOL_SUFFIX =
+ EXEEXT =
+ OBJEXT = o
+ TESTAPP = /bin/cp
+endif
+
+ifeq ($(PIN_PIE),1)
+ PIN_CXXFLAGS += -fPIE
+ PIN_LDFLAGS += -pie -Wl,-Bsymbolic
+ TOOLADDR =
+endif
+
+SAPIN_LIBS = -lsapin $(PIN_BASE_LIBS)
+PIN_LIBS = -lpin $(PIN_BASE_LIBS)
+
+##############################################################
+# Some final variables
+##############################################################
+
+# put the lpaths before all the libs
+PIN_LDFLAGS += ${PIN_LPATHS}
+OPT = -O3 -fomit-frame-pointer
+NO_OPTIMIZE = -O0
+COPT = -c
+OUTOPT = -o
+OUTEXE = -o
+LINK_OUT = -o
+
+ifeq ($(DEBUG),0)
+ PIN_CXXFLAGS += $(OPT)
+endif
+
+
+##############################################################
+# Rules to make testing easier
+# This testing only checks that the application ran correctly.
+# It does no checking of the results of the tool.
+# If you make the tool self-checking and it exits with a non-zero exit code,
+# then the test will detect the error.
+# Before the test, we make a .tested and a .failed file. If
+# the test succeeds, we remove the .failed file.
+# find . -name '*.tested'
+# and
+# find . -name '*.failed'
+# will summarize what you tested and what failed
+##############################################################
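+# Example (illustrative, for a hypothetical tool named mytool):
+#   make mytool$(PINTOOL_SUFFIX).test    # build mytool and run it under Pin
+#   find . -name '*.failed'              # list any tests that did not pass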
+%$(PINTOOL_SUFFIX).test : %$(PINTOOL_SUFFIX) %$(PINTOOL_SUFFIX).tested %$(PINTOOL_SUFFIX).failed
+ touch $<.makefile.copy; rm $<.makefile.copy
+ $(PIN) -t $< -- $(TESTAPP) makefile $<.makefile.copy
+ $(PIN_CMP) makefile $<.makefile.copy
+ rm $<.makefile.copy; rm $<.failed
+
+# Some subdirectories do not want the $(PINTOOL_SUFFIX) in their test name.
+%.test : %$(PINTOOL_SUFFIX) %$(PINTOOL_SUFFIX).tested %$(PINTOOL_SUFFIX).failed
+ touch $<.makefile.copy; rm $<.makefile.copy
+ $(PIN) -t $< -- $(TESTAPP) makefile $<.makefile.copy
+ $(PIN_CMP) makefile $<.makefile.copy
+ rm $<.makefile.copy; rm $<.failed
+
+
+%.tested :
+ touch $@
+
+%.failed :
+ touch $@
+
+# otherwise these are deleted if the tool build fails
+.PRECIOUS : %.tested %.failed
View
23 common/misc/Makefile
@@ -0,0 +1,23 @@
+include ../makefile.gnu.config
+
+DBG=-g
+OPT=-O2
+CFLAGS=-c -I$(PIN_HOME)/InstLib -fomit-frame-pointer -Wall -Werror -Wno-unknown-pragmas $(DBG) $(OPT) -MMD
+LDFLAGS=
+
+SOURCES = utils.cc
+OBJECTS = $(SOURCES:%.cc=%.o)
+
+
+all: $(OBJECTS)
+
+
+## build rules
+
+%.o : %.cc
+ $(CXX) $(CFLAGS) $(PIN_CXXFLAGS) -o $@ $<
+
+clean:
+ -rm -f *.o *.d *.rpo
+squeaky: clean
+ -rm -f *~
View
2 common/misc/README
@@ -0,0 +1,2 @@
+This directory is for miscellaneous helper functions used throughout the
+simulator code.
View
151 common/misc/utils.cc
@@ -0,0 +1,151 @@
+#include "utils.h"
+
+
+/* ================================================================ */
+/* Utility function definitions */
+/* ================================================================ */
+
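+// Returns the decimal representation of v, right-justified in a field of
+// width w.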
+string myDecStr(UINT64 v, UINT32 w)
+{
+ ostringstream o;
+ o.width(w);
+ o << v;
+ string str(o.str());
+ return str;
+}
+
+
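+// True when n is a power of two; note that this also returns true for n == 0.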
+bool isPower2(UINT32 n)
+{ return ((n & (n - 1)) == 0); }
+
+
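+// floor(log2(n)), computed via a binary search over the bit positions;
+// returns -1 when n == 0.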
+INT32 floorLog2(UINT32 n)
+{
+ INT32 p = 0;
+
+ if (n == 0) return -1;
+
+ if (n & 0xffff0000) { p += 16; n >>= 16; }
+ if (n & 0x0000ff00) { p += 8; n >>= 8; }
+ if (n & 0x000000f0) { p += 4; n >>= 4; }
+ if (n & 0x0000000c) { p += 2; n >>= 2; }
+ if (n & 0x00000002) { p += 1; }
+
+ return p;
+}
+
+
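+// ceil(log2(n)); relies on floorLog2(0) == -1 so that ceilLog2(1) == 0.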
+INT32 ceilLog2(UINT32 n)
+{ return floorLog2(n - 1) + 1; }
+
+
+
+/* ================================================================ */
+/* Bit vector class method definitions */
+/* ================================================================ */
+
+
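+// Bits are packed into 64-bit words: bit b lives in words[b >> 6] at bit
+// position (b & 63).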
+void BitVector::reset()
+{
+ for( UINT32 i = 0; i < ((size + 64 - 1) >> 6); i++)
+ words[i] = 0;
+}
+
+bool BitVector::at(UINT32 bit)
+{
+ assert( bit < size);
+
+ UINT32 index = bit >> 6;
+ UINT64 shift = bit & 63;
+ UINT64 one = 1;
+ UINT64 mask = one << shift;
+ return (words[index] & mask) ? true : false;
+}
+
+void BitVector::set(UINT32 bit)
+{
+ assert( bit < size );
+
+ UINT32 index = bit >> 6;
+ UINT64 shift = bit & 63;
+ UINT64 one = 1;
+ UINT64 mask = one << shift;
+ words[index] |= mask;
+}
+
+void BitVector::clear(UINT32 bit)
+{
+ assert( bit < size );
+
+ UINT32 index = bit >> 6;
+ UINT64 shift = bit & 63;
+ UINT64 one = 1;
+ UINT64 mask = ~(one << shift);
+ words[index] &= mask;
+}
+
+void BitVector::set(const BitVector& vec2)
+{
+ assert( size == vec2.size );
+
+ for (UINT32 i = 0; i < ((size + 64 - 1) >> 6); i++)
+ words[i] |= vec2.words[i];
+}
+
+void BitVector::clear(const BitVector& vec2)
+{
+ assert( size == vec2.size );
+
+ for (UINT32 i = 0; i < ((size + 64 - 1) >> 6); i++)
+ words[i] &= ~vec2.words[i];
+}
+
+bool BitVector::test(const BitVector& vec2)
+{
+ assert( vec2.size == size );
+
+ for (UINT32 i = 0; i < ((size + 64 - 1) >> 6); i++) {
+ if ( vec2.words[i] & words[i] )
+ return true;
+ }
+
+ return false;
+}
+
+
+#if BITVECT_DEBUG
+
+void BitVector::debug()
+{
+ assert( size > 68 );
+
+ set(66);
+ cout << "set(66) -> " << at(66) << endl;
+
+ clear(66);
+ cout << "clear(66) -> " << at(66) << endl;
+
+ set(66);
+ set(68);
+
+ BitVector vec2(size);
+ vec2.set(44);
+
+ cout << "test( (1<<66 | 1<<68), (1<<44) ) -> " << test(vec2) << endl;
+
+ vec2.set(66);
+ cout << "test( (1<<66 | 1<<68), (1<<66) | (1<<44) ) -> "
+ << test(vec2) << endl;
+
+ clear(vec2);
+ cout << "test( (1<<68), (1<<66) | (1<<44) ) -> " << test(vec2)
+ << endl;
+
+ cout << "at(66) = " << at(66) << "; at(68) = " << at(68) << endl;
+
+ cout << endl;
+
+ //reset();
+}
+
+#endif