From 7c125014e78a8d672fe114671dcf98dafdc121a9 Mon Sep 17 00:00:00 2001 From: Aaron Levine Date: Wed, 28 Oct 2020 11:50:09 -0600 Subject: [PATCH] removing elements not to be released, and setting release number --- configure.ac | 2 +- src/sst/elements/GNA/GNA.cc | 369 ------------ src/sst/elements/GNA/GNA.h | 124 ---- src/sst/elements/GNA/Makefile.am | 38 -- src/sst/elements/GNA/README | 23 - src/sst/elements/GNA/gna_lib.h | 95 --- src/sst/elements/GNA/neuron.h | 91 --- src/sst/elements/GNA/sts.cc | 65 -- src/sst/elements/GNA/sts.h | 56 -- .../GNA/tests/refFiles/test_GNA_1.out | 170 ------ src/sst/elements/GNA/tests/sweep.csh | 15 - src/sst/elements/GNA/tests/sweep2.csh | 15 - src/sst/elements/GNA/tests/test.py | 67 --- src/sst/elements/GNA/tests/test.ref.out | 170 ------ src/sst/elements/GNA/tests/test_GNA_1.py | 67 --- .../GNA/tests/testsuite_default_GNA.py | 79 --- src/sst/elements/Opal/Makefile.am | 55 -- src/sst/elements/Opal/Opal.cc | 564 ------------------ src/sst/elements/Opal/Opal.h | 388 ------------ src/sst/elements/Opal/Opal_Event.h | 149 ----- src/sst/elements/Opal/PageFaultHandler.cc | 82 --- src/sst/elements/Opal/PageFaultHandler.h | 62 -- src/sst/elements/Opal/README | 20 - src/sst/elements/Opal/arielmemmgr_opal.cc | 118 ---- src/sst/elements/Opal/arielmemmgr_opal.h | 79 --- src/sst/elements/Opal/configure.m4 | 6 - src/sst/elements/Opal/mempool.cc | 302 ---------- src/sst/elements/Opal/mempool.h | 159 ----- src/sst/elements/Opal/opalMemNIC.cc | 134 ----- src/sst/elements/Opal/opalMemNIC.h | 117 ---- src/sst/elements/Opal/tests/app/makefile | 19 - src/sst/elements/Opal/tests/app/opal_mlm.c | 82 --- src/sst/elements/Opal/tests/app/opal_test.c | 62 -- .../elements/Opal/tests/basic_1node_1smp.py | 416 ------------- .../elements/Opal/tests/basic_2node_1smp.py | 431 ------------- .../refFiles/test_Opal_basic_1node_1smp.out | 125 ---- .../refFiles/test_Opal_basic_2node_1smp.out | 234 -------- src/sst/elements/serrano/Makefile.am | 29 - src/sst/elements/serrano/scircq.h | 85 --- src/sst/elements/serrano/sercgunit.h | 74 --- src/sst/elements/serrano/seriterunit.h | 161 ----- src/sst/elements/serrano/serprintunit.h | 88 --- src/sst/elements/serrano/serrano.cc | 256 -------- src/sst/elements/serrano/serrano.h | 60 -- src/sst/elements/serrano/serstdunit.h | 178 ------ src/sst/elements/serrano/smsg.h | 74 --- .../elements/serrano/tests/graphs/sum.graph | 9 - .../elements/serrano/tests/test_serrano.py | 13 - 48 files changed, 1 insertion(+), 6076 deletions(-) delete mode 100644 src/sst/elements/GNA/GNA.cc delete mode 100644 src/sst/elements/GNA/GNA.h delete mode 100644 src/sst/elements/GNA/Makefile.am delete mode 100644 src/sst/elements/GNA/README delete mode 100755 src/sst/elements/GNA/gna_lib.h delete mode 100644 src/sst/elements/GNA/neuron.h delete mode 100644 src/sst/elements/GNA/sts.cc delete mode 100644 src/sst/elements/GNA/sts.h delete mode 100644 src/sst/elements/GNA/tests/refFiles/test_GNA_1.out delete mode 100755 src/sst/elements/GNA/tests/sweep.csh delete mode 100755 src/sst/elements/GNA/tests/sweep2.csh delete mode 100644 src/sst/elements/GNA/tests/test.py delete mode 100644 src/sst/elements/GNA/tests/test.ref.out delete mode 100644 src/sst/elements/GNA/tests/test_GNA_1.py delete mode 100644 src/sst/elements/GNA/tests/testsuite_default_GNA.py delete mode 100644 src/sst/elements/Opal/Makefile.am delete mode 100644 src/sst/elements/Opal/Opal.cc delete mode 100644 src/sst/elements/Opal/Opal.h delete mode 100644 src/sst/elements/Opal/Opal_Event.h delete mode 100644 src/sst/elements/Opal/PageFaultHandler.cc delete mode 100644 src/sst/elements/Opal/PageFaultHandler.h delete mode 100644 src/sst/elements/Opal/README delete mode 100644 src/sst/elements/Opal/arielmemmgr_opal.cc delete mode 100644 src/sst/elements/Opal/arielmemmgr_opal.h delete mode 100644 src/sst/elements/Opal/configure.m4 delete mode 100644 src/sst/elements/Opal/mempool.cc delete mode 100644 src/sst/elements/Opal/mempool.h delete mode 100644 src/sst/elements/Opal/opalMemNIC.cc delete mode 100644 src/sst/elements/Opal/opalMemNIC.h delete mode 100644 src/sst/elements/Opal/tests/app/makefile delete mode 100644 src/sst/elements/Opal/tests/app/opal_mlm.c delete mode 100644 src/sst/elements/Opal/tests/app/opal_test.c delete mode 100644 src/sst/elements/Opal/tests/basic_1node_1smp.py delete mode 100644 src/sst/elements/Opal/tests/basic_2node_1smp.py delete mode 100644 src/sst/elements/Opal/tests/refFiles/test_Opal_basic_1node_1smp.out delete mode 100644 src/sst/elements/Opal/tests/refFiles/test_Opal_basic_2node_1smp.out delete mode 100644 src/sst/elements/serrano/Makefile.am delete mode 100644 src/sst/elements/serrano/scircq.h delete mode 100644 src/sst/elements/serrano/sercgunit.h delete mode 100644 src/sst/elements/serrano/seriterunit.h delete mode 100644 src/sst/elements/serrano/serprintunit.h delete mode 100644 src/sst/elements/serrano/serrano.cc delete mode 100644 src/sst/elements/serrano/serrano.h delete mode 100644 src/sst/elements/serrano/serstdunit.h delete mode 100644 src/sst/elements/serrano/smsg.h delete mode 100644 src/sst/elements/serrano/tests/graphs/sum.graph delete mode 100644 src/sst/elements/serrano/tests/test_serrano.py diff --git a/configure.ac b/configure.ac index fc87ea8989..7333f0029f 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ # # Set Version of SST Elements Library -AC_INIT([SST Elements Library], [-dev], [sst@sandia.gov]) +AC_INIT([SST Elements Library], [10.1.0], [sst@sandia.gov]) AC_PREREQ([2.59]) AC_COPYRIGHT([Copyright Sandia National Laboratories, 2004-2020]) diff --git a/src/sst/elements/GNA/GNA.cc b/src/sst/elements/GNA/GNA.cc deleted file mode 100644 index 0899c7049b..0000000000 --- a/src/sst/elements/GNA/GNA.cc +++ /dev/null @@ -1,369 +0,0 @@ -// Copyright 2018-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2018-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include -#include "GNA.h" - -#include -#include -#include -#include - -using namespace SST; -//using namespace SST::MemHierarchy; -using namespace SST::GNAComponent; - -GNA::GNA(ComponentId_t id, Params& params) : - Component(id), state(IDLE), now(0), numFirings(0), numDeliveries(0) -{ - uint32_t outputLevel = params.find("verbose", 0); - out.init("GNA:@p:@l: ", outputLevel, 0, Output::STDOUT); - - // get parameters - numNeurons = params.find("neurons", 32); - if (numNeurons <= 0) { - out.fatal(CALL_INFO, -1,"number of neurons invalid\n"); - } - BWPpTic = params.find("BWPperTic", 2); - if (BWPpTic <= 0) { - out.fatal(CALL_INFO, -1,"BWPperTic invalid\n"); - } - STSDispatch = params.find("STSDispatch", 2); - if (BWPpTic <= 0) { - out.fatal(CALL_INFO, -1,"STSDispatch invalid\n"); - } - STSParallelism = params.find("STSParallelism", 2); - if (BWPpTic <= 0) { - out.fatal(CALL_INFO, -1,"STSParallelism invalid\n"); - } - maxOutMem = params.find("MaxOutMem", STSParallelism); - if (BWPpTic <= 0) { - out.fatal(CALL_INFO, -1,"MaxOutMem invalid\n"); - } - - //set our clock - std::string clockFreq = params.find("clock", "1GHz"); - clockHandler = new Clock::Handler(this, &GNA::clockTic); - clockTC = registerClock(clockFreq, clockHandler); - - // tell the simulator not to end without us - registerAsPrimaryComponent(); - primaryComponentDoNotEndSim(); - - // init memory - memory = loadUserSubComponent("memory", ComponentInfo::SHARE_NONE, clockTC, new Interfaces::SimpleMem::Handler(this, &GNA::handleEvent)); - if (!memory) { - params.insert("port", "mem_link"); - memory = loadAnonymousSubComponent("memHierarchy.memInterface", "memory", 0, - ComponentInfo::SHARE_PORTS, params, clockTC, new Interfaces::SimpleMem::Handler(this, &GNA::handleEvent)); - } - if (!memory) - out.fatal(CALL_INFO, -1, "Unable to load memHierarchy.memInterface subcomponent\n"); -} - -GNA::GNA() : Component(-1) -{ - // for serialization only -} - - -void GNA::init(unsigned int phase) { - using namespace Neuron_Loader_Types; - using namespace White_Matter_Types; - - // init memory - memory->init(phase); - - // Everything below we only do once - if (phase != 0) { - return; - } - - // create STS units - for(int i = 0; i < STSParallelism; ++i) { - STSUnits.push_back(STS(this,i)); - } - - // initialize neurons - neurons = new neuron[numNeurons]; - - SST::RNG::MarsagliaRNG rng(1,13); - - // - // neurons -#if 0 - for (int nrn_num=0;nrn_num<=8;nrn_num++) - neurons[nrn_num].configure((T_NctFl){1000,-2.0,0.0}); - for (int nrn_num=9;nrn_num<=11;nrn_num++) - neurons[nrn_num].configure((T_NctFl){ 750,-2.0,0.0}); - for (int nrn_num=12;nrn_num<=12;nrn_num++) - neurons[nrn_num].configure((T_NctFl){1000,-2.0,0.0}); - for (int nrn_num=13;nrn_num<=15;nrn_num++) - neurons[nrn_num].configure((T_NctFl){ 750,-2.0,0.0}); - for (int nrn_num=16;nrn_num<=23;nrn_num++) - neurons[nrn_num].configure((T_NctFl){ 500,-2.0,0.0}); - for (int nrn_num=24;nrn_num<=31;nrn_num++) - neurons[nrn_num].configure((T_NctFl){1500,-2.0,0.0}); -#else - for (int nrn_num=0;nrn_num - // White matter list - uint64_t startAddr = 0x10000; - int countLinks = 0; - for (int n = 0; n < numNeurons; ++n) { - using namespace Interfaces; - // most neurons connect to 1-4, 1% connect to 15 - uint16_t roll = rng.generateNextUInt32() % 100; - uint numCon = 1; - bool local = 1; - if (roll == 0) { - numCon = 15; - if (rng.generateNextUInt32() % 100) local = 0; - } else { - numCon = 1 + (rng.generateNextUInt32() % 4); - local = 1; - } - - countLinks += numCon; - neurons[n].setWML(startAddr,numCon); - for (int nn=0; nn= numNeurons) - targ = 0; - - uint64_t reqAddr = startAddr+nn*sizeof(T_Wme); - SimpleMem::Request *req = - new SimpleMem::Request(SimpleMem::Request::Write, reqAddr, - sizeof(T_Wme)); - req->data.resize(sizeof(T_Wme)); - uint32_t str = 300+(rng.generateNextUInt32() % 700); - if (targ == 0) str = 1; - uint32_t tmpOff = 2 + (rng.generateNextUInt32() % 12); - if (!local) { - tmpOff /= 2; - } - req->data[0] = (str>>8) & 0xff; // Synaptic Str upper - req->data[1] = (str) & 0xff; // Synaptic Str lower - req->data[2] = (tmpOff>>8) & 0xff; // temp offset upper - req->data[3] = (tmpOff) & 0xff; // temp offset lower - req->data[4] = (targ>>8) & 0xff; // address upper - req->data[5] = (targ) & 0xff; // address lower - req->data[6] = 0; // valid - req->data[7] = 0; // valid - //printf("Writing n%d to targ%d at %p\n", n, targ, (void*)reqAddr); - memory->sendInitData(req); - } - assert(sizeof(T_Wme) == 8); - startAddr += numCon * sizeof(T_Wme); - } - - printf("Constructed %d neurons with %d links\n", numNeurons, countLinks); - - // brain wave pulses -#if 0 - int bwpl_len = 16; - Ctrl_And_Stat_Types::T_BwpFl* bwpl = (Ctrl_And_Stat_Types::T_BwpFl*)calloc(bwpl_len,sizeof(Ctrl_And_Stat_Types::T_BwpFl)); - bwpl[0] = (Ctrl_And_Stat_Types::T_BwpFl){1001,0,0}; - bwpl[1] = (Ctrl_And_Stat_Types::T_BwpFl){1001,0,1}; - bwpl[2] = (Ctrl_And_Stat_Types::T_BwpFl){1001,0,2}; - bwpl[3] = (Ctrl_And_Stat_Types::T_BwpFl){1001,0,3}; - bwpl[4] = (Ctrl_And_Stat_Types::T_BwpFl){1001,4,0}; - bwpl[5] = (Ctrl_And_Stat_Types::T_BwpFl){1001,4,1}; - bwpl[6] = (Ctrl_And_Stat_Types::T_BwpFl){1001,4,2}; - bwpl[7] = (Ctrl_And_Stat_Types::T_BwpFl){1,4,3}; - bwpl[8] = (Ctrl_And_Stat_Types::T_BwpFl){1,0,4}; - bwpl[9] = (Ctrl_And_Stat_Types::T_BwpFl){1,0,5}; - bwpl[10] = (Ctrl_And_Stat_Types::T_BwpFl){1,0,6}; - bwpl[11] = (Ctrl_And_Stat_Types::T_BwpFl){1,0,7}; - bwpl[12] = (Ctrl_And_Stat_Types::T_BwpFl){1,4,4}; - bwpl[13] = (Ctrl_And_Stat_Types::T_BwpFl){1,4,5}; - bwpl[14] = (Ctrl_And_Stat_Types::T_BwpFl){1,4,6}; - bwpl[15] = (Ctrl_And_Stat_Types::T_BwpFl){1,4,7}; -#else - int bwpl_len = 2; - Ctrl_And_Stat_Types::T_BwpFl* bwpl = (Ctrl_And_Stat_Types::T_BwpFl*)calloc(bwpl_len,sizeof(Ctrl_And_Stat_Types::T_BwpFl)); - for (int i = 0; i < bwpl_len; ++i) { - int targ = rng.generateNextUInt32() % numNeurons; - bwpl[i] = (Ctrl_And_Stat_Types::T_BwpFl){2001,targ,i*61}; - } -#endif - for (int i = 0; i < bwpl_len; ++i) { - BWPs.insert(std::pair(bwpl[i].TmpSft, bwpl[i])); - } -} - -// handle incoming memory -void GNA::handleEvent(Interfaces::SimpleMem::Request * req) -{ - std::map::iterator i = requests.find(req->id); - if (i == requests.end()) { - out.fatal(CALL_INFO, -1, "Request ID (%" PRIx64 ") not found in outstanding requests!\n", req->id); - } else { - // handle event - STS* requestor = i->second; - requestor->returnRequest(req); - // clean up - requests.erase(i); - } -} - -void GNA::deliver(float val, int targetN, int time) { - // AFR: should really throttle this in some way - numDeliveries++; - if(targetN < numNeurons) { - neurons[targetN].deliverSpike(val, time); - //printf("deliver %f to %d @ %d\n", val, targetN, time); - } else { - out.fatal(CALL_INFO, -1,"Invalid Neuron Address\n"); - } -} - -// returns true if no more to deliver -bool GNA::deliverBWPs() { - int tries = BWPpTic; - - while (tries > 0) { - BWPBuf_t::iterator i = BWPs.find(now); - if (i != BWPs.end()) { - // deliver it - const Ctrl_And_Stat_Types::T_BwpFl &pulse = i->second; - printf("BWP st%.1f to %d\n", pulse.InpValFl, pulse.InpNrn); - deliver(pulse.InpValFl, pulse.InpNrn, pulse.TmpSft); - BWPs.erase(i); - } else { - return true; - } - tries--; - } - - if (BWPs.find(now) == BWPs.end()) { - return true; - } else { - return false; - } -} - -// find a free STS unit to assign the spike to -void GNA::assignSTS() { - int remainDispatches = STSDispatch; - - // try to find a free unit - for(auto &e: STSUnits) { - if (firedNeurons.empty()) return; - if (e.isFree()) { - e.assign(firedNeurons.front()); - firedNeurons.pop_front(); - remainDispatches--; - } - if (remainDispatches == 0) return; - } -} - -void GNA::processFire() { - // has to: deliver incoming brain wave pulses, assign neuron - // firings to lookup units (spike transfer structures), process - // neuron firings into activations - - // brain wave pulses: Deliver all before moving on - bool BWPDone = deliverBWPs(); - - bool allSpikesDelivered = true; - if (BWPDone) { - // assign firings to STSs - assignSTS(); - - // process neuron firings into activations - for(auto &e: STSUnits) { - e.advance(now); - bool unitDone = e.isFree(); - allSpikesDelivered &= unitDone; - } - } - - // do we move on? - if (BWPDone && allSpikesDelivered & firedNeurons.empty()) { - state = LIF; - } -} - -// run LIF on all neurons -void GNA::lifAll() { - for (uint n = 0; n < numNeurons; ++n) { - bool fired = neurons[n].lif(now); - if (fired) { - //printf(" %d fired\n", n); - firedNeurons.push_back(n); - } - } -} - -bool GNA::clockTic( Cycle_t ) -{ - // send some outgoing mem reqs - int maxOut = maxOutMem; - //if((!outgoingReqs.empty()) && (now & 0x3f) == 0) { - // printf(" outRqst Q %d\n", outgoingReqs.size()); - //} - while(!outgoingReqs.empty() && maxOut > 0) { - memory->sendRequest(outgoingReqs.front()); - outgoingReqs.pop(); - maxOut--; - } - - - switch(state) { - case IDLE: - state = PROCESS_FIRE; // for now - break; - case PROCESS_FIRE: - processFire(); - break; - case LIF: - lifAll(); - now++; - state = PROCESS_FIRE; - numFirings += firedNeurons.size(); - if ((now & 0x3f) == 0) - printf("%lu neurons fired @ %d\n", firedNeurons.size(), now); - if (firedNeurons.size() == 0 && now > 100) { - primaryComponentOKToEndSim(); - } - break; - default: - out.fatal(CALL_INFO, -1,"Invalid GNA state\n"); - } - - // return false so we keep going - return false; -} - - diff --git a/src/sst/elements/GNA/GNA.h b/src/sst/elements/GNA/GNA.h deleted file mode 100644 index 5138afa4ea..0000000000 --- a/src/sst/elements/GNA/GNA.h +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2018-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2018-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _GNA_H -#define _GNA_H - -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include "gna_lib.h" -#include "neuron.h" -#include "sts.h" - -namespace SST { -namespace GNAComponent { - - -class GNA : public SST::Component { -public: -/* Element Library Info */ - SST_ELI_REGISTER_COMPONENT(GNA, "GNA", "GNA", SST_ELI_ELEMENT_VERSION(1,0,0), - "Spiking Temportal Processing Unit", COMPONENT_CATEGORY_PROCESSOR) - - SST_ELI_DOCUMENT_PARAMS( - {"verbose", "(uint) Determine how verbose the output from the CPU is", "0"}, - {"clock", "(string) Clock frequency", "1GHz"}, - {"BWPperTic", "Max # of Brain Wave Pulses which can be delivered each clock cycle","2"}, - {"STSDispatch", "Max # spikes that can be dispatched to the STS in a clock cycle","2"}, - {"STSParallelism", "Max # spikes the STS can process in parallelism ","2"}, - {"MaxOutMem", "Maximum # of outgoing memory requests per cycle","STSParallelism"}, - {"neurons", "(uint) number of neurons", "32"} - ) - - SST_ELI_DOCUMENT_PORTS( {"mem_link", "Connection to memory", { "memHierarchy.MemEventBase" } } ) - - SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( {"memory", "Interface to memory (e.g., caches)", "SST::Interfaces::SimpleMem"} ) - - /* Begin class definiton */ - GNA(SST::ComponentId_t id, SST::Params& params); - void finish() { - printf("Completed %d neuron firings\n", numFirings); - printf("Completed %d spike deliveries\n", numDeliveries); - } - -public: - void deliver(float val, int targetN, int time); - neuron* getNeuron(int n) {return &neurons[n];} - void readMem(Interfaces::SimpleMem::Request *req, STS *requestor) { - // queue the request to send later - outgoingReqs.push(req); - // record who it came from - requests.insert(std::make_pair(req->id, requestor)); - } - -private: - GNA(); // for serialization only - GNA(const GNA&); // do not implement - void operator=(const GNA&); // do not implement - void init(unsigned int phase); - - void handleEvent( SST::Interfaces::SimpleMem::Request * req ); - virtual bool clockTic( SST::Cycle_t ); - bool deliverBWPs(); - void assignSTS(); - void processFire(); - void lifAll(); - - typedef enum {IDLE, PROCESS_FIRE, LIF, LAST_STATE} gnaState_t; - gnaState_t state; - - Output out; - Interfaces::SimpleMem * memory; - uint numNeurons; - uint BWPpTic; - uint STSDispatch; - uint STSParallelism; - uint maxOutMem; - uint now; - uint numFirings; - uint numDeliveries; - queue outgoingReqs; - - neuron *neurons; - vector STSUnits; - - typedef multimap BWPBuf_t; - // brain wave pulse buffer - BWPBuf_t BWPs; - - std::deque firedNeurons; - std::map requests; - - TimeConverter *clockTC; - Clock::HandlerBase *clockHandler; - -}; - -} -} -#endif /* _GNA_H */ diff --git a/src/sst/elements/GNA/Makefile.am b/src/sst/elements/GNA/Makefile.am deleted file mode 100644 index 5715e2901a..0000000000 --- a/src/sst/elements/GNA/Makefile.am +++ /dev/null @@ -1,38 +0,0 @@ -# -*- Makefile -*- -# -# - -AM_CPPFLAGS = \ - $(MPI_CPPFLAGS) \ - -I$(top_srcdir)/src - -compdir = $(pkglibdir) -comp_LTLIBRARIES = libGNA.la -libGNA_la_SOURCES = \ - gna_lib.h \ - neuron.h \ - sts.h \ - sts.cc \ - GNA.cc \ - GNA.h - -EXTRA_DIST = \ - tests/testsuite_default_GNA.py \ - tests/test_GNA_1.py \ - tests/refFiles/test_GNA_1.out \ - tests/test.py \ - tests/test.ref.out \ - README - - -libGNA_la_LDFLAGS = -module -avoid-version - -install-exec-hook: - $(SST_REGISTER_TOOL) SST_ELEMENT_SOURCE GNA=$(abs_srcdir) - $(SST_REGISTER_TOOL) SST_ELEMENT_TESTS GNA=$(abs_srcdir)/tests - -########################################################################## -########################################################################## -########################################################################## - - diff --git a/src/sst/elements/GNA/README b/src/sst/elements/GNA/README deleted file mode 100644 index e275bd0640..0000000000 --- a/src/sst/elements/GNA/README +++ /dev/null @@ -1,23 +0,0 @@ - - -Implementation of a 'Generic Neural Architecture' based on the STPU work (Hill et All, ICRC 2017, "A Spike-Timing Neuromorphic Architecture", 10.1109/ICRC.2017.8123631) - -General simulation process: - -0) GNA in idle state waiting for user input. If user sends EXEC command - proceed to step 1, otherwise, stay idle. -1) process any firing events currently stored in output aggregation FIFO - into neuronal matrix based on connectivity defined in specified CSE context. -2) Perform single LIF operation on neuronal matrix. -3) Collect firing events (if any) into output aggregation FIFO. -4) If more LIFs to perform or more CSEs to process, then return to step 0, - otherwise, dump aggregated firing events to host machine and return to step - 0. - - -TODO: -Set Neuron values as (variable precision) ints or floats -time delay for output consoldiation? -statistics (request q size) -switch to output, rm printf()s -make sure maxOutMem works diff --git a/src/sst/elements/GNA/gna_lib.h b/src/sst/elements/GNA/gna_lib.h deleted file mode 100755 index b6d41f4fb9..0000000000 --- a/src/sst/elements/GNA/gna_lib.h +++ /dev/null @@ -1,95 +0,0 @@ -/****************************************************************************** - Copyright 2018-2020 NTESS. Under the terms of Contract DE-NA0003525 with - NTESS, the U.S. Government retains certain rights in this software. - - Copyright (c) 2018-2020, NTESS - All rights reserved. - - Portions are copyright of other developers: - See the file CONTRIBUTORS.TXT in the top level directory - the distribution for more information. - - This file is part of the SST software package. For license - information, see the LICENSE file in the top level directory of the - distribution. - -******************************************************************************/ - -/****************************************************************************** - - Author : Jonathon W. Donaldson, Arun Rodrigues - - Project : Spiking Temporal Processing Unit (STPU) / Generic Neural Architecture (GNA) - - Description: STPU/GNA User API Library - - Special Req: - - Notes : All signals ending with _n are active low. - -******************************************************************************/ - -#ifndef _GNA_LIB_H_ /* prevent circular inclusions */ -#define _GNA_LIB_H_ /* by using protection macros */ - -//////////////////////////////////////////////////////////////////////////////// -//////////////////////////////// DATA TYPES //////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////// - -namespace Neuron_Loader_Types { - - // Neuron Configuration Transmission (NCT) (Floating-Point Format) - typedef struct { - float NrnThr; // Neuron Firing Potential - float NrnMin; // Neuron Minimum Allowed Potential - float NrnLkg; // Neuron Leakage Value - } T_NctFl; - -} // Neuron_Loader_Types - -namespace White_Matter_Types { - - // White Matter Entry (WME) Format - // AFR: Changed to uint16 - typedef struct { - uint16_t SynStr; // Synaptic Strength - uint16_t TmpOff; // Temporal Offset - uint16_t SubAdr; // Sub-Address - uint16_t Valid; // Valid Flag - } T_Wme; - -} // White_Matter_Types - -namespace Ctx_Seq_Mem_Types { - - // Context Sequence Entry (CSE) Format - typedef struct { - int CtxNum; // Context Number - uint32_t LifCnt; // LIF Count - } T_Cse; - -} // Ctx_Seq_Mem_Types - -namespace Ctrl_And_Stat_Types { - - // Brain Wave Pulse (BWP) Format (Floating-Point Format) - typedef struct { - float InpValFl; // Input Value - int InpNrn; // Input Neuron Number - int TmpSft; // Temporal Shift - } T_BwpFl; - - // Dump Tuple Format - typedef struct { - int NrnNum; // Neuron Number - int TmpIdx; // Temporal Index - } T_Dmp; - -} // Ctrl_And_Stat_Types - - -#endif // _GNA_LIB_H_ - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////// OFFICIAL USE ONLY ///////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// diff --git a/src/sst/elements/GNA/neuron.h b/src/sst/elements/GNA/neuron.h deleted file mode 100644 index cb79ba28e7..0000000000 --- a/src/sst/elements/GNA/neuron.h +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2018-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2018-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _NEURON_H -#define _NEURON_H - -#include -#include "gna_lib.h" - -namespace SST { -namespace GNAComponent { - -using namespace std; - -class neuron { -public: - void configure(const Neuron_Loader_Types::T_NctFl &in) { - config = in; - } - void deliverSpike(float str, uint when) { - temporalBuffer[when] += str; - //printf(" got %f @ %d\n", str, when); - } - // performs Leaky Integrate and Fire. Returns true if fired. - bool lif(const uint now) { - // Leak - value -= config.NrnLkg; - - // Bound? - // AFR: is this right? - if (value < config.NrnMin) { - value = 0; - } - - // Integrate - value += getCurrentSpikes(now); - - // Fire? - if (value > config.NrnThr) { - value = config.NrnMin; - return true; - } else { - return false; - } - } - void setWML(uint64_t addr, uint32_t entries) { - WMLAddr = addr; - WMLLen = entries; - } - uint32_t getWMLLen() const {return WMLLen;} - uint32_t getWMLAddr() const {return WMLAddr;} -private: - Neuron_Loader_Types::T_NctFl config; - float value; - // temporal buffer - typedef map tBuf_t; - tBuf_t temporalBuffer; - // Neuron's white matter list - uint64_t WMLAddr; // start - uint32_t WMLLen; // number of entries in WML - - // get any current spike values - float getCurrentSpikes(const int now) { - tBuf_t::iterator i = temporalBuffer.find(now); - if (i != temporalBuffer.end()) { - float val = i->second; - temporalBuffer.erase(i); - //printf(" got current spike %f @ %d\n", val, now); - return val; - } else { - return 0; - } - } -}; - -} -} - -#endif // _NEURON_H diff --git a/src/sst/elements/GNA/sts.cc b/src/sst/elements/GNA/sts.cc deleted file mode 100644 index 6ec32cc62d..0000000000 --- a/src/sst/elements/GNA/sts.cc +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2018-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2018-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include -#include "sts.h" -#include "GNA.h" - -using namespace SST; -using namespace SST::GNAComponent; - -void STS::assign(int neuronNum) { - const neuron *spiker = myGNA->getNeuron(neuronNum); - numSpikes = spiker->getWMLLen(); - uint64_t listAddr = spiker->getWMLAddr(); - - // for each link, request the WML structure - for (int i = 0; i < numSpikes; ++i) { - // AFR: should throttle - using namespace Interfaces; - using namespace White_Matter_Types; - SimpleMem::Request *req = - new SimpleMem::Request(SimpleMem::Request::Read, listAddr, sizeof(T_Wme)); - myGNA->readMem(req, this); - listAddr += sizeof(T_Wme); - } -} - -bool STS::isFree() { - return (numSpikes == 0); -} - -void STS::advance(uint now) { - // AFR: should throttle - while (incomingReqs.empty() == false) { - // get the request - SST::Interfaces::SimpleMem::Request *req = incomingReqs.front(); - - assert(req->cmd == SST::Interfaces::SimpleMem::Request::ReadResp); - - // deliver the spike - auto &data = req->data; - uint16_t strength = (req->data[0]<<8) + req->data[1]; - uint16_t tempOffset = (data[2]<<8) + data[3]; - uint16_t target = (data[4]<<8) + data[5]; - //printf(" gna deliver str%u to %u @ %u\n", strength, target, tempOffset+now); - myGNA->deliver(strength, target, tempOffset+now); - numSpikes--; - - incomingReqs.pop(); - delete req; - } -} - diff --git a/src/sst/elements/GNA/sts.h b/src/sst/elements/GNA/sts.h deleted file mode 100644 index e6376b1045..0000000000 --- a/src/sst/elements/GNA/sts.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2018-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2018-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _STS_H -#define _STS_H - -#ifndef __STDC_FORMAT_MACROS -#define __STDC_FORMAT_MACROS -#endif -#include -#include - -#include - -#include - -namespace SST { -namespace GNAComponent { - -//foward decl -class GNA; -class Request; - -// A Spike Transfer Structure engine - transforms a given spike by -// performing a look up and delivering spikes -class STS { - GNA *myGNA; - int stsID; - int numSpikes; // number of spikes yet to deliver - std::queue incomingReqs; -public: - STS(GNA *parent, int n) : myGNA(parent), stsID(n), numSpikes(0) {;} - bool isFree(); - void assign(int); - void advance(uint); - void returnRequest(SST::Interfaces::SimpleMem::Request *req) { - incomingReqs.push(req); - } -}; - -} -} - -#endif // _STS_H diff --git a/src/sst/elements/GNA/tests/refFiles/test_GNA_1.out b/src/sst/elements/GNA/tests/refFiles/test_GNA_1.out deleted file mode 100644 index 0b5f5276de..0000000000 --- a/src/sst/elements/GNA/tests/refFiles/test_GNA_1.out +++ /dev/null @@ -1,170 +0,0 @@ -Constructed 500 neurons with 1246 links -BWP st2001.0 to 398 -BWP st2001.0 to 193 -28 neurons fired @ 64 -78 neurons fired @ 128 -99 neurons fired @ 192 -117 neurons fired @ 256 -146 neurons fired @ 320 -170 neurons fired @ 384 -154 neurons fired @ 448 -145 neurons fired @ 512 -134 neurons fired @ 576 -121 neurons fired @ 640 -105 neurons fired @ 704 -95 neurons fired @ 768 -85 neurons fired @ 832 -66 neurons fired @ 896 -45 neurons fired @ 960 -27 neurons fired @ 1024 -11 neurons fired @ 1088 -1 neurons fired @ 1152 -Completed 105728 neuron firings -Completed 245698 spike deliveries - l1cache.default_stat : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetS_I : Accumulator : Sum.u64 = 29143; SumSQ.u64 = 29143; Count.u64 = 29143; Min.u64 = 1; Max.u64 = 1; - l1cache.stateEvent_GetS_S : Accumulator : Sum.u64 = 216553; SumSQ.u64 = 216553; Count.u64 = 216553; Min.u64 = 1; Max.u64 = 1; - l1cache.stateEvent_GetS_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetX_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetX_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetX_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetSX_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetSX_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetSX_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetSResp_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetXResp_IS : Accumulator : Sum.u64 = 29143; SumSQ.u64 = 29143; Count.u64 = 29143; Min.u64 = 1; Max.u64 = 1; - l1cache.stateEvent_GetXResp_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetXResp_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLine_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLine_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLine_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineInv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineInv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineInv_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineResp_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineResp_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineResp_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_GetS : Accumulator : Sum.u64 = 29143; SumSQ.u64 = 29143; Count.u64 = 29143; Min.u64 = 1; Max.u64 = 1; - l1cache.eventSent_GetX : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_GetSX : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_PutM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_NACK : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_FlushLine : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_FlushLineInv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_FetchResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_FetchXResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_AckInv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_GetSResp : Accumulator : Sum.u64 = 245696; SumSQ.u64 = 245696; Count.u64 = 245696; Min.u64 = 1; Max.u64 = 1; - l1cache.eventSent_GetXResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_FlushLineResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_Put : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_Get : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_AckMove : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_CustomReq : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_CustomResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_CustomAck : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.EventStalledForLockedCacheline : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.evict_I : Accumulator : Sum.u64 = 33; SumSQ.u64 = 33; Count.u64 = 33; Min.u64 = 1; Max.u64 = 1; - l1cache.evict_S : Accumulator : Sum.u64 = 29111; SumSQ.u64 = 29111; Count.u64 = 29111; Min.u64 = 1; Max.u64 = 1; - l1cache.evict_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.evict_IS : Accumulator : Sum.u64 = 1; SumSQ.u64 = 1; Count.u64 = 1; Min.u64 = 1; Max.u64 = 1; - l1cache.evict_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.evict_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.evict_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetS_hit : Accumulator : Sum.u64 = 22708101; SumSQ.u64 = 4579029603; Count.u64 = 216553; Min.u64 = 2; Max.u64 = 233; - l1cache.latency_GetS_miss : Accumulator : Sum.u64 = 5980641; SumSQ.u64 = 1227342571; Count.u64 = 29143; Min.u64 = 205; Max.u64 = 235; - l1cache.latency_GetX_hit : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetX_miss : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetX_upgrade : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetSX_hit : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetSX_miss : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetSX_upgrade : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_FlushLine : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_FlushLine_fail : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_FlushLineInv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_FlushLineInv_fail : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSHit_Arrival : Accumulator : Sum.u64 = 98766; SumSQ.u64 = 98766; Count.u64 = 98766; Min.u64 = 1; Max.u64 = 1; - l1cache.GetXHit_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSXHit_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSHit_Blocked : Accumulator : Sum.u64 = 117787; SumSQ.u64 = 117787; Count.u64 = 117787; Min.u64 = 1; Max.u64 = 1; - l1cache.GetXHit_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSXHit_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSMiss_Arrival : Accumulator : Sum.u64 = 29142; SumSQ.u64 = 29142; Count.u64 = 29142; Min.u64 = 1; Max.u64 = 1; - l1cache.GetXMiss_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSXMiss_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSMiss_Blocked : Accumulator : Sum.u64 = 1; SumSQ.u64 = 1; Count.u64 = 1; Min.u64 = 1; Max.u64 = 1; - l1cache.GetXMiss_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSXMiss_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.CacheHits : Accumulator : Sum.u64 = 216553; SumSQ.u64 = 216553; Count.u64 = 216553; Min.u64 = 1; Max.u64 = 1; - l1cache.CacheMisses : Accumulator : Sum.u64 = 29143; SumSQ.u64 = 29143; Count.u64 = 29143; Min.u64 = 1; Max.u64 = 1; - l1cache.stateEvent_AckPut_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_PutS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_PutE : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.TotalEventsReceived : Accumulator : Sum.u64 = 274839; SumSQ.u64 = 274839; Count.u64 = 274839; Min.u64 = 1; Max.u64 = 1; - l1cache.TotalEventsReplayed : Accumulator : Sum.u64 = 117789; SumSQ.u64 = 117789; Count.u64 = 117789; Min.u64 = 1; Max.u64 = 1; - l1cache.Put_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.Get_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.AckMove_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetS_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetX_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSX_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSResp_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetXResp_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.CustomReq_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.CustomResp_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.CustomAck_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.NULLCMD_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetS_recv : Accumulator : Sum.u64 = 245696; SumSQ.u64 = 245696; Count.u64 = 245696; Min.u64 = 1; Max.u64 = 1; - l1cache.GetX_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSX_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.FlushLine_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.FlushLineInv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSResp_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetXResp_recv : Accumulator : Sum.u64 = 29143; SumSQ.u64 = 29143; Count.u64 = 29143; Min.u64 = 1; Max.u64 = 1; - l1cache.FlushLineResp_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.Inv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.ForceInv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.Fetch_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.FetchInv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.FetchInvX_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.NACK_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.AckPut_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.MSHR_occupancy : Accumulator : Sum.u64 = 27888529; SumSQ.u64 = 265664125; Count.u64 = 3261543; Min.u64 = 0; Max.u64 = 16; - l1cache.Bank_conflicts : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; -Simulation is complete, simulated time: 3.26154 ms diff --git a/src/sst/elements/GNA/tests/sweep.csh b/src/sst/elements/GNA/tests/sweep.csh deleted file mode 100755 index 3e7de876be..0000000000 --- a/src/sst/elements/GNA/tests/sweep.csh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/tcsh - -#foreach c (1 4 16 64) -foreach c (${1}) - foreach s (32 128 512 2048) - set fileN = test-${c}K-${s}.out - echo "running $fileN" - rm -f $fileN - time sst ./test.py -- -n 2500 -c $c -s $s -m $s >& $fileN & - sleep 1 - end - wait - echo "done cache = ${c}K" -end - diff --git a/src/sst/elements/GNA/tests/sweep2.csh b/src/sst/elements/GNA/tests/sweep2.csh deleted file mode 100755 index 780654641b..0000000000 --- a/src/sst/elements/GNA/tests/sweep2.csh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/tcsh - -#foreach c (16 32 64 128) -foreach c (${1}) - foreach s (32 64 128 256) - set fileN = test-${c}K-${s}.out - echo "running $fileN" - rm -f $fileN - time sst ./test.py -- -n 20000 -c $c -s $s -m 32 >& $fileN & - sleep 1 - end - wait - echo "done cache = ${c}K" -end - diff --git a/src/sst/elements/GNA/tests/test.py b/src/sst/elements/GNA/tests/test.py deleted file mode 100644 index d5c8eee446..0000000000 --- a/src/sst/elements/GNA/tests/test.py +++ /dev/null @@ -1,67 +0,0 @@ -import sst -import os -from optparse import OptionParser - -# options -op = OptionParser() -op.add_option("-n", "--neurons", action="store", type="int", dest="neurons", default=500) -# cache size in KiB -op.add_option("-c", "--cacheSz", action="store", type="int", dest="cacheSz", default=2) -#sts dispatch & parallelism -op.add_option("-s", "--STS", action="store", type="int", dest="sts", default=4) -# max memory out -op.add_option("-m", "--memOut", action="store", type="int", dest="memOut", default=4) -(options, args) = op.parse_args() - -# Define the simulation components -comp_gna = sst.Component("GNA", "GNA.GNA") -comp_gna.addParams({ - "verbose" : 1, - "neurons" : options.neurons, - "clock" : "1GHz", - "BWPperTic" : 1, - "STSDispatch" : options.sts, - "STSParallelism" : options.sts, - "MaxOutMem" : options.memOut -}) - -comp_l1cache = sst.Component("l1cache", "memHierarchy.Cache") -comp_l1cache.addParams({ - "access_latency_cycles" : "1", - "cache_frequency" : "1 Ghz", - "replacement_policy" : "lru", - "coherence_protocol" : "MSI", - "associativity" : "4", - "cache_line_size" : "64", - #"debug" : "1", - #"debug_level" : "10", - "verbose" : 0, - "L1" : "1", - "cache_size" : "%dKiB"%options.cacheSz -}) - -comp_memctrl = sst.Component("memory", "memHierarchy.MemController") -comp_memctrl.addParams({ - "debug" : 0, - "debug_level" : 10, - "backing" : "malloc", - "clock" : "1GHz", -}) -memory = comp_memctrl.setSubComponent("backend", "memHierarchy.simpleMem") -memory.addParams({ - "mem_size" : "512MiB", - "access_time" : "200 ns", -}) - -# Enable statistics -sst.setStatisticLoadLevel(7) -sst.setStatisticOutput("sst.statOutputConsole") -sst.enableAllStatisticsForComponentType("memHierarchy.Cache") - - -# Define the simulation links -link_gna_cache = sst.Link("link_gna_mem") -link_gna_cache.connect( (comp_gna, "mem_link", "1000ps"), (comp_l1cache, "high_network_0", "1000ps") ) -link_mem_bus_link = sst.Link("link_mem_bus_link") -link_mem_bus_link.connect( (comp_l1cache, "low_network_0", "50ps"), (comp_memctrl, "direct_link", "50ps") ) - diff --git a/src/sst/elements/GNA/tests/test.ref.out b/src/sst/elements/GNA/tests/test.ref.out deleted file mode 100644 index 0b5f5276de..0000000000 --- a/src/sst/elements/GNA/tests/test.ref.out +++ /dev/null @@ -1,170 +0,0 @@ -Constructed 500 neurons with 1246 links -BWP st2001.0 to 398 -BWP st2001.0 to 193 -28 neurons fired @ 64 -78 neurons fired @ 128 -99 neurons fired @ 192 -117 neurons fired @ 256 -146 neurons fired @ 320 -170 neurons fired @ 384 -154 neurons fired @ 448 -145 neurons fired @ 512 -134 neurons fired @ 576 -121 neurons fired @ 640 -105 neurons fired @ 704 -95 neurons fired @ 768 -85 neurons fired @ 832 -66 neurons fired @ 896 -45 neurons fired @ 960 -27 neurons fired @ 1024 -11 neurons fired @ 1088 -1 neurons fired @ 1152 -Completed 105728 neuron firings -Completed 245698 spike deliveries - l1cache.default_stat : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetS_I : Accumulator : Sum.u64 = 29143; SumSQ.u64 = 29143; Count.u64 = 29143; Min.u64 = 1; Max.u64 = 1; - l1cache.stateEvent_GetS_S : Accumulator : Sum.u64 = 216553; SumSQ.u64 = 216553; Count.u64 = 216553; Min.u64 = 1; Max.u64 = 1; - l1cache.stateEvent_GetS_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetX_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetX_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetX_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetSX_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetSX_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetSX_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetSResp_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetXResp_IS : Accumulator : Sum.u64 = 29143; SumSQ.u64 = 29143; Count.u64 = 29143; Min.u64 = 1; Max.u64 = 1; - l1cache.stateEvent_GetXResp_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_GetXResp_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Inv_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInvX_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_Fetch_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FetchInv_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_ForceInv_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLine_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLine_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLine_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineInv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineInv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineInv_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineResp_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineResp_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.stateEvent_FlushLineResp_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_GetS : Accumulator : Sum.u64 = 29143; SumSQ.u64 = 29143; Count.u64 = 29143; Min.u64 = 1; Max.u64 = 1; - l1cache.eventSent_GetX : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_GetSX : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_PutM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_NACK : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_FlushLine : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_FlushLineInv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_FetchResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_FetchXResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_AckInv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_GetSResp : Accumulator : Sum.u64 = 245696; SumSQ.u64 = 245696; Count.u64 = 245696; Min.u64 = 1; Max.u64 = 1; - l1cache.eventSent_GetXResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_FlushLineResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_Put : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_Get : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_AckMove : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_CustomReq : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_CustomResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_CustomAck : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.EventStalledForLockedCacheline : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.evict_I : Accumulator : Sum.u64 = 33; SumSQ.u64 = 33; Count.u64 = 33; Min.u64 = 1; Max.u64 = 1; - l1cache.evict_S : Accumulator : Sum.u64 = 29111; SumSQ.u64 = 29111; Count.u64 = 29111; Min.u64 = 1; Max.u64 = 1; - l1cache.evict_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.evict_IS : Accumulator : Sum.u64 = 1; SumSQ.u64 = 1; Count.u64 = 1; Min.u64 = 1; Max.u64 = 1; - l1cache.evict_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.evict_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.evict_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetS_hit : Accumulator : Sum.u64 = 22708101; SumSQ.u64 = 4579029603; Count.u64 = 216553; Min.u64 = 2; Max.u64 = 233; - l1cache.latency_GetS_miss : Accumulator : Sum.u64 = 5980641; SumSQ.u64 = 1227342571; Count.u64 = 29143; Min.u64 = 205; Max.u64 = 235; - l1cache.latency_GetX_hit : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetX_miss : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetX_upgrade : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetSX_hit : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetSX_miss : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_GetSX_upgrade : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_FlushLine : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_FlushLine_fail : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_FlushLineInv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.latency_FlushLineInv_fail : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSHit_Arrival : Accumulator : Sum.u64 = 98766; SumSQ.u64 = 98766; Count.u64 = 98766; Min.u64 = 1; Max.u64 = 1; - l1cache.GetXHit_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSXHit_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSHit_Blocked : Accumulator : Sum.u64 = 117787; SumSQ.u64 = 117787; Count.u64 = 117787; Min.u64 = 1; Max.u64 = 1; - l1cache.GetXHit_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSXHit_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSMiss_Arrival : Accumulator : Sum.u64 = 29142; SumSQ.u64 = 29142; Count.u64 = 29142; Min.u64 = 1; Max.u64 = 1; - l1cache.GetXMiss_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSXMiss_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSMiss_Blocked : Accumulator : Sum.u64 = 1; SumSQ.u64 = 1; Count.u64 = 1; Min.u64 = 1; Max.u64 = 1; - l1cache.GetXMiss_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSXMiss_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.CacheHits : Accumulator : Sum.u64 = 216553; SumSQ.u64 = 216553; Count.u64 = 216553; Min.u64 = 1; Max.u64 = 1; - l1cache.CacheMisses : Accumulator : Sum.u64 = 29143; SumSQ.u64 = 29143; Count.u64 = 29143; Min.u64 = 1; Max.u64 = 1; - l1cache.stateEvent_AckPut_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_PutS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.eventSent_PutE : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.TotalEventsReceived : Accumulator : Sum.u64 = 274839; SumSQ.u64 = 274839; Count.u64 = 274839; Min.u64 = 1; Max.u64 = 1; - l1cache.TotalEventsReplayed : Accumulator : Sum.u64 = 117789; SumSQ.u64 = 117789; Count.u64 = 117789; Min.u64 = 1; Max.u64 = 1; - l1cache.Put_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.Get_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.AckMove_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetS_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetX_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSX_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSResp_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetXResp_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.CustomReq_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.CustomResp_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.CustomAck_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.NULLCMD_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetS_recv : Accumulator : Sum.u64 = 245696; SumSQ.u64 = 245696; Count.u64 = 245696; Min.u64 = 1; Max.u64 = 1; - l1cache.GetX_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSX_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.FlushLine_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.FlushLineInv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetSResp_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.GetXResp_recv : Accumulator : Sum.u64 = 29143; SumSQ.u64 = 29143; Count.u64 = 29143; Min.u64 = 1; Max.u64 = 1; - l1cache.FlushLineResp_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.Inv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.ForceInv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.Fetch_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.FetchInv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.FetchInvX_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.NACK_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.AckPut_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - l1cache.MSHR_occupancy : Accumulator : Sum.u64 = 27888529; SumSQ.u64 = 265664125; Count.u64 = 3261543; Min.u64 = 0; Max.u64 = 16; - l1cache.Bank_conflicts : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; -Simulation is complete, simulated time: 3.26154 ms diff --git a/src/sst/elements/GNA/tests/test_GNA_1.py b/src/sst/elements/GNA/tests/test_GNA_1.py deleted file mode 100644 index d5c8eee446..0000000000 --- a/src/sst/elements/GNA/tests/test_GNA_1.py +++ /dev/null @@ -1,67 +0,0 @@ -import sst -import os -from optparse import OptionParser - -# options -op = OptionParser() -op.add_option("-n", "--neurons", action="store", type="int", dest="neurons", default=500) -# cache size in KiB -op.add_option("-c", "--cacheSz", action="store", type="int", dest="cacheSz", default=2) -#sts dispatch & parallelism -op.add_option("-s", "--STS", action="store", type="int", dest="sts", default=4) -# max memory out -op.add_option("-m", "--memOut", action="store", type="int", dest="memOut", default=4) -(options, args) = op.parse_args() - -# Define the simulation components -comp_gna = sst.Component("GNA", "GNA.GNA") -comp_gna.addParams({ - "verbose" : 1, - "neurons" : options.neurons, - "clock" : "1GHz", - "BWPperTic" : 1, - "STSDispatch" : options.sts, - "STSParallelism" : options.sts, - "MaxOutMem" : options.memOut -}) - -comp_l1cache = sst.Component("l1cache", "memHierarchy.Cache") -comp_l1cache.addParams({ - "access_latency_cycles" : "1", - "cache_frequency" : "1 Ghz", - "replacement_policy" : "lru", - "coherence_protocol" : "MSI", - "associativity" : "4", - "cache_line_size" : "64", - #"debug" : "1", - #"debug_level" : "10", - "verbose" : 0, - "L1" : "1", - "cache_size" : "%dKiB"%options.cacheSz -}) - -comp_memctrl = sst.Component("memory", "memHierarchy.MemController") -comp_memctrl.addParams({ - "debug" : 0, - "debug_level" : 10, - "backing" : "malloc", - "clock" : "1GHz", -}) -memory = comp_memctrl.setSubComponent("backend", "memHierarchy.simpleMem") -memory.addParams({ - "mem_size" : "512MiB", - "access_time" : "200 ns", -}) - -# Enable statistics -sst.setStatisticLoadLevel(7) -sst.setStatisticOutput("sst.statOutputConsole") -sst.enableAllStatisticsForComponentType("memHierarchy.Cache") - - -# Define the simulation links -link_gna_cache = sst.Link("link_gna_mem") -link_gna_cache.connect( (comp_gna, "mem_link", "1000ps"), (comp_l1cache, "high_network_0", "1000ps") ) -link_mem_bus_link = sst.Link("link_mem_bus_link") -link_mem_bus_link.connect( (comp_l1cache, "low_network_0", "50ps"), (comp_memctrl, "direct_link", "50ps") ) - diff --git a/src/sst/elements/GNA/tests/testsuite_default_GNA.py b/src/sst/elements/GNA/tests/testsuite_default_GNA.py deleted file mode 100644 index 5462637665..0000000000 --- a/src/sst/elements/GNA/tests/testsuite_default_GNA.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -*- - -from sst_unittest import * -from sst_unittest_support import * - -################################################################################ -# Code to support a single instance module initialize, must be called setUp method - -module_init = 0 -module_sema = threading.Semaphore() - -def initializeTestModule_SingleInstance(class_inst): - global module_init - global module_sema - - module_sema.acquire() - if module_init != 1: - # Put your single instance Init Code Here - module_init = 1 - module_sema.release() - -################################################################################ - -class testcase_GNA_Component(SSTTestCase): - - def initializeClass(self, testName): - super(type(self), self).initializeClass(testName) - # Put test based setup code here. it is called before testing starts - # NOTE: This method is called once for every test - - def setUp(self): - super(type(self), self).setUp() - initializeTestModule_SingleInstance(self) - # Put test based setup code here. it is called once before every test - - def tearDown(self): - # Put test based teardown code here. it is called once after every test - super(type(self), self).tearDown() - -##### - - def test_GNA_1(self): - self.GNA_test_template("1") - -##### - - def GNA_test_template(self, testcase): - # Note: testcase param is ignored for now - # Get the path to the test files - test_path = self.get_testsuite_dir() - outdir = self.get_test_output_run_dir() - tmpdir = self.get_test_output_tmp_dir() - - # Set the various file paths - testDataFileName="test_GNA_{0}".format(testcase) - - sdlfile = "{0}/{1}.py".format(test_path, testDataFileName) - reffile = "{0}/refFiles/{1}.out".format(test_path, testDataFileName) - outfile = "{0}/{1}.out".format(outdir, testDataFileName) - errfile = "{0}/{1}.err".format(outdir, testDataFileName) - mpioutfiles = "{0}/{1}.testfile".format(outdir, testDataFileName) - - self.run_sst(sdlfile, outfile, errfile, mpi_out_files=mpioutfiles) - - testing_remove_component_warning_from_file(outfile) - - # NOTE: THE PASS / FAIL EVALUATIONS ARE PORTED FROM THE SQE BAMBOO - # BASED testSuite_XXX.sh THESE SHOULD BE RE-EVALUATED BY THE - # DEVELOPER AGAINST THE LATEST VERSION OF SST TO SEE IF THE - # TESTS & RESULT FILES ARE STILL VALID - - # Perform the tests - self.assertFalse(os_test_file(errfile, "-s"), "GNA test {0} has Non-empty Error File {1}".format(testDataFileName, errfile)) - - cmp_result = testing_compare_sorted_diff(testcase, outfile, reffile) - self.assertTrue(cmp_result, "Output file {0} does not match Reference File {1}".format(outfile, reffile)) - - - diff --git a/src/sst/elements/Opal/Makefile.am b/src/sst/elements/Opal/Makefile.am deleted file mode 100644 index e618f8a6ae..0000000000 --- a/src/sst/elements/Opal/Makefile.am +++ /dev/null @@ -1,55 +0,0 @@ -AM_CPPFLAGS = \ - $(MPIT_CPPFLAGS) - -AM_LDFLAGS = -lm -compdir = $(pkglibdir) -comp_LTLIBRARIES = libOpal.la - -libOpal_la_SOURCES = \ - mempool.h \ - mempool.cc \ - Opal.cc \ - Opal.h \ - Opal_Event.h \ - arielmemmgr_opal.cc \ - arielmemmgr_opal.h \ - opalMemNIC.cc \ - opalMemNIC.h \ - PageFaultHandler.cc \ - PageFaultHandler.h - - -libOpal_la_CPPFLAGS = \ - -I$(top_srcdir)/src \ - $(SST_CXX0X_FLAGS) -fPIC -Wall \ - $(MPI_CPPFLAGS) - -libOpal_la_LDFLAGS = \ - -avoid-version - -libOpal_la_LIBADD = \ - $(SST_SYSTEMC_LIB) - -# -# EXTRA_DIST = \ -# tests/gupsgen_2RANKS.py \ -# tests/gupsgen_fastNVM.py \ -# tests/gupsgen.py \ -# tests/stencil3dbench_messier.py \ -# tests/streambench_messier.py - -#noinst_PROGRAMS = infogather -#infogather_SOURCES = infogather.cc -#infogather_CPPFLAGS = -I/usr/local/systemc-2.3/include \ -# -std=c++11 -fPIC -Wall \ -# # $(BOOST_CPPFLAGS) \ -# # $(MPI_CPPFLAGS) -# #infogather_LDFLAGS = -L/usr/local/systemc-2.3/lib-linux64/ -lsystemc \ -# # -lhmcsim \ -# # -avoid-version -# #infogather_LDADD = libhmcsim.so -# - -install-exec-hook: - $(SST_REGISTER_TOOL) SST_ELEMENT_SOURCE Opal=$(abs_srcdir) - $(SST_REGISTER_TOOL) SST_ELEMENT_TESTS Opal=$(abs_srcdir)/tests diff --git a/src/sst/elements/Opal/Opal.cc b/src/sst/elements/Opal/Opal.cc deleted file mode 100644 index 2b0ef42ee0..0000000000 --- a/src/sst/elements/Opal/Opal.cc +++ /dev/null @@ -1,564 +0,0 @@ -// Copyright 2009-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2020, NTESS -// All rights reserved. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. -// - - -// -/* Author: Amro Awad - * E-mail: aawad@sandia.gov - */ -/* Author: Vamsee Reddy Kommareddy - * E-mail: vamseereddy@knights.ucf.edu - */ - - -#include -#include "Opal.h" - -#include -#include - -using namespace SST::Interfaces; -using namespace SST; -using namespace SST::OpalComponent; - - -#define OPAL_VERBOSE(LEVEL, OUTPUT) if(verbosity >= (LEVEL)) OUTPUT - - -Opal::Opal(SST::ComponentId_t id, SST::Params& params): Component(id) { - - - registerAsPrimaryComponent(); - - verbosity = (uint32_t) params.find("verbose", 1); - output = new SST::Output("OpalComponent[@f:@l:@p] ", verbosity, 0, SST::Output::STDOUT); - - max_inst = (uint32_t) params.find("max_inst", 1); - num_nodes = (uint32_t) params.find("num_nodes", 1); - nodeInfo = new NodePrivateInfo*[num_nodes]; - num_cores = 0; - num_memCntrls = 0; - - cycles = 0; - - opalBase = new OpalBase(); - - char* buffer = (char*) malloc(sizeof(char) * 256); - - /* Configuring shared memory */ - /*----------------------------------------------------------------------------------------*/ - num_shared_mempools = params.find("shared_mempools", 0); - std::cerr << getName().c_str() << "Number of Shared Memory Pools: "<< num_shared_mempools << endl; - - Params sharedMemParams = params.find_prefix_params("shared_mem."); - shared_mem_size = 0; - - sharedMemoryInfo = new MemoryPrivateInfo*[num_shared_mempools]; - - for(uint32_t i = 0; i < num_shared_mempools; i++) { - memset(buffer, 0 , 256); - sprintf(buffer, "mempool%" PRIu32 ".", i); - Params memPoolParams = sharedMemParams.find_prefix_params(buffer); - sharedMemoryInfo[i] = new MemoryPrivateInfo(opalBase, i, memPoolParams); - std::cerr << getName().c_str() << "Configuring Shared " << buffer << std::endl; - shared_mem_size += memPoolParams.find("size", 0); - memset(buffer, 0 , 256); - sprintf(buffer, "globalMemCntrLink%" PRIu32, i); - sharedMemoryInfo[i]->link = configureLink(buffer, "1ns", new Event::Handler((sharedMemoryInfo[i]), &MemoryPrivateInfo::handleRequest)); - } - - /* Configuring nodes */ - /*----------------------------------------------------------------------------------------*/ - for(uint32_t i = 0; i < num_nodes; i++) { - memset(buffer, 0 , 256); - sprintf(buffer, "node%" PRIu32 ".", i); - Params nodePrivateParams = params.find_prefix_params(buffer); - nodeInfo[i] = new NodePrivateInfo(opalBase, i, nodePrivateParams); - for(uint32_t j=0; jcores; j++) { - memset(buffer, 0 , 256); - sprintf(buffer, "coreLink%" PRIu32, num_cores); - nodeInfo[i]->coreInfo[j].coreLink = configureLink(buffer, "1ns", new Event::Handler((&nodeInfo[i]->coreInfo[j]), &CorePrivateInfo::handleRequest)); - memset(buffer, 0 , 256); - sprintf(buffer, "mmuLink%" PRIu32, num_cores); - nodeInfo[i]->coreInfo[j].mmuLink = configureLink(buffer, "1ns", new Event::Handler((&nodeInfo[i]->coreInfo[j]), &CorePrivateInfo::handleRequest)); - num_cores++; - } - for(uint32_t j=0; jmemory_cntrls; j++) { - memset(buffer, 0 , 256); - sprintf(buffer, "memCntrLink%" PRIu32, num_memCntrls); - nodeInfo[i]->memCntrlInfo[j].link = configureLink(buffer, "1ns", new Event::Handler((&nodeInfo[i]->memCntrlInfo[j]), &MemoryPrivateInfo::handleRequest)); - num_memCntrls++; - } - - char* subID = (char*) malloc(sizeof(char) * 32); - sprintf(subID, "%" PRIu32, i); - nodeInfo[i]->statLocalMemUsage = registerStatistic("local_mem_usage", subID ); - nodeInfo[i]->statSharedMemUsage = registerStatistic("shared_mem_usage", subID ); - free(subID); - } - - free(buffer); - - /* registering clock */ - /*----------------------------------------------------------------------------------------*/ - std::string cpu_clock = params.find("clock", "1GHz"); - std::cerr << "clock: "<< cpu_clock.c_str() << std::endl; - registerClock( cpu_clock, new Clock::Handler(this, &Opal::tick ) ); -} - - - -Opal::Opal() : Component(-1) -{ - // for serialization only - // -} - - -void Opal::setNextMemPool( int node, int fault_level ) -{ - switch(nodeInfo[node]->memoryAllocationPolicy) - { - case 8: - //alternate allocation policy 1:16 - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % 17; - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - break; - - case 7: - //alternate allocation policy 1:8 - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % 9; - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - break; - - case 6: - //alternate allocation policy 1:4 - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % 5; - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - break; - - case 5: - //alternate allocation policy 1:2 - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % 3; - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - break; - - case 4: - //random allocation policy - nodeInfo[node]->allocatedmempool = rand() % ( num_shared_mempools + 1 ); - break; - - case 3: - //proportional allocation policy - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % ( shared_mem_size/nodeInfo[node]->memory_size + 1 ); - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem ? ( nodeInfo[node]->allocatedmempool + 1 ) % ( num_shared_mempools + 1) - ? ( nodeInfo[node]->allocatedmempool + 1 ) % ( num_shared_mempools + 1) : 1 : 0; - - break; - - case 2: - //round robin allocation policy - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % ( num_shared_mempools + 1 ); - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - break; - - case 1: - //alternate allocation policy - if( nodeInfo[node]->allocatedmempool != 0) { - nodeInfo[node]->allocatedmempool = 0; - } else { - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % ( num_shared_mempools + 1 ) - ? ( nodeInfo[node]->nextallocmem + 1 ) % ( num_shared_mempools + 1 ) : 1; - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - } - break; - - case 0: - default: - //local memory first - nodeInfo[node]->allocatedmempool = 0; - break; - - } - -} - -void Opal::processHint(int node, int fileId, uint64_t vAddress, int size) -{ - - std::map*, std::vector* > >::iterator fileIdHint = opalBase->mmapFileIdHints.find(fileId); - - //fileId is already registered by another node - if( fileIdHint != opalBase->mmapFileIdHints.end() ) - { - //search for nodeId - std::vector* it = (fileIdHint->second).first; - auto it1 = std::find(it->begin(), it->end(), node); - if( it1 != it->end() ) - { - std::cerr << "Memory is already allocated for fileId: " << fileIdHint->first << " in the same node" << std::endl; - } - else - { - int owner_node = it->back(); - if( nodeInfo[owner_node]->page_size != nodeInfo[node]->page_size) - output->fatal(CALL_INFO, -1, "Opal: Page sizes of the owner node which reserved space with fileId: %d and the requesting node are not same\n",fileId); - - it->push_back(node); - //(fileIdHint->second).first = it; - nodeInfo[node]->reservedSpace.insert(std::make_pair(vAddress/4096, std::make_pair(fileId, std::make_pair( ceil(size/(nodeInfo[node]->page_size)), 0)))); - } - } - else - { - std::vector *it = new std::vector; - std::vector *pa = new std::vector; - - it->push_back(node); - opalBase->mmapFileIdHints.insert(std::make_pair(fileId, std::make_pair( it, pa ))); - nodeInfo[node]->reservedSpace.insert(std::make_pair(vAddress/4096, std::make_pair(fileId, std::make_pair( ceil(size/(nodeInfo[node]->page_size)), 0)))); - - } -} - -REQRESPONSE Opal::isAddressReserved(int node, uint64_t vAddress) -{ - REQRESPONSE response; - response.status = 0; - - - for (std::map > >::iterator it= (nodeInfo[node]->reservedSpace).begin(); it!=(nodeInfo[node]->reservedSpace).end(); ++it) - { - uint64_t reservedVAddress = it->first; - int pages_reserved = (it->second).second.first; - if(reservedVAddress <= vAddress && vAddress < reservedVAddress + pages_reserved*nodeInfo[node]->page_size) { - response.status = 1; - response.address = reservedVAddress; - } - } - - return response; -} - -REQRESPONSE Opal::allocateSharedMemory(int node, int coreId, uint64_t vAddress, int fault_level, int pages) -{ - REQRESPONSE response; - response.status = 0; - - int sharedMemPoolId; - - if(nodeInfo[node]->memoryAllocationPolicy) { - - sharedMemPoolId = nodeInfo[node]->allocatedmempool - 1; - - } - else { - - for(uint32_t i = 0; ipool->available_frames >= pages ) - { - Pool *pool = sharedMemoryInfo[i]->pool; - for(int j=0; jallocate_frame(1); - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Allocating shared memory. This should never happen\n"); - - nodeInfo[node]->profileEvent(SST::OpalComponent::MemType::SHARED); - } - - response.pages = pages; - response.status = 1; - break; - } - } - - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal(%s): Memory is drained out\n",getName().c_str()); - - return response; - } - - if( sharedMemoryInfo[sharedMemPoolId]->pool->available_frames >= pages ) { - Pool *pool = sharedMemoryInfo[sharedMemPoolId]->pool; - for(int j=0; jallocate_frame(1); - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Allocating shared memory. This should never happen\n"); - - nodeInfo[node]->profileEvent(SST::OpalComponent::MemType::SHARED); - } - - setNextMemPool( node,fault_level ); - response.pages = pages; - response.status = 1; - } - else - { - for(uint32_t i = 0; i < num_shared_mempools; i++) - { - setNextMemPool(node,fault_level); - if( !nodeInfo[node]->allocatedmempool ) // skip local memory - setNextMemPool( node,fault_level ); - - sharedMemPoolId = nodeInfo[node]->allocatedmempool - 1; - - if( sharedMemoryInfo[sharedMemPoolId]->pool->available_frames >= pages ) { - Pool *pool = sharedMemoryInfo[sharedMemPoolId]->pool; - for(int j=0; jallocate_frame(1); - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Allocating shared memory. This should never happen\n"); - - nodeInfo[node]->profileEvent(SST::OpalComponent::MemType::SHARED); - } - - setNextMemPool( node,fault_level ); - response.pages = pages; - response.status = 1; - break; - } - } - - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Memory is drained out\n"); - - } - - return response; -} - -REQRESPONSE Opal::allocateLocalMemory(int node, int coreId, uint64_t vAddress, int fault_level, int pages) -{ - - REQRESPONSE response; - response.status = 0; - - - if(nodeInfo[node]->pool->available_frames >= pages) { - Pool *pool = nodeInfo[node]->pool; - for(int i=0; iallocate_frame(1); - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Allocating local memory. This should never happen\n"); - - nodeInfo[node]->profileEvent(SST::OpalComponent::MemType::LOCAL); - } - - response.pages = pages; - response.status = 1; - setNextMemPool( node,fault_level ); - } - else { - OPAL_VERBOSE(8, output->verbose(CALL_INFO, 8, 0, "Node%" PRIu32 " Local Memory is drained out\n", node)); - - setNextMemPool( node,fault_level ); - response = allocateSharedMemory(node, coreId, vAddress, fault_level, pages); - } - - - return response; - -} - -REQRESPONSE Opal::allocateFromReservedMemory(int node, uint64_t reserved_vAddress, uint64_t vAddress, int pages) -{ - REQRESPONSE response; - response.status = 0; - - int fileID = nodeInfo[node]->reservedSpace[reserved_vAddress].first; - int pages_reserved = nodeInfo[node]->reservedSpace[reserved_vAddress].second.first; - int pages_used = nodeInfo[node]->reservedSpace[reserved_vAddress].second.second; - - std::vector *reserved_pAddress = opalBase->mmapFileIdHints[fileID].second; - - //Allocate all the pages. TODO: pages can be reserved on demand instead of allocating all the pages at a time. But what if the memory is drained out. - if(reserved_pAddress->empty()) { - - for(uint32_t i = 0; ipool->available_frames >= pages ) { - Pool *pool = sharedMemoryInfo[i]->pool; - for(int j=0; jallocate_frame(1); - reserved_pAddress->push_back( response.address ); - - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Allocating reserved memory. This should never happen\n"); - } - - response.pages = pages; - response.status = 1; - break; - } - - } - - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: memory not available to allocate memory for file ID: %d \n", fileID); - - } - - if( pages_used + pages <= pages_reserved ) - { - - auto it = reserved_pAddress->begin(); - std::advance(it, pages_used); - response.address = *it; - response.pages = pages; - response.status = 1; - nodeInfo[node]->reservedSpace[reserved_vAddress].second.second += pages; - - } - else - { - output->fatal(CALL_INFO, -1, "Opal: address :%llu requested with fileId:%d has no space left\n", vAddress, fileID); - } - - return response; -} - -bool Opal::processRequest(int node, int coreId, uint64_t vAddress, int fault_level, int size) -{ - - REQRESPONSE response; - response.status = 0; - - int pages = ceil(size/(nodeInfo[node]->page_size)); - - // If multiple pages are requested how are the physical addresses sent to the requester as in future sue to opal parallelization continuous addresses cannot be allocated - if(pages != 1) - output->fatal(CALL_INFO, -1, "Opal: currently opal does not support multiple page allocations\n"); - - // if the page fault request is for CR3 register allocate the memory from local memory - if(4 == fault_level) - response = allocateLocalMemory(node, coreId, vAddress, fault_level, pages); - else - { - - // check if memory is to be allocated from the reserved address space - response = isAddressReserved(node, vAddress); - - if( response.status ) - response = allocateFromReservedMemory(node, response.address, vAddress, pages); - - else { - if( !nodeInfo[node]->allocatedmempool ) { - response = allocateLocalMemory(node, coreId, vAddress, fault_level, pages); - //std::cerr << getName() << " Node: " << node << " core " << coreId << " response page address: " << vAddress << " allocated local address: " << response.address << " pages: "<< pages << " level: " << fault_level << std::endl; - } - else { - response = allocateSharedMemory(node, coreId, vAddress, fault_level, pages); - //std::cerr << getName() << " Node: " << node << " core " << coreId << " response page address: " << vAddress << " allocated shared address: " << response.address << " pages: " << " level: " << fault_level << std::endl; - } - } - } - - if( response.status ) { - OpalEvent *tse = new OpalEvent(EventType::RESPONSE); - tse->setResp(vAddress, response.address, response.pages*nodeInfo[node]->page_size); - tse->setCoreId(coreId); - nodeInfo[node]->coreInfo[coreId].mmuLink->send(tse); - } - else - output->fatal(CALL_INFO, -1, "Opal(%s): Memory is drained out\n",getName().c_str()); - - return true; - -} - - -bool Opal::tick(SST::Cycle_t x) -{ - cycles++; - - int inst_served = 0; - while(!opalBase->requestQ.empty()) { - if(inst_served < max_inst) { - OpalEvent *ev = opalBase->requestQ.front(); - bool removeEvent = true; - - switch(ev->getType()) { - case SST::OpalComponent::EventType::HINT: - { - std::cerr << getName().c_str() << " node: " << ev->getNodeId() << " core: "<< ev->getCoreId() << " request page address: " << ev->getAddress() << " hint" << std::endl; - } - break; - - case SST::OpalComponent::EventType::MMAP: - { - OPAL_VERBOSE(8, output->verbose(CALL_INFO, 8, 0, "Node%" PRIu32 " Opal has received an MMAP CALL\n", ev->getNodeId())); - std::cerr << "MLM mmap(" << ev->getFileId()<< ") : level "<< ev->getHint() << " Starting address is "<< std::hex << ev->getAddress(); - std::cerr << std::dec << " Size: "<< ev->getSize(); - std::cerr << " Ending address is " << std::hex << ev->getAddress() + ev->getSize() - 1; - std::cerr << std::dec << std::endl; - //size should be in the multiple of page size (4096) from ariel core - //processHint(ev->getNodeId(), ev->getFileId(), ev->getAddress(), ev->getSize()); - } - break; - - case SST::OpalComponent::EventType::UNMAP: - { - std::cerr << getName().c_str() << " node: " << ev->getNodeId() << " core: "<< ev->getCoreId() << " request page address: " << ev->getAddress() << " unmap"<< std::endl; - OPAL_VERBOSE(8, output->verbose(CALL_INFO, 8, 0, "Node%" PRIu32 " Opal has received an UNMAP CALL\n", ev->getNodeId())); - } - break; - - case SST::OpalComponent::EventType::REQUEST: - { - removeEvent = processRequest(ev->getNodeId(), ev->getCoreId(), ev->getAddress(), ev->getFaultLevel(), ev->getSize()); - } - break; - - default: - output->fatal(CALL_INFO, -1, "%s, Error - Unknown request\n", getName().c_str()); - break; - - } - - if(!removeEvent) { - break; - } - - opalBase->requestQ.pop(); - delete ev; - inst_served++; - } - else { - output->verbose(CALL_INFO, 2, 0, "%s, number of requests served has reached maximum width in the given time slot \n", getName().c_str()); - break; - } - } - - return false; -} - - -void Opal::finish() -{ - for(uint32_t i = 0; i < num_nodes; i++ ) - nodeInfo[i]->pool->finish(); - - for(uint32_t i = 0; i < num_shared_mempools; i++ ) - sharedMemoryInfo[i]->pool->finish(); - -} - -void Opal::deallocateSharedMemory(uint64_t page, int N) -{ - for(uint32_t sm=0; smcontains(page)) { - sharedMemoryInfo[sm]->pool->deallocate_frame(page, 1); - break; - } -} - diff --git a/src/sst/elements/Opal/Opal.h b/src/sst/elements/Opal/Opal.h deleted file mode 100644 index 01dba6a5f1..0000000000 --- a/src/sst/elements/Opal/Opal.h +++ /dev/null @@ -1,388 +0,0 @@ -// Copyright 2009-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2020, NTESS -// All rights reserved. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. -// - - -/* Author: Amro Awad - * E-mail: amro.awad@ucf.edu - */ -/* Author: Vamsee Reddy Kommareddy - * E-mail: vamseereddy@knights.ucf.edu - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "Opal_Event.h" -#include "mempool.h" - -using namespace SST; - -using namespace SST::OpalComponent; - - -namespace SST -{ - namespace OpalComponent - { - - class OpalBase { - - public: - OpalBase() { } - - ~OpalBase() { - - while( !requestQ.empty() ) { - delete requestQ.front(); - requestQ.pop(); - } - - std::map*, std::vector* > >::iterator it; - for(it=mmapFileIdHints.begin(); it!=mmapFileIdHints.end(); it++){ - delete (it->second).second; - delete (it->second).first; - } - } - - std::queue requestQ; // stores page fault requests, hints and shootdown acknowledgement events from all the cores - - std::map*, std::vector* > > mmapFileIdHints; // used to store reserved memory which is useful for inter-node communication - }; - - class MemoryPrivateInfo - { - public: - - OpalBase *opalBase; - - int nodeId; - - int memContrlId; - - SST::Link * link; - - unsigned int latency; - - Pool* pool; - - MemoryPrivateInfo() { } - - MemoryPrivateInfo(OpalBase *base, uint32_t _id, Params params) - { - memContrlId = _id; - opalBase = base; - latency = (uint32_t) params.find("latency", 1); - pool = new Pool(params, SST::OpalComponent::MemType::SHARED, _id); - } - - ~MemoryPrivateInfo() { - delete pool; - } - - void setOwner(OpalBase *base) { opalBase = base; } - - void handleRequest(SST::Event* e) - { - OpalEvent *ev = static_cast (e); - ev->setMemContrlId(memContrlId); - delete ev; // delete event from memory controller for now - } - - bool contains(uint64_t page) - { - return ((pool->start <= page) && (page < pool->start + pool->num_frames*pool->frsize)) ? true : false; - } - }; - - class CorePrivateInfo - { - public: - - OpalBase *opalBase; - - int nodeId; - - int coreId; - - uint64_t cr3; - - SST::Link * coreLink; - - SST::Link * mmuLink; - - float ipc; - - CorePrivateInfo() { } - - ~CorePrivateInfo() { } - - unsigned int latency; - - void setOwner(OpalBase *base) { opalBase = base; } - - void handleRequest(SST::Event* e) - { - OpalEvent *ev = static_cast (e); - ev->setNodeId(nodeId); - ev->setCoreId(coreId); - opalBase->requestQ.push(ev); - } - - }; - - class NodePrivateInfo - { - public: - - OpalBase *opalBase; - - uint32_t clock; // clock rate of Opal - - uint32_t node_num; // stores the node number of this node - - uint32_t cores; // stores number of cores in this node - - uint32_t memory_cntrls; // stores number of memory controllers available in this node - - uint32_t latency; // latency to communicate with Core, MMU and Memory controller units - - CorePrivateInfo *coreInfo; // stores core specific information of this node - - MemoryPrivateInfo *memCntrlInfo; // stores memory controller information of this node - - uint32_t memoryAllocationPolicy; // used for deciding memory allocation policies - - int nextallocmem; // stores next memory pool to allocate memory from - - int allocatedmempool; // used to store current allocated memory pool - - Pool* pool; // local memory pool which maintains memory utilization - allocated and free pages - - uint64_t page_size; // page size of the node in KB's - - uint64_t memory_size; // local memory size in terms of number of pages - - uint64_t num_pages; // number of pages in local memory - - uint32_t pages_available; // used to check number of pages free in local memory - - std::map > > reservedSpace; // stores pages that are reserved by nodes. these can be shared by other nodes for inter-node communication. fileds: virtual address, fileId, size - - Statistic* statLocalMemUsage; - Statistic* statSharedMemUsage; - - NodePrivateInfo(OpalBase *base, uint32_t node, Params params) - { - opalBase = base; - node_num = node; - clock = (uint32_t) params.find("clock", 2000); // in MHz - cores = (uint32_t) params.find("cores", 1); - memory_cntrls = (uint32_t) params.find("memory_cntrls", 1); - latency = (uint32_t) params.find("latency", 2000); //2us - - memoryAllocationPolicy = (uint32_t) params.find("allocation_policy", 0); - nextallocmem = 0; - allocatedmempool = 0; - - pool = new Pool((Params) params.find_prefix_params("memory."), SST::OpalComponent::MemType::LOCAL, node); - memory_size = (uint32_t) params.find("memory.size", 1); // in KB's - page_size = (uint32_t) params.find("memory.frame_size", 4); - page_size = page_size * 1024; - - coreInfo = new CorePrivateInfo[cores]; - for(uint32_t i=0; iaddData(1); - } - else{ - statSharedMemUsage->addData(1); - } - } - - }; - - class Opal : public SST::Component - { - public: - - Opal( SST::ComponentId_t id, SST::Params& params); - - void setup() { }; - - void finish(); - - bool tick(SST::Cycle_t x); - - void setNextMemPool( int node,int fault_level ); - - REQRESPONSE allocateLocalMemory(int node, int coreId, uint64_t vAddress, int fault_level, int pages); - - REQRESPONSE allocateSharedMemory(int node, int coreId, uint64_t vAddress, int fault_level, int pages); - - REQRESPONSE allocateFromReservedMemory(int node, uint64_t reserved_vAddress, uint64_t vAddress, int pages); - - REQRESPONSE isAddressReserved(int node, uint64_t vAddress); - - bool processRequest(int node, int coreId, uint64_t vAddress, int fault_level, int size); - - void processHint(int node, int fileId, uint64_t vAddress, int size); - - void deallocateSharedMemory(uint64_t page, int N); - - ~Opal() { - for(uint32_t i=0; i -#include -#include <../memHierarchy/memEvent.h> - -#include -#include -#include - -using namespace SST; - - -namespace SST{ namespace OpalComponent{ - - enum EventType { HINT, MMAP, REQUEST, RESPONSE, UNMAP, UMAPACK, SHOOTDOWN, REMAP, SDACK, ARIEL_ENABLED, HALT, PAGE_REFERENCE, PAGE_REFERENCE_END, IPC_INFO }; - enum MemType { LOCAL, SHARED }; - enum MemTech { DRAM, NVM, HBM, HMC, SCRATCHPAD, BURSTBUFFER}; - -// **************** Important ***************** -// Levels hints are: 0 for DRAM -// 1: NVM -// 2: HBM -// 3: HMC -// 4: SCRATCHPAD -// 5: BURSTBUFFER - - // Thie defines a class for events of Opal - class OpalEvent : public SST::Event - { - - private: - OpalEvent() {} // For serialization - - EventType ev; - uint64_t address; - uint64_t paddress; - int faultLevel; - int size; // to redcue packet size, size valiable we used size variable for multiple pusrposes. 1. size of the page fault. 2. shoowdownID. 3. local and global page reference - uint32_t nodeId; - uint32_t coreId; - MemType memType; - int hint; - int fileId; - int memContrlId; - bool invalidate; - - public: - - OpalEvent(EventType y) : SST::Event() - { ev = y; memType = SST::OpalComponent::MemType::LOCAL; invalidate = false;} - - OpalEvent(EventType y, const uint32_t level, const uint64_t virtualAddress, const uint64_t size_, const uint32_t thread) : SST::Event() - { - ev = y; - memType = SST::OpalComponent::MemType::LOCAL; - invalidate = false; - size =size_; - faultLevel = level; - address = virtualAddress; - coreId = thread; - } - - void setType(int ev1) { ev = static_cast(ev1);} - int getType() { return ev; } - - void setMemType(int mtype) { memType = static_cast(mtype);} - MemType getMemType() { return memType; } - - void setNodeId(uint32_t id) { nodeId = id; } - uint32_t getNodeId() { return nodeId; } - - void setCoreId(uint32_t id) { coreId = id; } - uint32_t getCoreId() { return coreId; } - - void setResp(uint64_t add, uint64_t padd, int sz) { address = add; paddress = padd; size = sz;} - - void setAddress(uint64_t add) { address = add; } - uint64_t getAddress() { return address; } - - void setPAddress(uint64_t add) { paddress = add; } - uint64_t getPaddress() { return paddress; } - - void setSize(int size_) { size = size_; } - int getSize() { return size; } - - void setFaultLevel(int level) { faultLevel = level; } - int getFaultLevel() { return faultLevel; } - - void setInvalidate() { invalidate = true; } - bool getInvalidate() { return invalidate; } - - void setFileId(int id) { fileId = id; } - int getFileId() { return fileId; } - - void setHint(int x) { hint = x; } - int getHint() { return hint; } - - void setMemContrlId(int id) { memContrlId = id; } - int getMemContrlId() { return memContrlId; } - - void serialize_order(SST::Core::Serialization::serializer &ser) override { - Event::serialize_order(ser); - ser & ev; - ser & address; - ser & paddress; - ser & faultLevel; - ser & size; - ser & nodeId; - ser & coreId; - ser & memType; - ser & hint; - ser & fileId; - ser & memContrlId; - ser & invalidate; - } - - - ImplementSerializable(OpalEvent); - - }; - - -}} - -#endif - diff --git a/src/sst/elements/Opal/PageFaultHandler.cc b/src/sst/elements/Opal/PageFaultHandler.cc deleted file mode 100644 index a05938e170..0000000000 --- a/src/sst/elements/Opal/PageFaultHandler.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2009-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -/* Author: Vamsee Reddy Kommareddy - * E-mail: vamseereddy8@knights.ucf.edu - */ - -#include - -#include "PageFaultHandler.h" -#include "Opal_Event.h" - -#include - - -using namespace SST::OpalComponent; - -PageFaultHandler::PageFaultHandler(ComponentId_t id, Params& params) : - SambaComponent::PageFaultHandler(id, params) { - - // Find links - std::string linkprefix = "opal_link_"; - std::string linkname = linkprefix + "0"; - int numPorts = 0; - - std::string latency = params.find("opal_latency", "32ps"); - - while (isPortConnected(linkname)) { - SST::Link* link = configureLink(linkname, latency, new Event::Handler(this, &PageFaultHandler::handleEvent)); - opalLink.push_back(link); - numPorts++; - linkname = linkprefix + std::to_string(numPorts); - } -} - - -PageFaultHandler::~PageFaultHandler() { -} - - -void PageFaultHandler::handleEvent(SST::Event *event) { - OpalEvent * ev = static_cast(event); - output->verbose(CALL_INFO, 4, 0, "Core %" PRIu32 " handling opal page fault event\n", ev->getCoreId()); - - PageFaultHandlerPacket pkt; - - switch(ev->getType()) { - case SST::OpalComponent::EventType::RESPONSE: - pkt.action = PageFaultHandlerAction::RESPONSE; - break; - default: - output->fatal(CALL_INFO, -4, "Opal event interrupt to core: %" PRIu32 " was not valid.\n", ev->getCoreId()); - } - - pkt.vAddress = ev->getAddress(); - pkt.pAddress = ev->getPaddress(); - pkt.size = 4096; - (*(pageFaultHandlerInterface[ev->getCoreId()]))(pkt); - - delete ev; -} - -void PageFaultHandler::allocatePage(const uint32_t thread, const uint32_t level, const uint64_t virtualAddress, const uint64_t size) { - OpalEvent * tse = new OpalEvent(OpalComponent::EventType::REQUEST); - tse->setResp(virtualAddress, 0, size); - tse->setFaultLevel(level); - opalLink[thread]->send(tse); - -} - diff --git a/src/sst/elements/Opal/PageFaultHandler.h b/src/sst/elements/Opal/PageFaultHandler.h deleted file mode 100644 index bca280cab7..0000000000 --- a/src/sst/elements/Opal/PageFaultHandler.h +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2009-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _H_OPAL_PAGE_FAULT_HANDLER_OPAL -#define _H_OPAL_PAGE_FAULT_HANDLER_OPAL - -#include -#include - -#include "sst/elements/Samba/PageFaultHandler.h" - -#include -#include -#include -#include - -using namespace SST; - -namespace SST { -namespace OpalComponent { - -class PageFaultHandler : public SambaComponent::PageFaultHandler { - - public: - /* SST ELI */ - SST_ELI_REGISTER_SUBCOMPONENT_DERIVED(PageFaultHandler, "Opal", "PageFaultHandler", SST_ELI_ELEMENT_VERSION(1,0,0), - "Page fault hander uses the Opal memory allocation component", SST::SambaComponent::PageFaultHandler) - - SST_ELI_DOCUMENT_PARAMS( - { "opal_latency", "latency to communicate to the Opal manager", "32ps"} ) - - SST_ELI_DOCUMENT_PORTS( {"opal_link_%(corecound)d", "Each core's mmu link to the Opal page fault handler", {"Opal.OpalEvent"}} ) - - /* MemoryManagerOpal */ - PageFaultHandler(ComponentId_t id, Params& params); - ~PageFaultHandler(); - - void handleEvent(SST::Event * event); - void allocatePage(const uint32_t thread, const uint32_t level, const uint64_t virtualAddress, const uint64_t size); - - private: - - std::vector opalLink; -}; - -} -} - -#endif diff --git a/src/sst/elements/Opal/README b/src/sst/elements/Opal/README deleted file mode 100644 index 22081a5b25..0000000000 --- a/src/sst/elements/Opal/README +++ /dev/null @@ -1,20 +0,0 @@ - - -Opal is a centralized memory manager that will be used to request physical memory from the system - -Opal will be configured and exposed to the details (capacity and memory types) of the shared memory system. - -A single or multiple/hirarchical (in case of disaggregated memory system) Opal components should be instantiated and connected to: - -1- Samba unit --- A- Minor page faults (or first access to virtual page) in Samba will trigger a request for a physical page from Opal manager - B- Any updates or page migration requests to Opal will trigger TLB shootdown to be sent to all samba units - -2- Ariel (or other core types) --- - A- unmap requests can be sent directly to Opal to deallocate physical space, which might also result in Opal sending TLB shootdowns to Samba units - B- Although virtual memory allocations (mmap, new, malloc) do not immediately allocate physical space, sometimes they carry out some hints such as memory type, huge page preference, pinning hints. Accordingly, Opal need to keep track of such preferences for specific virtual address ranges of different virtual memory spaces. - -Assumptions and limitations: -* In real systems, libc or any memory allocation library typically return allocated physical space to the OS through unmap, either when unmap is called by the application or when enough chunks have been returned by the applications through **free/delete**. For instance, if free/delete tries to delete a large allocation, then it can be beneficial to immediately return its corresponding physical allocation to the OS, so other applications can allocate it. Also when there are many contigious chunks that have been freed by the application and they can be coalecsed in multiple virtual pages, then the memory allocation library might return them back to the OS through unmap. Due to the overhead (TLB shootdown and context switching) of returning physical pages to the OS, memory allocation libraries try to reuse the **freed/deleted** virtual spaces. Accordingly, since we don't simulate libc or memory allocation library behavior, we only return physical spaces to Opal when unmap is called by the applications, and do not do anything when free/delete is called. - -Possible TODO: -When free is called for a **really** large allocation, we can assume the libc (or other memory allocation libraries) will immediately return this space to Opal diff --git a/src/sst/elements/Opal/arielmemmgr_opal.cc b/src/sst/elements/Opal/arielmemmgr_opal.cc deleted file mode 100644 index b4d2157e60..0000000000 --- a/src/sst/elements/Opal/arielmemmgr_opal.cc +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2009-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include -#include "arielmemmgr_opal.h" -#include "Opal_Event.h" - -#include - - -using namespace SST::OpalComponent; - -MemoryManagerOpal::MemoryManagerOpal(ComponentId_t id, Params& params) : - ArielComponent::ArielMemoryManager(id, params) { - - // Find links - std::string linkprefix = "opal_link_"; - std::string linkname = linkprefix + "0"; - int numPorts = 0; - - std::string latency = params.find("opal_latency", "32ps"); - - while (isPortConnected(linkname)) { - SST::Link* link = configureLink(linkname, latency, new Event::Handler(this, &MemoryManagerOpal::handleInterrupt)); - opalLink.push_back(link); - numPorts++; - linkname = linkprefix + std::to_string(numPorts); - } - - std::string translatorstr = params.find("translator", "ariel.MemoryManagerSimple"); - if (NULL != (temp_translator = loadUserSubComponent("translator"))) { - output->verbose(CALL_INFO, 1, 0, "Opal is using named subcomponent translator\n"); - } else { - int memLevels = params.find("memmgr.memorylevels", 1); - if (translatorstr == "ariel.MemoryManagerSimple" && memLevels > 1) { - output->verbose(CALL_INFO, 1, 0, "Warning - the default 'ariel.MemoryManagerSimple' does not support multiple memory levels. Configuring anyways but memorylevels will be 1.\n"); - params.insert("memmgr.memorylevels", "1", true); - } - output->verbose(CALL_INFO, 1, 0, "Loading memory manager: %s\n", translatorstr.c_str()); - Params translatorParams = params.find_prefix_params("memmgr."); - temp_translator = loadAnonymousSubComponent(translatorstr, "translator", 0, ComponentInfo::SHARE_STATS | ComponentInfo::INSERT_STATS, translatorParams); - if (NULL == temp_translator) - output->fatal(CALL_INFO, -1, "Failed to load memory manager: %s\n", translatorstr.c_str()); - } -} - - -MemoryManagerOpal::~MemoryManagerOpal() { -} - - -void MemoryManagerOpal::handleInterrupt(SST::Event *event) { - OpalEvent * ev = dynamic_cast(event); // TODO can we static_cast instead? - output->verbose(CALL_INFO, 4, 0, "Core %" PRIu32 " handling opal interrupt event\n", ev->getCoreId()); - - switch(ev->getType()) { - case SST::OpalComponent::EventType::SHOOTDOWN: - (*(interruptHandler[ev->getCoreId()]))(ArielComponent::ArielMemoryManager::InterruptAction::STALL); - break; - case SST::OpalComponent::EventType::SDACK: - (*(interruptHandler[ev->getCoreId()]))(ArielComponent::ArielMemoryManager::InterruptAction::UNSTALL); - break; - default: - output->fatal(CALL_INFO, -4, "Opal event interrupt to core: %" PRIu32 " was not valid.\n", ev->getCoreId()); - } -} - -bool MemoryManagerOpal::allocateMalloc(const uint64_t size, const uint32_t level, const uint64_t addr, const uint64_t ip, const uint32_t thread) { - OpalEvent * tse = new OpalEvent(OpalComponent::EventType::HINT, level, addr, size, thread); - opalLink[thread]->send(tse); - - return temp_translator->allocateMalloc(size, level, addr, ip, thread); -} - -bool MemoryManagerOpal::allocateMMAP(const uint64_t size, const uint32_t level, const uint64_t addr, const uint64_t ip, const uint32_t file, const uint32_t thread) { - OpalEvent * tse = new OpalEvent(OpalComponent::EventType::HINT, level, addr, size, thread); - tse->setFileId(file); - output->output("Before sending to Opal.. file ID is: %" PRIu32 "\n", file); - opalLink[thread]->send(tse); - return true; -} - -void MemoryManagerOpal::freeMalloc(const uint64_t virtualAddress) { - temp_translator->freeMalloc(virtualAddress); -} - -void MemoryManagerOpal::freeMMAP(const uint32_t file) { -} - - -void MemoryManagerOpal::setDefaultPool(uint32_t pool) { - temp_translator->setDefaultPool(pool); -} - -uint32_t MemoryManagerOpal::getDefaultPool() { - return temp_translator->getDefaultPool(); -} - -uint64_t MemoryManagerOpal::translateAddress(uint64_t virtAddr) { - return temp_translator->translateAddress(virtAddr); -} - -void MemoryManagerOpal::printStats() { - temp_translator->printStats(); -} - diff --git a/src/sst/elements/Opal/arielmemmgr_opal.h b/src/sst/elements/Opal/arielmemmgr_opal.h deleted file mode 100644 index 32011c7488..0000000000 --- a/src/sst/elements/Opal/arielmemmgr_opal.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2009-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _H_OPAL_MEM_MANAGER_OPAL -#define _H_OPAL_MEM_MANAGER_OPAL - -#include -#include - -#include "sst/elements/ariel/arielmemmgr.h" - -#include -#include -#include -#include - -using namespace SST; - -namespace SST { -namespace OpalComponent { - -class MemoryManagerOpal : public ArielComponent::ArielMemoryManager { - - public: - /* SST ELI */ - SST_ELI_REGISTER_SUBCOMPONENT_DERIVED(MemoryManagerOpal, "Opal", "MemoryManagerOpal", SST_ELI_ELEMENT_VERSION(1,0,0), - "Memory manager which uses the Opal memory allocation component", SST::ArielComponent::ArielMemoryManager) - - SST_ELI_DOCUMENT_PARAMS( - { "corecount", "Sets the verbosity of the memory manager output", "1"}, - { "opal_latency", "latency to communicate to the Opal manager", "32ps"}, - { "translator", "(temporary) translation memory manager to actually translate addresses for now", "MemoryManagerSimple"} ) - - SST_ELI_DOCUMENT_PORTS( {"opal_link_%(corecound)d", "Each core's link to the Opal memory manager", {"Opal.OpalEvent"}} ) - - SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( { "translator", "Temporarily, which memory manager to use to translate addresses", "SST::Ariel::ArielMemoryManager" } ) - - /* MemoryManagerOpal */ - MemoryManagerOpal(ComponentId_t id, Params& params); - ~MemoryManagerOpal(); - - /* Call through to temporary translator */ - void setDefaultPool(uint32_t pool); - uint32_t getDefaultPool(); - - uint64_t translateAddress(uint64_t virtAddr); - void printStats(); - - /* Call through to Opal */ - bool allocateMalloc(const uint64_t size, const uint32_t level, const uint64_t virtualAddress, const uint64_t instructionPointer, const uint32_t thread); - bool allocateMMAP(const uint64_t size, const uint32_t level, const uint64_t virtualAddress, const uint64_t instructionPointer, const uint32_t file, const uint32_t thread); - void freeMalloc(const uint64_t vAddr); - void freeMMAP(const uint32_t file); - - void handleInterrupt(SST::Event * event); - - private: - ArielMemoryManager* temp_translator; // Temporary while Opal still uses Ariel's built-in translator - - std::vector opalLink; -}; - -} -} - -#endif diff --git a/src/sst/elements/Opal/configure.m4 b/src/sst/elements/Opal/configure.m4 deleted file mode 100644 index d5f8b0e71b..0000000000 --- a/src/sst/elements/Opal/configure.m4 +++ /dev/null @@ -1,6 +0,0 @@ -dnl -*- Autoconf -*- - -AC_DEFUN([SST_Opal_CONFIG], [ - sst_Opal_comp_happy="yes" -AS_IF([test "x$sst_Opal_comp_happy" = "xyes"], [$1], [$2]) -]) diff --git a/src/sst/elements/Opal/mempool.cc b/src/sst/elements/Opal/mempool.cc deleted file mode 100644 index 2607cabbe5..0000000000 --- a/src/sst/elements/Opal/mempool.cc +++ /dev/null @@ -1,302 +0,0 @@ -// Copyright 2009-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2020, NTESS -// All rights reserved. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. -// - - -/* Author: Amro Awad - * E-mail: amro.awad@ucf.edu - */ -/* Author: Vamsee Reddy Kommareddy - * E-mail: vamseereddy@knights.ucf.edu - */ - -#include - -#include "mempool.h" - -#include -#include - -//Constructor for pool -Pool::Pool(Params params, SST::OpalComponent::MemType mem_type, int id) -{ - - output = new SST::Output("OpalMemPool[@f:@l:@p] ", 16, 0, SST::Output::STDOUT); - - size = params.find("size", 0); // in KB's - - start = params.find("start", 0); - - frsize = params.find("frame_size", 4); //4KB frame size - - memType = mem_type; - - poolId = id; - - /*char* subID = (char*) malloc(sizeof(char) * 32); - sprintf(subID, "%" PRIu32, id); - - if(memType == SST::OpalComponent::MemType::LOCAL) { - memUsage = own->registerStatistic( "local_mem_usage", subID ); - mappedMemory = own->registerStatistic( "local_mem_mapped", subID ); - unmappedMemory = own->registerStatistic( "local_mem_unmapped", subID ); - tlbShootdowns = own->registerStatistic( "tlb_shootdowns", subID ); - tlbShootdownDelay = own->registerStatistic( "tlb_shootdown_delay", subID ); - } - else { - memUsage = own->registerStatistic( "shared_mem_usage", subID ); - mappedMemory = own->registerStatistic( "shared_mem_mapped", subID ); - unmappedMemory = own->registerStatistic( "shared_mem_unmapped", subID ); - } - - free(subID); - */ - /* memory technology - * 0: DRAM - * 1: NVRAM - */ - uint32_t mem_tech = (uint32_t) params.find("mem_tech", 0); - switch(mem_tech) - { - case 1: - memTech = SST::OpalComponent::MemTech::NVM; - break; - case 0: - default: - memTech = SST::OpalComponent::MemTech::DRAM; - } - - std::cerr << "Pool start: " << start << " size: " << size << " frame size: " << frsize << " mem tech: " << mem_tech << std::endl; - build_mem(); - -} - -//Create free frames of size framesize, note that the size is in KB -void Pool::build_mem() -{ - int i=0; - num_frames = ceil(size/frsize); - real_size = num_frames * frsize; - - //std::vector numbers; - //for(int i=0; i frames_allocated; - - // Fixme: Shuffle memory to make continuous memory available - while(frames) { - // Make sure pool has free frames in the requested memory pool type. If not deallocate allocated frames (if number of frames to be allocated are > 1) - if(freelist.empty()) { - while(!frames_allocated.empty()) { - Frame *frame = frames_allocated.front(); - freelist.push_back(frame->starting_address); - alloclist.erase(frame->starting_address); - frames_allocated.pop_front(); - available_frames++; - delete frame; - } - break; - } - else - { - uint64_t frameAddr = freelist.front(); - freelist.erase(freelist.begin()); - Frame *frame = new Frame(frameAddr,0); - alloclist[frame->starting_address] = frame; - frames_allocated.push_back(frame); - available_frames--; - } - frames--; - - } - - if(!frames_allocated.empty()) { - response.address = (frames_allocated.front())->starting_address; - response.pages = pages; - response.status = 1; - } - - return response; - -} - -// Allocate N contigiuous frames, returns the starting address if successfull, or -1 if it fails! -REQRESPONSE Pool::allocate_frame(int N) -{ - - REQRESPONSE response; - response.status = 0; - - - // Make sure we have free frames first - if(freelist.empty()) - return response; - - // For now, we will assume you can only allocate 1 frame, TODO: We will implemenet a buddy-allocator style that enables allocating contigous physical spaces - if(N>1) - return response; - - else - { - // Simply, pop the first free frame and assign it - uint64_t frameAddr = freelist.front(); - freelist.erase(freelist.begin()); - Frame *temp = new Frame(frameAddr,0); - alloclist[temp->starting_address] = temp; - available_frames--; - response.address = temp->starting_address; - response.pages = 1; - response.status = 1; - return response; - - } - -} - -/* Deallocate 'size' contigiuous memory of type 'memType' starting from physical address 'starting_pAddress', - * returns a structure which indicates whether the memory is successfully deallocated or not - */ -REQRESPONSE Pool::deallocate_frames(int pages, uint64_t starting_pAddress) -{ - - REQRESPONSE response; - int frames = pages; - uint64_t pAddress = starting_pAddress; - uint64_t frame_number; - - while(frames) { - - // If we can find the frame to be free in the allocated list - std::map::iterator it; - it = alloclist.find(pAddress); - if (it != alloclist.end()) - { - //Remove from allocation map and add to free list - Frame *temp = it->second; - freelist.push_back(temp->starting_address); - alloclist.erase(pAddress); - delete temp; - } - else - { - response.address = pAddress; //physical address of the frame which failed to deallocate. - response.pages = frames; //This indicates number of frames that are not deallocated. - response.status = 0; - return response; - } - - frame_number = (pAddress - start) / frsize * 1024; - pAddress += ((uint64_t) (frame_number+1)*frsize*1024) + start; //to get the next frame physical address - frames--; - } - - response.status = 1; //successfully deallocated - return response; -} - -// Freeing N frames starting from Address X, this will return -1 if we find that these frames were not allocated -REQRESPONSE Pool::deallocate_frame(uint64_t X, int N) -{ - - REQRESPONSE response; - response.status = 0; - - - // For now, we will assume you can free only 1 frame, TODO: We will implemenet a buddy-allocator style that enables allocating and freeing contigous physical spaces - if(N>1) - return response; - else - { - // If we can find the frame to be free in the allocated list - if(alloclist.find(X)!=alloclist.end()) - { - // Remove from allocation map and add to free list - Frame * temp = alloclist[X]; - freelist.push_back(temp->starting_address); - alloclist.erase(X); - delete temp; - available_frames++; - response.status = 1; - } - else // Means we couldn't find an allocated frame that is being unmapped - response.status = 0; - - } - - return response; -} - -bool Pool::isAllocated(uint64_t address) -{ - if(alloclist.find(address)==alloclist.end()) - return false; - - return true; -} - -/*REQRESPONSE Pool::allocate_frame_address(uint64_t address) -{ - - REQRESPONSE response; - response.status = 0; - - // Make sure we have free frames first - if(freelist.empty()) - return response; - - // For now, we will assume you can only allocate 1 frame, TODO: We will implemenet a buddy-allocator style that enables allocating contigous physical spaces - if(N>1) - return response; - - else - { - int index = freelist_index[address]; - Frame * temp = freelist[index]; - //freelist.remove(temp); - freelist.erase(freelist.begin()+index); - alloclist[temp->starting_address] = temp; - available_frames--; - response.address = temp->starting_address; - response.pages = 1; - response.status = 1; - return response; - - } - -}*/ - - diff --git a/src/sst/elements/Opal/mempool.h b/src/sst/elements/Opal/mempool.h deleted file mode 100644 index 1fe7d6c3a4..0000000000 --- a/src/sst/elements/Opal/mempool.h +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2009-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2020, NTESS -// All rights reserved. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. -// - - -/* Author: Amro Awad - * E-mail: amro.awad@ucf.edu - */ -/* Author: Vamsee Reddy Kommareddy - * E-mail: vamseereddy@knights.ucf.edu - */ - -#include "Opal_Event.h" - -#include -#include -#include - - -typedef struct reqresponse { - uint64_t address; - int pages; - int status; - -}REQRESPONSE; - - -// This defines a physical frame of size 4KB by default -class Frame{ - - public: - // Constructor - Frame() { starting_address = 0; metadata = 0;} - - // Constructor with paramteres - Frame(uint64_t st, uint64_t md) { starting_address = st; metadata = 0;} - - ~Frame(){} - - // The starting address of the frame - uint64_t starting_address; - - // This will be used to store information about current allocation - int metadata; - - int frame_number; - -}; - - -// This class defines a memory pool - -class Pool{ - - public: - - //Constructor for pool - Pool(Params parmas, SST::OpalComponent::MemType mem_type, int id); - - ~Pool() { -/* while(!freelist.empty()) { - Frame* frame = freelist.front(); - freelist.erase(freelist.begin()); - delete frame; - } -*/ - std::map::iterator it; - for(it=alloclist.begin();it!=alloclist.end();it++) { - Frame* frame = it->second; - delete frame; - } - } - - void finish() {} - - // The size of the memory pool in KBs - uint32_t size; - - // The starting address of the memory pool - uint64_t start; - - // Allocate N contigiuous frames, returns the starting address if successfull, or -1 if it fails! - REQRESPONSE allocate_frame(int N); - - // Allocate 'size' contigiuous memory, returns a structure with starting address and number of frames allocated - REQRESPONSE allocate_frames(int pages); - - REQRESPONSE allocate_frame_address(uint64_t address, int N); - - // Freeing N frames starting from Address X, this will return -1 if we find that these frames were not allocated - REQRESPONSE deallocate_frame(uint64_t X, int N); - - // Deallocate 'size' contigiuous memory starting from physical address 'starting_pAddress', returns a structure which indicates success or not - REQRESPONSE deallocate_frames(int size, uint64_t starting_pAddress); - - bool isAllocated(uint64_t address); - - // Current number of free frames - int freeframes() { return freelist.size(); } - - // Frame size in KBs - int frsize; - - //Total number of frames - int num_frames; - - //real size of the memory pool - uint32_t real_size; - - //number of free frames - int available_frames; - - void set_memPool_type(SST::OpalComponent::MemType _memType) { memType = _memType; } - - SST::OpalComponent::MemType get_memPool_type() { return memType; } - - void set_memPool_tech(SST::OpalComponent::MemTech _memTech) { memTech = _memTech; } - - SST::OpalComponent::MemTech get_memPool_tech() { return memTech; } - - void setMemID(int id) { poolId = id; } - - int getMemID() { return poolId; } - - void build_mem(); - - void profileStats(int stat, int value); - - private: - - Output *output; - - //memory pool id - int poolId; - - //shared or local - SST::OpalComponent::MemType memType; - - //Memory technology - SST::OpalComponent::MemTech memTech; - - // The list of free frames - std::list freelist; - - //std::map freelist_index; - - // The list of allocated frames --- the key is the starting physical address - std::map alloclist; - -}; - diff --git a/src/sst/elements/Opal/opalMemNIC.cc b/src/sst/elements/Opal/opalMemNIC.cc deleted file mode 100644 index e0883f9394..0000000000 --- a/src/sst/elements/Opal/opalMemNIC.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2013-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include -#include "opalMemNIC.h" - -using namespace SST; -using namespace SST::Opal; - -/* Constructor */ - -OpalMemNIC::OpalMemNIC(ComponentId_t id, Params ¶ms) : SST::MemHierarchy::MemNICBase(id, params) { - build(params); -} - -void OpalMemNIC::build(Params ¶ms) { - - node = params.find("node", 0); - enable = params.find("shared_memory", true); - localMemSize = params.find("local_memory_size", 0); - - /* Set up link control */ - link_control = loadUserSubComponent("linkcontrol", ComponentInfo::SHARE_NONE, 1); - if (!link_control) { - Params lcparams; - lcparams.insert("port_name", "port"); - lcparams.insert("link_bw", params.find("network_bw", "80GiB/s")); - lcparams.insert("in_buf_size", params.find("network_input_buffer_size", "1KiB")); - lcparams.insert("out_buf_size", params.find("network_output_buffer_size", "1KiB")); - std::string lcSub = params.find("linkcontrol", "merlin.linkcontrol"); - link_control = loadAnonymousSubComponent(lcSub, "linkcontrol", 0, ComponentInfo::SHARE_PORTS | ComponentInfo::INSERT_STATS, lcparams, 1); - } - link_control->setNotifyOnReceive(new SST::Interfaces::SimpleNetwork::Handler(this, &OpalMemNIC::recvNotify)); - - packetHeaderBytes = extractPacketHeaderSize(params, "min_packet_size"); -} - -void OpalMemNIC::init(unsigned int phase) { - link_control->init(phase); - MemNICBase::nicInit(link_control, phase); -} - -bool OpalMemNIC::clock() { - if (sendQueue.empty()) return true; - drainQueue(&sendQueue, link_control); - return false; -} - -bool OpalMemNIC::recvNotify(int) { - MemRtrEvent * mre = doRecv(link_control); - if (mre) { - MemHierarchy::MemEventBase * me = mre->event; - delete mre; - if (me) { - (*recvHandler)(me); - } - } - return true; -} - -void OpalMemNIC::send(MemHierarchy::MemEventBase * ev) { - SST::Interfaces::SimpleNetwork::Request * req = new SST::Interfaces::SimpleNetwork::Request(); - MemRtrEvent * mre = new MemRtrEvent(ev); - req->src = info.addr; - req->dest = lookupNetworkAddress(ev->getDst()); - req->size_in_bits = 8 * (packetHeaderBytes + ev->getPayloadSize()); - req->vn = 0; - req->givePayload(mre); - sendQueue.push(req); -} - - -/* Add 'node' to InitMemRtrEvent */ -MemHierarchy::MemNICBase::InitMemRtrEvent * OpalMemNIC::createInitMemRtrEvent() { - return new OpalInitMemRtrEvent(info, node); -} - -void OpalMemNIC::processInitMemRtrEvent(MemHierarchy::MemNICBase::InitMemRtrEvent * ev) { - OpalInitMemRtrEvent* imre = static_cast(ev); - dbg.debug(_L10_, "%s (OpalMemNIC) received imre. Name: %s, Addr: %" PRIu64 ", ID: %" PRIu32 ", start: %" PRIu64 ", end: %" PRIu64 ", size: %" PRIu64 ", step: %" PRIu64 ", node: %" PRIu32 "\n", - getName().c_str(), imre->info.name.c_str(), imre->info.addr, imre->info.id, imre->info.region.start, imre->info.region.end, imre->info.region.interleaveSize, imre->info.region.interleaveStep, imre->node); - - if (sourceIDs.find(imre->info.id) != sourceIDs.end()) { // From one of our source groups - dbg.debug(_L10_, "\tAdding to sourceEndpointInfo. %zu sources found\n", sourceEndpointInfo.size()); - addSource(imre->info); - } else if (destIDs.find(imre->info.id) != destIDs.end()) { // From one of our dest groups - // Filter by node if sharedmem is enabled - if (enable) { - if (imre->node == node || imre->node == 9999) { - dbg.debug(_L10_, "\tAdding to destEndpointInfo. %zu destinations found\n", destEndpointInfo.size()); - addDest(imre->info); - } - } else { - dbg.debug(_L10_, "\tAdding to destEndpointInfo. %zu destinations found\n", destEndpointInfo.size()); - addDest(imre->info); - } - } -} - - -std::string OpalMemNIC::findTargetDestination(MemHierarchy::Addr addr) { - for (std::set::const_iterator it = destEndpointInfo.begin(); it != destEndpointInfo.end(); it++) { - if (it->region.contains(addr)) return it->name; - } - - if (enable && localMemSize) { - MemHierarchy::Addr tempAddr = addr & (localMemSize-1); - for (std::set::const_iterator it = destEndpointInfo.begin(); it != destEndpointInfo.end(); it++) { - if(it->region.contains(tempAddr)) return it->name; - } - } - - /* Build error string */ - stringstream error; - error << getName() + " (OpalMemNIC) cannot find a destination for address " << addr << endl; - error << "Known destination regions: " << endl; - for (std::set::const_iterator it = destEndpointInfo.begin(); it != destEndpointInfo.end(); it++) { - error << it->name << " " << it->region.toString() << endl; - } - dbg.fatal(CALL_INFO, -1, "%s", error.str().c_str()); - return ""; -} diff --git a/src/sst/elements/Opal/opalMemNIC.h b/src/sst/elements/Opal/opalMemNIC.h deleted file mode 100644 index c31d28f3fc..0000000000 --- a/src/sst/elements/Opal/opalMemNIC.h +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright 2013-2020 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2020, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _OPAL_MEMNIC_SUBCOMPONENT_H_ -#define _OPAL_MEMNIC_SUBCOMPONENT_H_ - -#include -#include - -#include "sst/elements/memHierarchy/memNICBase.h" - -namespace SST { -namespace Opal { - -/* - * NIC for multi-node configurations - */ -class OpalMemNIC : public SST::MemHierarchy::MemNICBase { - -public: -/* Element Library Info */ -#define OPAL_MEMNIC_ELI_PARAMS MEMNICBASE_ELI_PARAMS, \ - { "node", "Node number in multinode environment", "0"},\ - { "shared_memory", "Shared meory enable flag", "0"},\ - { "local_memory_size", "Local memory size to mask local memory addresses", "0"},\ - { "network_bw", "Network bandwidth", "80GiB/s" },\ - { "network_input_buffer_size", "Size of input buffer", "1KiB" },\ - { "network_output_buffer_size", "Size of output buffer", "1KiB" },\ - { "min_packet_size", "Size of packet with a payload (e.g., control message size)", "8B"} - - - SST_ELI_REGISTER_SUBCOMPONENT_DERIVED(OpalMemNIC, "Opal", "OpalMemNIC", SST_ELI_ELEMENT_VERSION(1,0,0), - "MemNIC for Opal multi-node configurations", SST::MemHierarchy::MemLinkBase) - - SST_ELI_DOCUMENT_PARAMS( OPAL_MEMNIC_ELI_PARAMS ) - - SST_ELI_DOCUMENT_PORTS( {"port", "Link to network", {"memHierarchy.MemRtrEvent"} } ) - -/* Begin class definition */ - - /* Constructor */ - OpalMemNIC(Component * comp, Params ¶ms); - OpalMemNIC(ComponentId_t id, Params ¶ms); - - /* Destructor */ - virtual ~OpalMemNIC() { } - - /* Specialized init mem rtr event to specify node */ - class OpalInitMemRtrEvent : public MemHierarchy::MemNICBase::InitMemRtrEvent { - public: - uint32_t node; - - OpalInitMemRtrEvent() {} - OpalInitMemRtrEvent(EndpointInfo info, uint32_t node) : InitMemRtrEvent(info), node(node) { } - - virtual Event* clone(void) override { - OpalInitMemRtrEvent * imre = new OpalInitMemRtrEvent(*this); - if (this->event != nullptr) - imre->event = this->event->clone(); - else - imre->event = nullptr; - return imre; - } - - virtual bool hasClientData() const override { return false; } - - void serialize_order(SST::Core::Serialization::serializer &ser) override { - InitMemRtrEvent::serialize_order(ser); - ser & node; - } - - ImplementSerializable(SST::Opal::OpalMemNIC::OpalInitMemRtrEvent); - }; - - bool clock(); - void send(MemHierarchy::MemEventBase *ev); - - bool recvNotify(int); - - void init(unsigned int phase); - void finish() { link_control->finish(); } - void setup() { link_control->setup(); MemLinkBase::setup(); } - - virtual std::string findTargetDestination(MemHierarchy::Addr addr); - -protected: - virtual MemHierarchy::MemNICBase::InitMemRtrEvent* createInitMemRtrEvent(); - virtual void processInitMemRtrEvent(MemHierarchy::MemNICBase::InitMemRtrEvent* ev); - -private: - void build(Params& params); // Temporary function called by both constructors until both are no longer required - - bool enable; - uint64_t localMemSize; - uint32_t node; - - size_t packetHeaderBytes; - SST::Interfaces::SimpleNetwork * link_control; - std::queue sendQueue; -}; - -} //namespace Opal -} //namespace SST - -#endif diff --git a/src/sst/elements/Opal/tests/app/makefile b/src/sst/elements/Opal/tests/app/makefile deleted file mode 100644 index 879671b16f..0000000000 --- a/src/sst/elements/Opal/tests/app/makefile +++ /dev/null @@ -1,19 +0,0 @@ -CXX=g++ - -opal_test: opal_test.o - $(CXX) -O0 -o opal_test -fopenmp opal_test.o - -opal_test.o: opal_test.c - $(CXX) -O0 -o opal_test.o -fopenmp -c opal_test.c - -opal_mlm: opal_mlm.o - $(CXX) -O0 -o opal_mlm -fopenmp opal_mlm.o - -opal_mlm.o: opal_mlm.c - $(CXX) -O0 -o opal_mlm.o -fopenmp -c opal_mlm.c - -all: opal_test opal_mlm - -clean: - rm opal_test opal_test.o opal_mlm opal_mlm.o - diff --git a/src/sst/elements/Opal/tests/app/opal_mlm.c b/src/sst/elements/Opal/tests/app/opal_mlm.c deleted file mode 100644 index 7cdb7a4127..0000000000 --- a/src/sst/elements/Opal/tests/app/opal_mlm.c +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2009-2020 Sandia Corporation. Under the terms -// of Contract DE-NA0003525 with Sandia Corporation, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2020, Sandia Corporation -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include -#include -#include -#include - -#include - - -#define NUM_INTS 1000000 - -using namespace std; - -#pragma GCC push_options -#pragma GCC optimize ("O0") - -extern "C" { - -void* mlm_malloc(size_t size, int level) -{ - if(size == 0) - { - printf("ZERO BYTE MALLOC\n"); - void* bt_entries[64]; - exit(-1); - } - - printf("Performing a mlm Malloc for size %llu\n", size); - - return malloc(size); -} - -void ariel_enable() { } - -void * ariel_mmap_mlm(int ID, size_t size, int level) { return mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, ID, 0); } - -} - -#pragma GCC pop_options - -int main() -{ - ariel_enable(); - int * x = (int *) malloc(sizeof(int) * NUM_INTS); - int * y = (int *) mlm_malloc(sizeof(int) * NUM_INTS, 1); - - int * z = (int *) ariel_mmap_mlm(660066, sizeof(int) * NUM_INTS, 2); - - for (int i = 0; i < NUM_INTS; ) - { - cout << "*** " << i << " ***\n"; - x[i] = i * 2; - y[i] = i * 2; - z[i] = i * 2; - i = i + 1024; - } - - for (int i = 0; i < NUM_INTS; ) - { - cout << "Value is: " << x[i] << ", " << y[i] << ", " << z[i] << endl; - i = i + 1024; - } - - cout << "Test MLM Malloc" << endl; - - - return 0; -} diff --git a/src/sst/elements/Opal/tests/app/opal_test.c b/src/sst/elements/Opal/tests/app/opal_test.c deleted file mode 100644 index 4ebf026d6c..0000000000 --- a/src/sst/elements/Opal/tests/app/opal_test.c +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2009-2020 Sandia Corporation. Under the terms -// of Contract DE-NA0003525 with Sandia Corporation, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2020, Sandia Corporation -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include -#include - -extern "C" -void ariel_enable() { printf("Inside Ariel\n"); } - -int main(int argc, char* argv[]) { - - const int LENGTH = 2000; - - ariel_enable(); - - printf("Allocating arrays of size %d elements.\n", LENGTH); - double* a = (double*) malloc(sizeof(double) * LENGTH); - double* b = (double*) malloc(sizeof(double) * LENGTH); - double* c = (double*) malloc(sizeof(double) * LENGTH); - printf("Done allocating arrays.\n"); - - int i; - for(i = 0; i < LENGTH; ++i) { - a[i] = i; - b[i] = LENGTH - i; - c[i] = 0; - } - - - printf("Perfoming the fast_c compute loop...\n"); - #pragma omp parallel num_threads(2) - for(i = 0; i < LENGTH; ++i) { - //printf("issuing a write to: %llu (fast_c)\n", ((unsigned long long int) &fast_c[i])); - c[i] = 2.0 * a[i] + 1.5 * b[i]; - } - - double sum = 0; - for(i = 0; i < LENGTH; ++i) { - sum += c[i]; - } - - printf("Sum of arrays is: %f\n", sum); - printf("Freeing arrays...\n"); - - free(a); - free(b); - free(c); - - printf("Done.\n"); -} diff --git a/src/sst/elements/Opal/tests/basic_1node_1smp.py b/src/sst/elements/Opal/tests/basic_1node_1smp.py deleted file mode 100644 index 5234c50554..0000000000 --- a/src/sst/elements/Opal/tests/basic_1node_1smp.py +++ /dev/null @@ -1,416 +0,0 @@ -import sst - - -# Define SST core options -sst.setProgramOption("timebase", "1ps") -sst.setProgramOption("stopAtCycle", "0 ns") - -# Tell SST what statistics handling we want -sst.setStatisticLoadLevel(4) - -clock = "2GHz" - -cores = 2*2 - -#os.environ['OMP_NUM_THREADS'] = str(cores/2) - - -local_memory_capacity = 128 # Size of memory in MBs -shared_memory_capacity = 2048 # 2GB -shared_memory = 1 -page_size = 4 # In KB -num_pages = local_memory_capacity * 1024 // page_size + 8*1024*1024//page_size - - -ariel = sst.Component("cpu", "ariel.ariel") -ariel.addParams({ - "verbose" : 1, - "clock" : clock, - "maxcorequeue" : 1024, - "maxissuepercycle" : 2, - "maxtranscore": 16, - "pipetimeout" : 0, - "corecount" : cores//2, - "arielmode" : 0, - "appargcount" : 0, - "max_insts" : 10000, - "executable" : "./app/opal_test", - "node" : 0, - "launchparamcount" : 1, - "launchparam0" : "-ifeellucky", -}) - -# Opal uses this memory manager to intercept memory translation requests, mallocs, mmaps, etc. -memmgr = ariel.setSubComponent("memmgr", "Opal.MemoryManagerOpal") -memmgr.addParams({ - "opal_latency" : "30ps" -}) -# Opal uses this memory manager (for now?) to do the actual translation -submemmgr = memmgr.setSubComponent("translator", "ariel.MemoryManagerSimple") -submemmgr.addParams({ - "pagecount0" : num_pages, - "pagesize0" : page_size * 1024, -}) - -ariel.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - -mmu = sst.Component("mmu", "Samba") -mmu.addParams({ - "os_page_size": 4, - "perfect": 0, - "corecount": cores//2, - "sizes_L1": 3, - "page_size1_L1": 4, - "page_size2_L1": 2048, - "page_size3_L1": 1024*1024, - "assoc1_L1": 4, - "size1_L1": 32, - "assoc2_L1": 4, - "size2_L1": 32, - "assoc3_L1": 4, - "size3_L1": 4, - "sizes_L2": 4, - "page_size1_L2": 4, - "page_size2_L2": 2048, - "page_size3_L2": 1024*1024, - "assoc1_L2": 12, - "size1_L2": 1536,#1536, - "assoc2_L2": 32, #12, - "size2_L2": 32, #1536, - "assoc3_L2": 4, - "size3_L2": 16, - "clock": clock, - "levels": 2, - "max_width_L1": 3, - "max_outstanding_L1": 2, - "max_outstanding_PTWC": 2, - "latency_L1": 4, - "parallel_mode_L1": 1, - "max_outstanding_L2": 2, - "max_width_L2": 4, - "latency_L2": 10, - "parallel_mode_L2": 0, - "self_connected" : 0, - "page_walk_latency": 200, - "size1_PTWC": 32, # this just indicates the number entries of the page table walk cache level 1 (PTEs) - "assoc1_PTWC": 4, # this just indicates the associtativit the page table walk cache level 1 (PTEs) - "size2_PTWC": 32, # this just indicates the number entries of the page table walk cache level 2 (PMDs) - "assoc2_PTWC": 4, # this just indicates the associtativit the page table walk cache level 2 (PMDs) - "size3_PTWC": 32, # this just indicates the number entries of the page table walk cache level 3 (PUDs) - "assoc3_PTWC": 4, # this just indicates the associtativit the page table walk cache level 3 (PUDs) - "size4_PTWC": 32, # this just indicates the number entries of the page table walk cache level 4 (PGD) - "assoc4_PTWC": 4, # this just indicates the associtativit the page table walk cache level 4 (PGD) - "latency_PTWC": 10, # This is the latency of checking the page table walk cache - "opal_latency": "30ps", - "emulate_faults": 1, -}) -mmu.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - -# MMU uses this page fault handler. -pagefaulthandler = mmu.setSubComponent("pagefaulthandler", "Opal.PageFaultHandler") -pagefaulthandler.addParams({ - "opal_latency" : "30ps" -}) - -opal= sst.Component("opal","Opal") -opal.addParams({ - "clock" : clock, - "num_nodes" : 1, - "verbose" : 1, - "max_inst" : 32, - "shared_mempools" : 1, - "shared_mem.mempool0.start" : local_memory_capacity*1024*1024, - "shared_mem.mempool0.size" : shared_memory_capacity*1024, - "shared_mem.mempool0.frame_size": page_size, - "shared_mem.mempool0.mem_type" : 0, - "node0.cores" : cores//2, - "node0.allocation_policy" : 1, - "node0.page_migration" : 0, - "node0.page_migration_policy" : 0, - "node0.num_pages_to_migrate" : 0, - "node0.latency" : 2000, - "node0.memory.start" : 0, - "node0.memory.size" : local_memory_capacity*1024, - "node0.memory.frame_size" : page_size, - "node0.memory.mem_type" : 0, - "num_ports" : cores, -}) -opal.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - - -l1_params = { - "cache_frequency": clock, - "cache_size": "32KiB", - "associativity": 8, - "access_latency_cycles": 4, - "L1": 1, - "verbose": 30, - "maxRequestDelay" : "1000000", -} - -l2_params = { - "cache_frequency": clock, - "cache_size": "256KiB", - "associativity": 8, - "access_latency_cycles": 6, - "mshr_num_entries" : 16, -} - -l3_params = { - "access_latency_cycles" : "12", - "cache_frequency" : clock, - "associativity" : "16", - "cache_size" : "2MB", - "mshr_num_entries" : "4096", - "num_cache_slices" : 1, - "slice_allocation_policy" : "rr", -} - -link_params = { - "shared_memory": shared_memory, - "node": 0, -} - -nic_params = { - "shared_memory": shared_memory, - "node": 0, - "network_bw": "96GiB/s", - "local_memory_size" : local_memory_capacity*1024*1024, -} - - - -class Network: - def __init__(self, name,networkId,input_latency,output_latency): - self.name = name - self.ports = 0 - self.rtr = sst.Component("rtr_%s"%name, "merlin.hr_router") - self.rtr.addParams({ - "id": networkId, - #"topology": "merlin.singlerouter", - "link_bw" : "80GiB/s", - "xbar_bw" : "80GiB/s", - "flit_size" : "8B", - "input_latency" : input_latency, - "output_latency" : output_latency, - "input_buf_size" : "1KB", - "output_buf_size" : "1KB", - }) - - topo = self.rtr.setSubComponent("topology", "merlin.singlerouter") - - def getNextPort(self): - self.ports += 1 - self.rtr.addParam("num_ports", self.ports) - return (self.ports-1) - - - -internal_network = Network("internal_network",0,"20ps","20ps") - -for next_core in range(cores): - - l1 = sst.Component("l1cache_" + str(next_core), "memHierarchy.Cache") - l1.addParams(l1_params) - l1_cpulink = l1.setSubComponent("cpulink", "memHierarchy.MemLink") - l1_memlink = l1.setSubComponent("memlink", "memHierarchy.MemLink") - l1_cpulink.addParams(link_params) - l1_memlink.addParams(link_params) - - l2 = sst.Component("l2cache_" + str(next_core), "memHierarchy.Cache") - l2.addParams(l2_params) - l2_cpulink = l2.setSubComponent("cpulink", "memHierarchy.MemLink") - l2_memlink = l2.setSubComponent("memlink", "Opal.OpalMemNIC") - l2_cpulink.addParams(link_params) - l2_memlink.addParams(nic_params) - l2_memlink.addParams({ "group" : 1}) - - arielMMULink = sst.Link("cpu_mmu_link_" + str(next_core)) - MMUCacheLink = sst.Link("mmu_cache_link_" + str(next_core)) - PTWMemLink = sst.Link("ptw_mem_link_" + str(next_core)) - PTWOpalLink = sst.Link("ptw_opal_" + str(next_core)) - ArielOpalLink = sst.Link("ariel_opal_" + str(next_core)) - - if next_core < cores//2: - arielMMULink.connect((ariel, "cache_link_%d"%next_core, "300ps"), (mmu, "cpu_to_mmu%d"%next_core, "300ps")) - ArielOpalLink.connect((memmgr, "opal_link_%d"%next_core, "300ps"), (opal, "coreLink%d"%(next_core), "300ps")) - MMUCacheLink.connect((mmu, "mmu_to_cache%d"%next_core, "300ps"), (l1_cpulink, "port", "300ps")) - PTWOpalLink.connect( (pagefaulthandler, "opal_link_%d"%next_core, "300ps"), (opal, "mmuLink%d"%(next_core), "300ps") ) - else: - PTWMemLink.connect((mmu, "ptw_to_mem%d"%(next_core-cores//2), "300ps"), (l1_cpulink, "port", "300ps")) - - l2_core_link = sst.Link("l2cache_" + str(next_core) + "_link") - l2_core_link.connect((l1_memlink, "port", "300ps"), (l2_cpulink, "port", "300ps")) - - l2_ring_link = sst.Link("l2_ring_link_" + str(next_core)) - l2_ring_link.connect((l2_memlink, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - - -l3cache = sst.Component("l3cache", "memHierarchy.Cache") -l3cache.addParams(l3_params) -l3_link = l3cache.setSubComponent("cpulink", "Opal.OpalMemNIC") -l3cache.addParams({ "slice_id" : 0 }) -l3_link.addParams(nic_params) -l3_link.addParams({ - "group" : 2, - "addr_range_start": 0, - "addr_range_end": (local_memory_capacity*1024*1024) - 1, - "interleave_size": "0B", -}) - -l3_ring_link = sst.Link("l3_link") -l3_ring_link.connect( (l3_link, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - -mem = sst.Component("local_memory", "memHierarchy.MemController") -mem.addParams({ - "clock" : "1.2GHz", - "backing" : "none", - "backend" : "memHierarchy.timingDRAM", - "backend.id" : 0, - "backend.addrMapper" : "memHierarchy.roundRobinAddrMapper", - "backend.addrMapper.interleave_size" : "64B", - "backend.addrMapper.row_size" : "1KiB", - "backend.clock" : "1.2GHz", - "backend.mem_size" : str(local_memory_capacity) + "MiB", - "backend.channels" : 2, - "backend.channel.numRanks" : 2, - "backend.channel.rank.numBanks" : 16, - "backend.channel.transaction_Q_size" : 32, - "backend.channel.rank.bank.CL" : 14, - "backend.channel.rank.bank.CL_WR" : 12, - "backend.channel.rank.bank.RCD" : 14, - "backend.channel.rank.bank.TRP" : 14, - "backend.channel.rank.bank.dataCycles" : 2, - "backend.channel.rank.bank.pagePolicy" : "memHierarchy.simplePagePolicy", - "backend.channel.rank.bank.transactionQ" : "memHierarchy.fifoTransactionQ", - "backend.channel.rank.bank.pagePolicy.close" : 1, -}) -mem_link = mem.setSubComponent("cpulink", "memHierarchy.MemLink") -mem_link.addParams({ - "shared_memory": 1, - "node" : 0 -}) - -dc = sst.Component("dc", "memHierarchy.DirectoryController") -dc.addParams({ - "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize - "clock": "200MHz", - #"debug" : 1, - #"debug_level" : 10, -}) - -dc_cpulink = dc.setSubComponent("cpulink", "Opal.OpalMemNIC") -dc_memlink = dc.setSubComponent("memlink", "memHierarchy.MemLink") -dc_memlink.addParams(link_params) -dc_cpulink.addParams(nic_params) -dc_cpulink.addParams({ - "group" : 3, - "addr_range_start" : 0, - "addr_range_end" : (local_memory_capacity*1024*1024)-1, - "interleave_size": "0B", - "shared_memory": shared_memory, - "node": 0, - #"debug" : 1, - #"debug_level" : 10, -}) - -memLink = sst.Link("mem_link") -memLink.connect((mem_link, "port", "300ps"), (dc_memlink, "port", "300ps")) - -netLink = sst.Link("dc_link") -netLink.connect((dc_cpulink, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - - - - -# External memory configuration - -external_network = Network("Ext_Mem_Net",1,"20ns","20ns") -port = external_network.getNextPort() - -ext_mem = sst.Component("ExternalNVMmemContr", "memHierarchy.MemController") -ext_mem.addParams({ - "memory_size" : str(shared_memory_capacity) + "MB", - "max_requests_per_cycle" : 4, - "backing" : "none", - "clock" : clock, -}) - -ext_memory = ext_mem.setSubComponent("backend", "memHierarchy.Messier") -ext_memory.addParams({ - "max_requests_per_cycle" : 4, - "mem_size" : str(shared_memory_capacity) + "MB", - "clock" : clock, -}) - -ext_mem_link = ext_mem.setSubComponent("cpulink", "memHierarchy.MemLink") -ext_mem_link.addParams({ "node" : 9999, }) ## does not belong to any node - -ext_dc = sst.Component("ExtMemDc", "memHierarchy.DirectoryController") -ext_dc.addParams({ - "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize - "clock": "1GHz", -}) -ext_dc_cpulink = ext_dc.setSubComponent("cpulink", "Opal.OpalMemNIC") -ext_dc_memlink = ext_dc.setSubComponent("memlink", "memHierarchy.MemLink") -ext_dc_cpulink.addParams({ - "network_bw": "80GiB/s", - "addr_range_start" : (local_memory_capacity*1024*1024), - "addr_range_end" : (local_memory_capacity*1024*1024) + (shared_memory_capacity*1024*1024) -1, - "node": 9999, - "group" : 3, # TODO is this the right routing group? means sources are all components in group 2 and dests are all components in group 4 -}) - - -messier = sst.Component("ExternalMem" , "Messier") -messier.addParams({ - "clock" : clock, - "tCL" : 30, - "tRCD" : 300, - "tCL_W" : 1000, - "write_buffer_size" : 32, - "flush_th" : 90, - "num_banks" : 16, - "max_outstanding" : 16, - "max_writes" : "4", - "max_current_weight" : 32*50, - "read_weight" : "5", - "write_weight" : "5", - "cacheline_interleaving" : 0, -}) - -link_nvm_bus_link = sst.Link("External_mem_nvm_link") -link_nvm_bus_link.connect( (messier, "bus", "50ps"), (ext_memory, "nvm_link", "50ps") ) - -extmemLink = sst.Link("External_mem_dc_link") -extmemLink.connect( (ext_dc_memlink, "port", "500ps"), (ext_mem_link, "port", "500ps") ) - -ext_dcLink = sst.Link("External_mem_link") -ext_dcLink.connect( (ext_dc_cpulink, "port", "500ps"), (external_network.rtr, "port%d"%port, "500ps") ) - - - -# Connecting Internal and External network -def bridge(net0, net1): - net0port = net0.getNextPort() - net1port = net1.getNextPort() - name = "%s-%s"%(net0.name, net1.name) - bridge = sst.Component("Bridge:%s"%name, "merlin.Bridge") - bridge.addParams({ - "translator": "memHierarchy.MemNetBridge", - "network_bw" : "80GiB/s", - }) - link = sst.Link("B0-%s"%name) - link.connect( (bridge, "network0", "500ps"), (net0.rtr, "port%d"%net0port, "500ps") ) - link = sst.Link("B1-%s"%name) - link.connect( (bridge, "network1", "500ps"), (net1.rtr, "port%d"%net1port, "500ps") ) - - -midnet = Network("Bridge",3,"50ps","50ps") -bridge(internal_network, midnet) -bridge(external_network, midnet) - - diff --git a/src/sst/elements/Opal/tests/basic_2node_1smp.py b/src/sst/elements/Opal/tests/basic_2node_1smp.py deleted file mode 100644 index 2fbc51b45d..0000000000 --- a/src/sst/elements/Opal/tests/basic_2node_1smp.py +++ /dev/null @@ -1,431 +0,0 @@ -import sst - - -# Define SST core options -sst.setProgramOption("timebase", "1ps") -sst.setProgramOption("stopAtCycle", "0 ns") - -# Tell SST what statistics handling we want -sst.setStatisticLoadLevel(4) - -clock = "2GHz" - -cores = 2*2 - -#os.environ['OMP_NUM_THREADS'] = str(cores/2) - -nodes = 2 -local_memory_capacity = 128 # Size of memory in MBs -shared_memory_capacity = 2048 # 2GB -shared_memory = 1 -page_size = 4 # In KB -num_pages = local_memory_capacity * 1024 // page_size + 8*1024*1024//page_size - - -arielParams = { - "verbose" : 1, - "clock" : clock, - "maxcorequeue" : 1024, - "maxissuepercycle" : 2, - "maxtranscore": 16, - "pipetimeout" : 0, - "corecount" : cores//2, - "arielmode" : 0, - "appargcount" : 0, - "max_insts" : 10000, - "executable" : "./app/opal_test", - "node" : 0, - "launchparamcount" : 1, - "launchparam0" : "-ifeellucky", -} - -mmuParams = { - "os_page_size": 4, - "perfect": 0, - "corecount": cores//2, - "sizes_L1": 3, - "page_size1_L1": 4, - "page_size2_L1": 2048, - "page_size3_L1": 1024*1024, - "assoc1_L1": 4, - "size1_L1": 32, - "assoc2_L1": 4, - "size2_L1": 32, - "assoc3_L1": 4, - "size3_L1": 4, - "sizes_L2": 4, - "page_size1_L2": 4, - "page_size2_L2": 2048, - "page_size3_L2": 1024*1024, - "assoc1_L2": 12, - "size1_L2": 1536,#1536, - "assoc2_L2": 32, #12, - "size2_L2": 32, #1536, - "assoc3_L2": 4, - "size3_L2": 16, - "clock": clock, - "levels": 2, - "max_width_L1": 3, - "max_outstanding_L1": 2, - "max_outstanding_PTWC": 2, - "latency_L1": 4, - "parallel_mode_L1": 1, - "max_outstanding_L2": 2, - "max_width_L2": 4, - "latency_L2": 10, - "parallel_mode_L2": 0, - "self_connected" : 0, - "page_walk_latency": 200, - "size1_PTWC": 32, # this just indicates the number entries of the page table walk cache level 1 (PTEs) - "assoc1_PTWC": 4, # this just indicates the associtativit the page table walk cache level 1 (PTEs) - "size2_PTWC": 32, # this just indicates the number entries of the page table walk cache level 2 (PMDs) - "assoc2_PTWC": 4, # this just indicates the associtativit the page table walk cache level 2 (PMDs) - "size3_PTWC": 32, # this just indicates the number entries of the page table walk cache level 3 (PUDs) - "assoc3_PTWC": 4, # this just indicates the associtativit the page table walk cache level 3 (PUDs) - "size4_PTWC": 32, # this just indicates the number entries of the page table walk cache level 4 (PGD) - "assoc4_PTWC": 4, # this just indicates the associtativit the page table walk cache level 4 (PGD) - "latency_PTWC": 10, # This is the latency of checking the page table walk cache - "opal_latency": "30ps", - "emulate_faults": 1, -} - - -opal= sst.Component("opal","Opal") -opal.addParams({ - "clock" : clock, - "num_nodes" : nodes, - "verbose" : 1, - "max_inst" : 32, - "shared_mempools" : 1, - "shared_mem.mempool0.start" : local_memory_capacity*1024*1024, - "shared_mem.mempool0.size" : shared_memory_capacity*1024, - "shared_mem.mempool0.frame_size": page_size, - "shared_mem.mempool0.mem_type" : 0, - "node0.cores" : cores//2, - "node0.allocation_policy" : 1, - "node0.latency" : 2000, - "node0.memory.start" : 0, - "node0.memory.size" : local_memory_capacity*1024, - "node0.memory.frame_size" : page_size, - "node0.memory.mem_type" : 0, - "node1.cores" : cores//2, - "node1.allocation_policy" : 1, - "node1.latency" : 2000, - "node1.memory.start" : 0, - "node1.memory.size" : local_memory_capacity*1024, - "node1.memory.frame_size" : page_size, - "node1.memory.mem_type" : 0, - "num_ports" : cores*nodes, -}) -opal.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - - -l1_params = { - "cache_frequency": clock, - "cache_size": "32KiB", - "associativity": 8, - "access_latency_cycles": 4, - "L1": 1, - "verbose": 30, - "maxRequestDelay" : "1000000", -} - -l2_params = { - "cache_frequency": clock, - "cache_size": "256KiB", - "associativity": 8, - "access_latency_cycles": 6, - "mshr_num_entries" : 16, -} - -l3_params = { - "access_latency_cycles" : "12", - "cache_frequency" : clock, - "associativity" : "16", - "cache_size" : "2MB", - "mshr_num_entries" : "4096", - "num_cache_slices" : 1, - "slice_allocation_policy" : "rr", -} - -link_params = { - "shared_memory": shared_memory, -# "node": 0, -} - -nic_params = { - "shared_memory": shared_memory, -# "node": 0, - "network_bw": "96GiB/s", - "local_memory_size" : local_memory_capacity*1024*1024, -} - - - -class Network: - def __init__(self, name,networkId,input_latency,output_latency): - self.name = name - self.ports = 0 - self.rtr = sst.Component("rtr_%s"%name, "merlin.hr_router") - self.rtr.addParams({ - "id": networkId, - #"topology": "merlin.singlerouter", - "link_bw" : "80GiB/s", - "xbar_bw" : "80GiB/s", - "flit_size" : "8B", - "input_latency" : input_latency, - "output_latency" : output_latency, - "input_buf_size" : "1KB", - "output_buf_size" : "1KB", - }) - - topo = self.rtr.setSubComponent("topology", "merlin.singlerouter") - - def getNextPort(self): - self.ports += 1 - self.rtr.addParam("num_ports", self.ports) - return (self.ports-1) - -internal_network_map = {} - -for node in range(nodes): - ariel = sst.Component("node"+str(node)+"_cpu", "ariel.ariel") - ariel.addParams(arielParams) - - # Opal uses this memory manager to intercept memory translation requests, mallocs, mmaps, etc. - memmgr = ariel.setSubComponent("memmgr", "Opal.MemoryManagerOpal") - memmgr.addParams({ - "opal_latency" : "30ps" - }) - # Opal uses this memory manager (for now?) to do the actual translation - submemmgr = memmgr.setSubComponent("translator", "ariel.MemoryManagerSimple") - submemmgr.addParams({ - "pagecount0" : num_pages, - "pagesize0" : page_size * 1024, - }) - - ariel.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - - # MMU - mmu = sst.Component("node"+str(node)+"_mmu", "Samba") - mmu.addParams(mmuParams) - - # MMU uses this page fault handler - pagefaulthandler = mmu.setSubComponent("pagefaulthandler", "Opal.PageFaultHandler") - pagefaulthandler.addParams({ - "opal_latency" : "30ps" - }) - - mmu.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - - internal_network = Network("node"+str(node)+"_internal_network",0,"20ps","20ps") - - for next_core in range(cores): - - l1 = sst.Component("node"+str(node)+"_l1cache_" + str(next_core), "memHierarchy.Cache") - l1.addParams(l1_params) - l1_cpulink = l1.setSubComponent("cpulink", "memHierarchy.MemLink") - l1_memlink = l1.setSubComponent("memlink", "memHierarchy.MemLink") - l1_cpulink.addParams(link_params) - l1_memlink.addParams(link_params) - l1_cpulink.addParams({"node": node,}) - l1_memlink.addParams({"node": node,}) - - l2 = sst.Component("node"+str(node)+"_l2cache_" + str(next_core), "memHierarchy.Cache") - l2.addParams(l2_params) - l2_cpulink = l2.setSubComponent("cpulink", "memHierarchy.MemLink") - l2_memlink = l2.setSubComponent("memlink", "Opal.OpalMemNIC") - l2_cpulink.addParams(link_params) - l2_memlink.addParams(nic_params) - l2_cpulink.addParams({ "node" : node}) - l2_memlink.addParams({ "node" : node}) - l2_memlink.addParams({ "group" : 1}) - - arielMMULink = sst.Link("node"+str(node)+"_cpu_mmu_link_" + str(next_core)) - MMUCacheLink = sst.Link("node"+str(node)+"_mmu_cache_link_" + str(next_core)) - PTWMemLink = sst.Link("node"+str(node)+"_ptw_mem_link_" + str(next_core)) - PTWOpalLink = sst.Link("node"+str(node)+"_ptw_opal_" + str(next_core)) - ArielOpalLink = sst.Link("node"+str(node)+"_ariel_opal_" + str(next_core)) - - if next_core < cores//2: - arielMMULink.connect((ariel, "cache_link_%d"%next_core, "300ps"), (mmu, "cpu_to_mmu%d"%next_core, "300ps")) - ArielOpalLink.connect((memmgr, "opal_link_%d"%next_core, "300ps"), (opal, "coreLink%d"%(next_core + node*(cores//2)), "300ps")) - MMUCacheLink.connect((mmu, "mmu_to_cache%d"%next_core, "300ps"), (l1_cpulink, "port", "300ps")) - PTWOpalLink.connect( (pagefaulthandler, "opal_link_%d"%next_core, "300ps"), (opal, "mmuLink%d"%(next_core + node*(cores//2)), "300ps") ) - else: - PTWMemLink.connect((mmu, "ptw_to_mem%d"%(next_core-cores//2), "300ps"), (l1_cpulink, "port", "300ps")) - - l2_core_link = sst.Link("node"+str(node)+"_l2cache_" + str(next_core) + "_link") - l2_core_link.connect((l1_memlink, "port", "300ps"), (l2_cpulink, "port", "300ps")) - - l2_ring_link = sst.Link("node"+str(node)+"_l2_ring_link_" + str(next_core)) - l2_ring_link.connect((l2_memlink, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - l3cache = sst.Component("node"+str(node)+"_l3cache", "memHierarchy.Cache") - l3cache.addParams(l3_params) - l3_link = l3cache.setSubComponent("cpulink", "Opal.OpalMemNIC") - l3cache.addParams({ "slice_id" : 0 }) - l3_link.addParams(nic_params) - l3_link.addParams({ - "node" : node, - "group" : 2, - "addr_range_start": 0, - "addr_range_end": (local_memory_capacity*1024*1024) - 1, - "interleave_size": "0B", - }) - - l3_ring_link = sst.Link("node"+str(node)+"_l3_link") - l3_ring_link.connect( (l3_link, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - - mem = sst.Component("node"+str(node)+"_local_memory", "memHierarchy.MemController") - mem.addParams({ - "clock" : "1.2GHz", - "backing" : "none", - "backend" : "memHierarchy.timingDRAM", - "backend.id" : 0, - "backend.addrMapper" : "memHierarchy.roundRobinAddrMapper", - "backend.addrMapper.interleave_size" : "64B", - "backend.addrMapper.row_size" : "1KiB", - "backend.clock" : "1.2GHz", - "backend.mem_size" : str(local_memory_capacity) + "MiB", - "backend.channels" : 2, - "backend.channel.numRanks" : 2, - "backend.channel.rank.numBanks" : 16, - "backend.channel.transaction_Q_size" : 32, - "backend.channel.rank.bank.CL" : 14, - "backend.channel.rank.bank.CL_WR" : 12, - "backend.channel.rank.bank.RCD" : 14, - "backend.channel.rank.bank.TRP" : 14, - "backend.channel.rank.bank.dataCycles" : 2, - "backend.channel.rank.bank.pagePolicy" : "memHierarchy.simplePagePolicy", - "backend.channel.rank.bank.transactionQ" : "memHierarchy.fifoTransactionQ", - "backend.channel.rank.bank.pagePolicy.close" : 1, - }) - mem_link = mem.setSubComponent("cpulink", "memHierarchy.MemLink") - mem_link.addParams({ - "shared_memory": 1, - "node" : 0 - }) - - dc = sst.Component("node"+str(node)+"_dc", "memHierarchy.DirectoryController") - dc.addParams({ - "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize - "clock": "200MHz", - #"debug" : 1, - #"debug_level" : 10, - }) - - dc_cpulink = dc.setSubComponent("cpulink", "Opal.OpalMemNIC") - dc_memlink = dc.setSubComponent("memlink", "memHierarchy.MemLink") - dc_memlink.addParams(link_params) - dc_cpulink.addParams(nic_params) - dc_cpulink.addParams({ - "node" : node, - "group" : 3, - "addr_range_start" : 0, - "addr_range_end" : (local_memory_capacity*1024*1024)-1, - "interleave_size": "0B", - #"debug" : 1, - #"debug_level" : 10, - }) - - memLink = sst.Link("node"+str(node)+"_mem_link") - memLink.connect((mem_link, "port", "300ps"), (dc_memlink, "port", "300ps")) - - netLink = sst.Link("node"+str(node)+"_dc_link") - netLink.connect((dc_cpulink, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - internal_network_map[str(node)] = internal_network - - - -# External memory configuration - -external_network = Network("Ext_Mem_Net",1,"20ns","20ns") -port = external_network.getNextPort() - -ext_mem = sst.Component("ExternalNVMmemContr", "memHierarchy.MemController") -ext_mem.addParams({ - "memory_size" : str(shared_memory_capacity) + "MB", - "max_requests_per_cycle" : 4, - "backing" : "none", - "clock" : clock, -}) - -ext_memory = ext_mem.setSubComponent("backend", "memHierarchy.Messier") -ext_memory.addParams({ - "max_requests_per_cycle" : 4, - "mem_size" : str(shared_memory_capacity) + "MB", - "clock" : clock, -}) - -ext_mem_link = ext_mem.setSubComponent("cpulink", "memHierarchy.MemLink") -ext_mem_link.addParams({ "node" : 9999, }) ## does not belong to any node - -ext_dc = sst.Component("ExtMemDc", "memHierarchy.DirectoryController") -ext_dc.addParams({ - "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize - "clock": "1GHz", -}) -ext_dc_cpulink = ext_dc.setSubComponent("cpulink", "Opal.OpalMemNIC") -ext_dc_memlink = ext_dc.setSubComponent("memlink", "memHierarchy.MemLink") -ext_dc_cpulink.addParams({ - "network_bw": "80GiB/s", - "addr_range_start" : (local_memory_capacity*1024*1024), - "addr_range_end" : (local_memory_capacity*1024*1024) + (shared_memory_capacity*1024*1024) -1, - "node": 9999, - "group" : 3, # TODO is this the right routing group? means sources are all components in group 2 and dests are all components in group 4 -}) - - -messier = sst.Component("ExternalMem" , "Messier") -messier.addParams({ - "clock" : clock, - "tCL" : 30, - "tRCD" : 300, - "tCL_W" : 1000, - "write_buffer_size" : 32, - "flush_th" : 90, - "num_banks" : 16, - "max_outstanding" : 16, - "max_writes" : "4", - "max_current_weight" : 32*50, - "read_weight" : "5", - "write_weight" : "5", - "cacheline_interleaving" : 0, -}) - -link_nvm_bus_link = sst.Link("External_mem_nvm_link") -link_nvm_bus_link.connect( (messier, "bus", "50ps"), (ext_memory, "nvm_link", "50ps") ) - -extmemLink = sst.Link("External_mem_dc_link") -extmemLink.connect( (ext_dc_memlink, "port", "500ps"), (ext_mem_link, "port", "500ps") ) - -ext_dcLink = sst.Link("External_mem_link") -ext_dcLink.connect( (ext_dc_cpulink, "port", "500ps"), (external_network.rtr, "port%d"%port, "500ps") ) - - - -# Connecting Internal and External network -def bridge(net0, net1): - net0port = net0.getNextPort() - net1port = net1.getNextPort() - name = "%s-%s"%(net0.name, net1.name) - bridge = sst.Component("Bridge:%s"%name, "merlin.Bridge") - bridge.addParams({ - "translator": "memHierarchy.MemNetBridge", - "network_bw" : "80GiB/s", - }) - link = sst.Link("B0-%s"%name) - link.connect( (bridge, "network0", "500ps"), (net0.rtr, "port%d"%net0port, "500ps") ) - link = sst.Link("B1-%s"%name) - link.connect( (bridge, "network1", "500ps"), (net1.rtr, "port%d"%net1port, "500ps") ) - - -for node in range(nodes): - midnet = Network("node"+str(node)+"_Bridge",3,"50ps","50ps") - bridge(internal_network_map[str(node)], midnet) - bridge(external_network, midnet) - - diff --git a/src/sst/elements/Opal/tests/refFiles/test_Opal_basic_1node_1smp.out b/src/sst/elements/Opal/tests/refFiles/test_Opal_basic_1node_1smp.out deleted file mode 100644 index c6238805ac..0000000000 --- a/src/sst/elements/Opal/tests/refFiles/test_Opal_basic_1node_1smp.out +++ /dev/null @@ -1,125 +0,0 @@ -ArielComponent[arielcpu.cc:38:ArielCPU] Creating Ariel component... -ArielComponent[arielcpu.cc:44:ArielCPU] Configuring for 2 cores... -ArielComponent[arielcpu.cc:47:ArielCPU] Configuring for check addresses = no -ArielComponent[arielcpu.cc:120:ArielCPU] Loaded memory manager: cpu:memmgr -ArielComponent[arielcpu.cc:134:ArielCPU] Memory manager construction is completed. -Pin2Frontend[frontend/simple/pin2frontend.cc:75:Pin2Frontend] Model specifies that there are 0 application arguments -Pin2Frontend[frontend/simple/pin2frontend.cc:82:Pin2Frontend] Interception and instrumentation of multi-level memory and malloc/free calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:90:Pin2Frontend] Tracking the stack and dumping on malloc calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:95:Pin2Frontend] Malloc map file is DISABLED -Pin2Frontend[frontend/simple/pin2frontend.cc:104:Pin2Frontend] Base pipe name: /sst_shmem_57403-0-1681692777 -Pin2Frontend[frontend/simple/pin2frontend.cc:128:Pin2Frontend] Processing application arguments... -Pin2Frontend[frontend/simple/pin2frontend.cc:258:Pin2Frontend] Completed processing application arguments. -Pin2Frontend[frontend/simple/pin2frontend.cc:263:Pin2Frontend] Completed initialization of the Ariel CPU. -ArielComponent[arielcpu.cc:170:ArielCPU] Registering ArielCPU clock at 2GHz -ArielComponent[arielcpu.cc:174:ArielCPU] Clocks registered. -ArielComponent[arielcpu.cc:176:ArielCPU] Creating core to cache links... -ArielComponent[arielcpu.cc:178:ArielCPU] Creating processor cores and cache links... -ArielComponent[arielcpu.cc:180:ArielCPU] Configuring cores and cache links... -ArielComponent[arielcpu.cc:239:ArielCPU] Completed initialization of the Ariel CPU. -Initialized with 2 cores -Before initialization -Assigning the PTW correctly -mmu:pagefaulthandler register handler -Assigning the PTW correctly -mmu:pagefaulthandler register handler -After initialization -l2cache_0: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -l2cache_1: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -l2cache_2: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -l2cache_3: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -l3cache: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 3 cycles. -local_memory, WARNING: loading backend in legacy mode (from parameter set). Instead, load backend into this controller's 'backend' slot via ctrl.setSubComponent() in configuration. -0:TimingDRAM::build():57:mc=0: number of channels: 2 -0:TimingDRAM::build():58:mc=0: address mapper: memHierarchy.roundRobinAddrMapper -0:TimingDRAM:Channel:Channel():116:mc=0:chan=0: max pending trans: 32 -0:TimingDRAM:Channel:Channel():117:mc=0:chan=0: number of ranks: 2 -0:TimingDRAM:Rank:Rank():226:mc=0:chan=0:rank=0: number of banks: 16 -0:TimingDRAM:Bank:Bank():294:mc=0:chan=0:rank=0:bank=0: CL: 14 -0:TimingDRAM:Bank:Bank():295:mc=0:chan=0:rank=0:bank=0: CL_WR: 12 -0:TimingDRAM:Bank:Bank():296:mc=0:chan=0:rank=0:bank=0: RCD: 14 -0:TimingDRAM:Bank:Bank():297:mc=0:chan=0:rank=0:bank=0: TRP: 14 -0:TimingDRAM:Bank:Bank():298:mc=0:chan=0:rank=0:bank=0: dataCycles: 2 -0:TimingDRAM:Bank:Bank():299:mc=0:chan=0:rank=0:bank=0: transactionQ: memHierarchy.fifoTransactionQ -0:TimingDRAM:Bank:Bank():300:mc=0:chan=0:rank=0:bank=0: pagePolicy: memHierarchy.simplePagePolicy -dc, Warning: getting region parameters (addr_range_start/end, interleave_step/size) from link subcomponent. In the future this will not be supported and region parameters should be declared in the directory's parameters instead. -ExtMemDc, Warning: getting region parameters (addr_range_start/end, interleave_step/size) from link subcomponent. In the future this will not be supported and region parameters should be declared in the directory's parameters instead. -The value of tRCD is 300 -After initialization -SSTARIEL: Loading Ariel Tool to connect to SST on pipe: /sst_shmem_57403-0-1681692777 max core count: 2 -SSTARIEL: Function profiling is disabled. -Pin2Frontend[frontend/simple/pin2frontend.cc:270:init] Launching PIN... -Pin2Frontend[frontend/simple/pin2frontend.cc:326:forkPINChild] Executing PIN command: /home/vamsee/SST_PORTED/pin-2.14-71313-gcc.4.4.7-linux/pin.sh -follow_execv -ifeellucky -t /home/vamsee/forked/BUILD_ELEMENTS/libexec/fesimple.so -w 0 -E 1 -p /sst_shmem_57403-0-1681692777 -v 1 -t 0 -c 2 -s 0 -m 0 -k 0 -d 0 -- ./app/opal_test -Pin2Frontend[frontend/simple/pin2frontend.cc:275:init] Returned from launching PIN. Waiting for child to attach. -Pin2Frontend[frontend/simple/pin2frontend.cc:278:init] Child has attached! -ArielComponent[arielcpu.cc:257:finish] Ariel Processor Information: -ArielComponent[arielcpu.cc:258:finish] Completed at: 1080738 nanoseconds. -ArielComponent[arielcpu.cc:259:finish] Ariel Component Statistics (By Core) - -Ariel Memory Management Statistics: ---------------------------------------------------------------------- -Page Table Sizes: -- Map entries 38 -Page Table Coverages: -- Bytes 155648 - cpu.read_requests.0 : Accumulator : Sum.u64 = 2865; SumSQ.u64 = 2865; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 1; - cpu.write_requests.0 : Accumulator : Sum.u64 = 1337; SumSQ.u64 = 1337; Count.u64 = 1337; Min.u64 = 1; Max.u64 = 1; - cpu.read_request_sizes.0 : Accumulator : Sum.u64 = 16664; SumSQ.u64 = 124680; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 64; - cpu.write_request_sizes.0 : Accumulator : Sum.u64 = 9662; SumSQ.u64 = 83528; Count.u64 = 1337; Min.u64 = 1; Max.u64 = 64; - cpu.split_read_requests.0 : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - cpu.split_write_requests.0 : Accumulator : Sum.u64 = 1; SumSQ.u64 = 1; Count.u64 = 1; Min.u64 = 1; Max.u64 = 1; - cpu.flush_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fence_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.no_ops.0 : Accumulator : Sum.u64 = 5798; SumSQ.u64 = 5798; Count.u64 = 5798; Min.u64 = 1; Max.u64 = 1; - cpu.instruction_count.0 : Accumulator : Sum.u64 = 10000; SumSQ.u64 = 10000; Count.u64 = 10000; Min.u64 = 1; Max.u64 = 1; - cpu.cycles.0 : Accumulator : Sum.u64 = 2161476; SumSQ.u64 = 2161476; Count.u64 = 2161476; Min.u64 = 1; Max.u64 = 1; - cpu.active_cycles.0 : Accumulator : Sum.u64 = 13377; SumSQ.u64 = 13377; Count.u64 = 13377; Min.u64 = 1; Max.u64 = 1; - cpu.fp_sp_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_ins.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - cpu.fp_sp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_sp_scalar_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_scalar_ins.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - cpu.fp_sp_ops.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_ops.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - cpu.read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.read_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.write_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.split_read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.split_write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.flush_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fence_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.no_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.instruction_count.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.cycles.1 : Accumulator : Sum.u64 = 2161475; SumSQ.u64 = 2161475; Count.u64 = 2161475; Min.u64 = 1; Max.u64 = 1; - cpu.active_cycles.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_sp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_sp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_sp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_sp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_hits.Core0_PTWC : Accumulator : Sum.u64 = 8; SumSQ.u64 = 8; Count.u64 = 8; Min.u64 = 1; Max.u64 = 1; - mmu.tlb_misses.Core0_PTWC : Accumulator : Sum.u64 = 42; SumSQ.u64 = 42; Count.u64 = 42; Min.u64 = 1; Max.u64 = 1; - mmu.total_waiting.0 : Accumulator : Sum.u64 = 56937; SumSQ.u64 = 40552311; Count.u64 = 4205; Min.u64 = 1; Max.u64 = 2170; - mmu.tlb_hits.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_misses.Core0_L2 : Accumulator : Sum.u64 = 50; SumSQ.u64 = 50; Count.u64 = 50; Min.u64 = 1; Max.u64 = 1; - mmu.tlb_shootdown.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_hits.Core0_L1 : Accumulator : Sum.u64 = 4095; SumSQ.u64 = 4095; Count.u64 = 4095; Min.u64 = 1; Max.u64 = 1; - mmu.tlb_misses.Core0_L1 : Accumulator : Sum.u64 = 110; SumSQ.u64 = 110; Count.u64 = 110; Min.u64 = 1; Max.u64 = 1; - mmu.tlb_shootdown.Core0_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_hits.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_misses.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.total_waiting.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_hits.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_misses.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_shootdown.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_hits.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_misses.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_shootdown.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - opal.local_mem_usage.0 : Accumulator : Sum.u64 = 28; SumSQ.u64 = 28; Count.u64 = 28; Min.u64 = 1; Max.u64 = 1; - opal.shared_mem_usage.0 : Accumulator : Sum.u64 = 27; SumSQ.u64 = 27; Count.u64 = 27; Min.u64 = 1; Max.u64 = 1; -Simulation is complete, simulated time: 1.08074 ms diff --git a/src/sst/elements/Opal/tests/refFiles/test_Opal_basic_2node_1smp.out b/src/sst/elements/Opal/tests/refFiles/test_Opal_basic_2node_1smp.out deleted file mode 100644 index 72626d7588..0000000000 --- a/src/sst/elements/Opal/tests/refFiles/test_Opal_basic_2node_1smp.out +++ /dev/null @@ -1,234 +0,0 @@ -ArielComponent[arielcpu.cc:38:ArielCPU] Creating Ariel component... -ArielComponent[arielcpu.cc:44:ArielCPU] Configuring for 2 cores... -ArielComponent[arielcpu.cc:47:ArielCPU] Configuring for check addresses = no -ArielComponent[arielcpu.cc:120:ArielCPU] Loaded memory manager: node0_cpu:memmgr -ArielComponent[arielcpu.cc:134:ArielCPU] Memory manager construction is completed. -Pin2Frontend[frontend/simple/pin2frontend.cc:75:Pin2Frontend] Model specifies that there are 0 application arguments -Pin2Frontend[frontend/simple/pin2frontend.cc:82:Pin2Frontend] Interception and instrumentation of multi-level memory and malloc/free calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:90:Pin2Frontend] Tracking the stack and dumping on malloc calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:95:Pin2Frontend] Malloc map file is DISABLED -Pin2Frontend[frontend/simple/pin2frontend.cc:104:Pin2Frontend] Base pipe name: /sst_shmem_57429-1-1681692777 -Pin2Frontend[frontend/simple/pin2frontend.cc:128:Pin2Frontend] Processing application arguments... -Pin2Frontend[frontend/simple/pin2frontend.cc:258:Pin2Frontend] Completed processing application arguments. -Pin2Frontend[frontend/simple/pin2frontend.cc:263:Pin2Frontend] Completed initialization of the Ariel CPU. -ArielComponent[arielcpu.cc:170:ArielCPU] Registering ArielCPU clock at 2GHz -ArielComponent[arielcpu.cc:174:ArielCPU] Clocks registered. -ArielComponent[arielcpu.cc:176:ArielCPU] Creating core to cache links... -ArielComponent[arielcpu.cc:178:ArielCPU] Creating processor cores and cache links... -ArielComponent[arielcpu.cc:180:ArielCPU] Configuring cores and cache links... -ArielComponent[arielcpu.cc:239:ArielCPU] Completed initialization of the Ariel CPU. -Initialized with 2 cores -Before initialization -Assigning the PTW correctly -node0_mmu:pagefaulthandler register handler -Assigning the PTW correctly -node0_mmu:pagefaulthandler register handler -After initialization -node0_l2cache_0: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node0_l2cache_1: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node0_l2cache_2: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node0_l2cache_3: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node0_l3cache: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 3 cycles. -node0_local_memory, WARNING: loading backend in legacy mode (from parameter set). Instead, load backend into this controller's 'backend' slot via ctrl.setSubComponent() in configuration. -0:TimingDRAM::build():57:mc=0: number of channels: 2 -0:TimingDRAM::build():58:mc=0: address mapper: memHierarchy.roundRobinAddrMapper -0:TimingDRAM:Channel:Channel():116:mc=0:chan=0: max pending trans: 32 -0:TimingDRAM:Channel:Channel():117:mc=0:chan=0: number of ranks: 2 -0:TimingDRAM:Rank:Rank():226:mc=0:chan=0:rank=0: number of banks: 16 -0:TimingDRAM:Bank:Bank():294:mc=0:chan=0:rank=0:bank=0: CL: 14 -0:TimingDRAM:Bank:Bank():295:mc=0:chan=0:rank=0:bank=0: CL_WR: 12 -0:TimingDRAM:Bank:Bank():296:mc=0:chan=0:rank=0:bank=0: RCD: 14 -0:TimingDRAM:Bank:Bank():297:mc=0:chan=0:rank=0:bank=0: TRP: 14 -0:TimingDRAM:Bank:Bank():298:mc=0:chan=0:rank=0:bank=0: dataCycles: 2 -0:TimingDRAM:Bank:Bank():299:mc=0:chan=0:rank=0:bank=0: transactionQ: memHierarchy.fifoTransactionQ -0:TimingDRAM:Bank:Bank():300:mc=0:chan=0:rank=0:bank=0: pagePolicy: memHierarchy.simplePagePolicy -node0_dc, Warning: getting region parameters (addr_range_start/end, interleave_step/size) from link subcomponent. In the future this will not be supported and region parameters should be declared in the directory's parameters instead. -ArielComponent[arielcpu.cc:38:ArielCPU] Creating Ariel component... -ArielComponent[arielcpu.cc:44:ArielCPU] Configuring for 2 cores... -ArielComponent[arielcpu.cc:47:ArielCPU] Configuring for check addresses = no -ArielComponent[arielcpu.cc:120:ArielCPU] Loaded memory manager: node1_cpu:memmgr -ArielComponent[arielcpu.cc:134:ArielCPU] Memory manager construction is completed. -Pin2Frontend[frontend/simple/pin2frontend.cc:75:Pin2Frontend] Model specifies that there are 0 application arguments -Pin2Frontend[frontend/simple/pin2frontend.cc:82:Pin2Frontend] Interception and instrumentation of multi-level memory and malloc/free calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:90:Pin2Frontend] Tracking the stack and dumping on malloc calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:95:Pin2Frontend] Malloc map file is DISABLED -Pin2Frontend[frontend/simple/pin2frontend.cc:104:Pin2Frontend] Base pipe name: /sst_shmem_57429-15-1714636915 -Pin2Frontend[frontend/simple/pin2frontend.cc:128:Pin2Frontend] Processing application arguments... -Pin2Frontend[frontend/simple/pin2frontend.cc:258:Pin2Frontend] Completed processing application arguments. -Pin2Frontend[frontend/simple/pin2frontend.cc:263:Pin2Frontend] Completed initialization of the Ariel CPU. -ArielComponent[arielcpu.cc:170:ArielCPU] Registering ArielCPU clock at 2GHz -ArielComponent[arielcpu.cc:174:ArielCPU] Clocks registered. -ArielComponent[arielcpu.cc:176:ArielCPU] Creating core to cache links... -ArielComponent[arielcpu.cc:178:ArielCPU] Creating processor cores and cache links... -ArielComponent[arielcpu.cc:180:ArielCPU] Configuring cores and cache links... -ArielComponent[arielcpu.cc:239:ArielCPU] Completed initialization of the Ariel CPU. -Initialized with 2 cores -Before initialization -Assigning the PTW correctly -node1_mmu:pagefaulthandler register handler -Assigning the PTW correctly -node1_mmu:pagefaulthandler register handler -After initialization -node1_l2cache_0: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node1_l2cache_1: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node1_l2cache_2: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node1_l2cache_3: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node1_l3cache: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 3 cycles. -node1_local_memory, WARNING: loading backend in legacy mode (from parameter set). Instead, load backend into this controller's 'backend' slot via ctrl.setSubComponent() in configuration. -node1_dc, Warning: getting region parameters (addr_range_start/end, interleave_step/size) from link subcomponent. In the future this will not be supported and region parameters should be declared in the directory's parameters instead. -ExtMemDc, Warning: getting region parameters (addr_range_start/end, interleave_step/size) from link subcomponent. In the future this will not be supported and region parameters should be declared in the directory's parameters instead. -The value of tRCD is 300 -After initialization -SSTARIEL: Loading Ariel Tool to connect to SST on pipe: /sst_shmem_57429-1-1681692777 max core count: 2 -SSTARIEL: Function profiling is disabled. -SSTARIEL: Loading Ariel Tool to connect to SST on pipe: /sst_shmem_57429-15-1714636915 max core count: 2 -SSTARIEL: Function profiling is disabled. -Pin2Frontend[frontend/simple/pin2frontend.cc:270:init] Launching PIN... -Pin2Frontend[frontend/simple/pin2frontend.cc:326:forkPINChild] Executing PIN command: /home/vamsee/SST_PORTED/pin-2.14-71313-gcc.4.4.7-linux/pin.sh -follow_execv -ifeellucky -t /home/vamsee/forked/BUILD_ELEMENTS/libexec/fesimple.so -w 0 -E 1 -p /sst_shmem_57429-1-1681692777 -v 1 -t 0 -c 2 -s 0 -m 0 -k 0 -d 0 -- ./app/opal_test -Pin2Frontend[frontend/simple/pin2frontend.cc:275:init] Returned from launching PIN. Waiting for child to attach. -Pin2Frontend[frontend/simple/pin2frontend.cc:278:init] Child has attached! -Pin2Frontend[frontend/simple/pin2frontend.cc:270:init] Launching PIN... -Pin2Frontend[frontend/simple/pin2frontend.cc:326:forkPINChild] Executing PIN command: /home/vamsee/SST_PORTED/pin-2.14-71313-gcc.4.4.7-linux/pin.sh -follow_execv -ifeellucky -t /home/vamsee/forked/BUILD_ELEMENTS/libexec/fesimple.so -w 0 -E 1 -p /sst_shmem_57429-15-1714636915 -v 1 -t 0 -c 2 -s 0 -m 0 -k 0 -d 0 -- ./app/opal_test -Pin2Frontend[frontend/simple/pin2frontend.cc:275:init] Returned from launching PIN. Waiting for child to attach. -Pin2Frontend[frontend/simple/pin2frontend.cc:278:init] Child has attached! -ArielComponent[arielcpu.cc:257:finish] Ariel Processor Information: -ArielComponent[arielcpu.cc:258:finish] Completed at: 657421 nanoseconds. -ArielComponent[arielcpu.cc:259:finish] Ariel Component Statistics (By Core) - -Ariel Memory Management Statistics: ---------------------------------------------------------------------- -Page Table Sizes: -- Map entries 37 -Page Table Coverages: -- Bytes 151552 -ArielComponent[arielcpu.cc:257:finish] Ariel Processor Information: -ArielComponent[arielcpu.cc:258:finish] Completed at: 657421 nanoseconds. -ArielComponent[arielcpu.cc:259:finish] Ariel Component Statistics (By Core) - -Ariel Memory Management Statistics: ---------------------------------------------------------------------- -Page Table Sizes: -- Map entries 37 -Page Table Coverages: -- Bytes 151552 - opal.local_mem_usage.0 : Accumulator : Sum.u64 = 28; SumSQ.u64 = 28; Count.u64 = 28; Min.u64 = 1; Max.u64 = 1; - opal.shared_mem_usage.0 : Accumulator : Sum.u64 = 27; SumSQ.u64 = 27; Count.u64 = 27; Min.u64 = 1; Max.u64 = 1; - opal.local_mem_usage.1 : Accumulator : Sum.u64 = 27; SumSQ.u64 = 27; Count.u64 = 27; Min.u64 = 1; Max.u64 = 1; - opal.shared_mem_usage.1 : Accumulator : Sum.u64 = 26; SumSQ.u64 = 26; Count.u64 = 26; Min.u64 = 1; Max.u64 = 1; - node0_cpu.read_requests.0 : Accumulator : Sum.u64 = 2865; SumSQ.u64 = 2865; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 1; - node0_cpu.write_requests.0 : Accumulator : Sum.u64 = 1338; SumSQ.u64 = 1338; Count.u64 = 1338; Min.u64 = 1; Max.u64 = 1; - node0_cpu.read_request_sizes.0 : Accumulator : Sum.u64 = 16664; SumSQ.u64 = 124680; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 64; - node0_cpu.write_request_sizes.0 : Accumulator : Sum.u64 = 9670; SumSQ.u64 = 83592; Count.u64 = 1338; Min.u64 = 1; Max.u64 = 64; - node0_cpu.split_read_requests.0 : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - node0_cpu.split_write_requests.0 : Accumulator : Sum.u64 = 1; SumSQ.u64 = 1; Count.u64 = 1; Min.u64 = 1; Max.u64 = 1; - node0_cpu.flush_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fence_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.no_ops.0 : Accumulator : Sum.u64 = 5798; SumSQ.u64 = 5798; Count.u64 = 5798; Min.u64 = 1; Max.u64 = 1; - node0_cpu.instruction_count.0 : Accumulator : Sum.u64 = 10001; SumSQ.u64 = 10001; Count.u64 = 10001; Min.u64 = 1; Max.u64 = 1; - node0_cpu.cycles.0 : Accumulator : Sum.u64 = 60312; SumSQ.u64 = 60312; Count.u64 = 60312; Min.u64 = 1; Max.u64 = 1; - node0_cpu.active_cycles.0 : Accumulator : Sum.u64 = 13432; SumSQ.u64 = 13432; Count.u64 = 13432; Min.u64 = 1; Max.u64 = 1; - node0_cpu.fp_sp_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_ins.0 : Accumulator : Sum.u64 = 127; SumSQ.u64 = 127; Count.u64 = 127; Min.u64 = 1; Max.u64 = 1; - node0_cpu.fp_sp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_sp_scalar_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_scalar_ins.0 : Accumulator : Sum.u64 = 127; SumSQ.u64 = 127; Count.u64 = 127; Min.u64 = 1; Max.u64 = 1; - node0_cpu.fp_sp_ops.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_ops.0 : Accumulator : Sum.u64 = 127; SumSQ.u64 = 127; Count.u64 = 127; Min.u64 = 1; Max.u64 = 1; - node0_cpu.read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.read_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.write_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.split_read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.split_write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.flush_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fence_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.no_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.instruction_count.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.cycles.1 : Accumulator : Sum.u64 = 60311; SumSQ.u64 = 60311; Count.u64 = 60311; Min.u64 = 1; Max.u64 = 1; - node0_cpu.active_cycles.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_sp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_sp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_sp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_sp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_hits.Core0_PTWC : Accumulator : Sum.u64 = 7; SumSQ.u64 = 7; Count.u64 = 7; Min.u64 = 1; Max.u64 = 1; - node0_mmu.tlb_misses.Core0_PTWC : Accumulator : Sum.u64 = 40; SumSQ.u64 = 40; Count.u64 = 40; Min.u64 = 1; Max.u64 = 1; - node0_mmu.total_waiting.0 : Accumulator : Sum.u64 = 66211; SumSQ.u64 = 52679535; Count.u64 = 4206; Min.u64 = 1; Max.u64 = 2224; - node0_mmu.tlb_hits.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_misses.Core0_L2 : Accumulator : Sum.u64 = 47; SumSQ.u64 = 47; Count.u64 = 47; Min.u64 = 1; Max.u64 = 1; - node0_mmu.tlb_shootdown.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_hits.Core0_L1 : Accumulator : Sum.u64 = 4107; SumSQ.u64 = 4107; Count.u64 = 4107; Min.u64 = 1; Max.u64 = 1; - node0_mmu.tlb_misses.Core0_L1 : Accumulator : Sum.u64 = 99; SumSQ.u64 = 99; Count.u64 = 99; Min.u64 = 1; Max.u64 = 1; - node0_mmu.tlb_shootdown.Core0_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_hits.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_misses.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.total_waiting.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_hits.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_misses.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_shootdown.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_hits.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_misses.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_shootdown.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.read_requests.0 : Accumulator : Sum.u64 = 2865; SumSQ.u64 = 2865; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 1; - node1_cpu.write_requests.0 : Accumulator : Sum.u64 = 1338; SumSQ.u64 = 1338; Count.u64 = 1338; Min.u64 = 1; Max.u64 = 1; - node1_cpu.read_request_sizes.0 : Accumulator : Sum.u64 = 16664; SumSQ.u64 = 124680; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 64; - node1_cpu.write_request_sizes.0 : Accumulator : Sum.u64 = 9670; SumSQ.u64 = 83592; Count.u64 = 1338; Min.u64 = 1; Max.u64 = 64; - node1_cpu.split_read_requests.0 : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - node1_cpu.split_write_requests.0 : Accumulator : Sum.u64 = 1; SumSQ.u64 = 1; Count.u64 = 1; Min.u64 = 1; Max.u64 = 1; - node1_cpu.flush_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fence_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.no_ops.0 : Accumulator : Sum.u64 = 5798; SumSQ.u64 = 5798; Count.u64 = 5798; Min.u64 = 1; Max.u64 = 1; - node1_cpu.instruction_count.0 : Accumulator : Sum.u64 = 10001; SumSQ.u64 = 10001; Count.u64 = 10001; Min.u64 = 1; Max.u64 = 1; - node1_cpu.cycles.0 : Accumulator : Sum.u64 = 1314843; SumSQ.u64 = 1314843; Count.u64 = 1314843; Min.u64 = 1; Max.u64 = 1; - node1_cpu.active_cycles.0 : Accumulator : Sum.u64 = 12494; SumSQ.u64 = 12494; Count.u64 = 12494; Min.u64 = 1; Max.u64 = 1; - node1_cpu.fp_sp_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_ins.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - node1_cpu.fp_sp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_sp_scalar_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_scalar_ins.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - node1_cpu.fp_sp_ops.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_ops.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - node1_cpu.read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.read_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.write_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.split_read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.split_write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.flush_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fence_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.no_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.instruction_count.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.cycles.1 : Accumulator : Sum.u64 = 1314842; SumSQ.u64 = 1314842; Count.u64 = 1314842; Min.u64 = 1; Max.u64 = 1; - node1_cpu.active_cycles.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_sp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_sp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_sp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_sp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_hits.Core0_PTWC : Accumulator : Sum.u64 = 7; SumSQ.u64 = 7; Count.u64 = 7; Min.u64 = 1; Max.u64 = 1; - node1_mmu.tlb_misses.Core0_PTWC : Accumulator : Sum.u64 = 45; SumSQ.u64 = 45; Count.u64 = 45; Min.u64 = 1; Max.u64 = 1; - node1_mmu.total_waiting.0 : Accumulator : Sum.u64 = 57913; SumSQ.u64 = 42467155; Count.u64 = 4205; Min.u64 = 1; Max.u64 = 2087; - node1_mmu.tlb_hits.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_misses.Core0_L2 : Accumulator : Sum.u64 = 52; SumSQ.u64 = 52; Count.u64 = 52; Min.u64 = 1; Max.u64 = 1; - node1_mmu.tlb_shootdown.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_hits.Core0_L1 : Accumulator : Sum.u64 = 4111; SumSQ.u64 = 4111; Count.u64 = 4111; Min.u64 = 1; Max.u64 = 1; - node1_mmu.tlb_misses.Core0_L1 : Accumulator : Sum.u64 = 94; SumSQ.u64 = 94; Count.u64 = 94; Min.u64 = 1; Max.u64 = 1; - node1_mmu.tlb_shootdown.Core0_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_hits.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_misses.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.total_waiting.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_hits.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_misses.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_shootdown.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_hits.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_misses.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_shootdown.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; -Simulation is complete, simulated time: 657.422 us diff --git a/src/sst/elements/serrano/Makefile.am b/src/sst/elements/serrano/Makefile.am deleted file mode 100644 index f645da5627..0000000000 --- a/src/sst/elements/serrano/Makefile.am +++ /dev/null @@ -1,29 +0,0 @@ -# -*- Makefile -*- -# -# - -AM_CPPFLAGS = \ - $(MPI_CPPFLAGS) - -compdir = $(pkglibdir) -comp_LTLIBRARIES = libserrano.la -libserrano_la_SOURCES = \ - scircq.h \ - sercgunit.h \ - seriterunit.h \ - serprintunit.h \ - serrano.cc \ - serrano.h \ - serstdunit.h \ - smsg.h - -EXTRA_DIST = \ - tests/test_serrano.py \ - tests/graphs/sum.graph - -libserrano_la_LDFLAGS = -module -avoid-version - -install-exec-hook: - $(SST_REGISTER_TOOL) SST_ELEMENT_SOURCE serrano=$(abs_srcdir) - $(SST_REGISTER_TOOL) SST_ELEMENT_TESTS serrano=$(abs_srcdir)/tests - diff --git a/src/sst/elements/serrano/scircq.h b/src/sst/elements/serrano/scircq.h deleted file mode 100644 index 5492705f81..0000000000 --- a/src/sst/elements/serrano/scircq.h +++ /dev/null @@ -1,85 +0,0 @@ - -#ifndef _H_SERRANO_CIRC_Q -#define _H_SERRANO_CIRC_Q - -#include - -namespace SST { -namespace Serrano { - -template -class SerranoCircularQueue { -public: - SerranoCircularQueue( const size_t size ) : - max_capacity(size) { - - front = 0; - back = 0; - - count = 0; - data = new T[size]; - } - - ~SerranoCircularQueue() { - delete[] data; - } - - bool empty() { - return (front == back); - } - - bool full() { - return ( safe_inc(back) == front ); - } - - void push(T item) { - data[back] = item; - back = safe_inc(back); - count++; - } - - T peek() { - return data[front]; - } - - T peek( const size_t index ) { - return data[ (front+index) % max_capacity ]; - } - - T pop() { - T temp = data[front]; - front = safe_inc(front); - count--; - return temp; - } - - size_t size() const { - return count; - } - - size_t capacity() const { - return max_capacity; - } - - void clear() { - front = 0; - back = 0; - } - -private: - size_t safe_inc(size_t v) { - return (v+1) % max_capacity; - } - - size_t front; - size_t back; - size_t count; - const size_t max_capacity; - T* data; - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/sercgunit.h b/src/sst/elements/serrano/sercgunit.h deleted file mode 100644 index f2f675eff7..0000000000 --- a/src/sst/elements/serrano/sercgunit.h +++ /dev/null @@ -1,74 +0,0 @@ -\ -#ifndef _H_SERRANO_COARSE_UNIT -#define _H_SERRANO_COARSE_UNIT - -#include - -#include -#include -#include - -#include "scircq.h" -#include "smsg.h" - -namespace SST { -namespace Serrano { - -enum SerranoStandardType { - TYPE_INT32, - TYPE_INT64, - TYPE_FP32, - TYPE_FP64, - TYPE_CUSTOM -}; - -class SerranoCoarseUnit : public SST::SubComponent { - -public: - SST_ELI_REGISTER_SUBCOMPONENT_API( SST::Serrano::SerranoCoarseUnit ) - - SerranoCoarseUnit( SST::ComponentId_t id, Params& params ) : - SubComponent(id) { - - int verbosity = params.find("verbose", 0); - char* comp_name = new char[64]; - snprintf(comp_name, 64, "[cgra]: "); - - output = new SST::Output(comp_name, verbosity, 0, Output::STDOUT ); - } - - ~SerranoCoarseUnit() { - delete output; - } - - virtual bool stillProcessing() = 0; - virtual void execute( const uint64_t current_cycle ) = 0; - - void addInputQueue( SerranoCircularQueue* new_q ) { - output->verbose(CALL_INFO, 4, 0, "Added input queue.\n"); - input_qs.push_back( new_q ); - } - - void addOutputQueue( SerranoCircularQueue* new_q ) { - output->verbose(CALL_INFO, 4, 0, "Added output queue.\n"); - output_qs.push_back( new_q ); - } - - virtual const char* getUnitTypeString() = 0; - - size_t countInputQueues() const { return input_qs.size(); } - size_t countOutputQueues() const { return output_qs.size(); } - - virtual void checkRequiredQueues( SST::Output* output ) = 0; - -protected: - SST::Output* output; - std::vector< SerranoCircularQueue* > input_qs; - std::vector< SerranoCircularQueue* > output_qs; - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/seriterunit.h b/src/sst/elements/serrano/seriterunit.h deleted file mode 100644 index 6b4a919445..0000000000 --- a/src/sst/elements/serrano/seriterunit.h +++ /dev/null @@ -1,161 +0,0 @@ - -#ifndef _H_SERRANO_ITERATOR_UNIT -#define _H_SERRANO_ITERATOR_UNIT - -#include "sercgunit.h" -#include - -namespace SST { -namespace Serrano { - -class SerranoIteratorUnit : public SerranoCoarseUnit { - -public: - SST_ELI_REGISTER_SUBCOMPONENT_DERIVED( - SST::Serrano::SerranoIteratorUnit, - "serrano", - "SerranoIteratorUnit", - SST_ELI_ELEMENT_VERSION(1, 0, 0), - "Performs iteration-like behavior", - SST::Serrano::SerranoCoarseUnit ) - - SST_ELI_DOCUMENT_PARAMS( - { "start", "Value to start iterating at." }, - { "end", "Value to stop iterating at." }, - { "step", "Value to step the iteration with." }, - { "data_type", "Type of the iteration value" } - ) - - SST_ELI_DOCUMENT_STATISTICS() - - SerranoIteratorUnit( SST::ComponentId_t id, SST::Params& params ) : - SerranoCoarseUnit(id, params) { - - func = nullptr; - keep_processing = true; - - const int params_type = params.find("data_type", 1); - - output->verbose(CALL_INFO, 2, 0, "Creating iterator with data-type: %d\n", params_type ); - - switch(params_type) { - case 1: - d_type = TYPE_INT32; - configureIterations( params.find("start", 0), - params.find("step", 1), - params.find("end", std::numeric_limits::max() ) ); - func = std::bind( &SST::Serrano::SerranoIteratorUnit::execute_int32, this ); - break; - case 2: - d_type = TYPE_INT64; - configureIterations( params.find("start", 0), - params.find("step", 1), - params.find("end", std::numeric_limits::max() ) ); - func = std::bind( &SST::Serrano::SerranoIteratorUnit::execute_int64, this ); - break; - case 4: - d_type = TYPE_FP32; - configureIterations( params.find("start", 0 ), - params.find("step", 1.0 ), - params.find("end", std::numeric_limits::max() ) ); - func = std::bind( &SST::Serrano::SerranoIteratorUnit::execute_fp32, this ); - break; - case 8: - d_type = TYPE_FP64; - configureIterations( params.find("start", 0 ), - params.find("step", 1.0 ), - params.find("end", std::numeric_limits::max() ) ); - func = std::bind( &SST::Serrano::SerranoIteratorUnit::execute_fp64, this ); - break; - default: - output->fatal(CALL_INFO, -1, "Error: unknown data type to process.\n"); - break; - } - } - - ~SerranoIteratorUnit() { - - } - - virtual const char* getUnitTypeString() { - return "ITERATOR"; - } - - virtual bool stillProcessing() { - return keep_processing; - } - - virtual void checkRequiredQueues( SST::Output* output ) { - if( output_qs.size() == 0 ) { - output->fatal(CALL_INFO, -1, "Need an output queue for an iterator to work.\n"); - } - } - - virtual void execute( const uint64_t currentCycle ) { - output->verbose(CALL_INFO, 8, 0, "Executing iteration generator...\n"); - - if( nullptr != func ) { - func(); - } - } - -protected: - SerranoStandardType d_type; - std::function func; - void* current_value; - void* max_value; - void* step_value; - bool keep_processing; - - void execute_int32() { - executeStep(); - } - - void execute_int64() { - executeStep(); - } - - void execute_fp32() { - executeStep(); - } - - void execute_fp64() { - executeStep(); - } - - template void executeStep() { - T* t_current_value = (T*) current_value; - T* t_max_value = (T*) max_value; - T* t_step_value = (T*) step_value; - - if( (*t_current_value) < (*t_max_value) ) { - if( ! output_qs[0]->full() ) { - output_qs[0]->push( new SerranoMessage( sizeof(T), t_current_value ) ); - (*t_current_value) += (*t_step_value); - } - } else { - output->verbose(CALL_INFO, 16, 0, "Hit the upper limit of the iteration value, processing is complete for iterator.\n"); - keep_processing = false; - } - } - - template void configureIterations( const T start, const T step, const T end ) { - current_value = (void*) ( new T[1] ); - max_value = (void*) ( new T[1] ); - step_value = (void*) ( new T[1] ); - - T* t_current_value = (T*) current_value; - T* t_max_value = (T*) max_value; - T* t_step_value = (T*) step_value; - - (*t_current_value ) = start; - (*t_max_value ) = end; - (*t_step_value ) = step; - } - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/serprintunit.h b/src/sst/elements/serrano/serprintunit.h deleted file mode 100644 index d89e01ea2b..0000000000 --- a/src/sst/elements/serrano/serprintunit.h +++ /dev/null @@ -1,88 +0,0 @@ - -#ifndef _H_SERRANO_PRINT_UNIT -#define _H_SERRANO_PRINT_UNIT - -#include -#include "sercgunit.h" - -namespace SST { -namespace Serrano { - -class SerranoPrinterUnit : public SerranoCoarseUnit { -public: - SST_ELI_REGISTER_SUBCOMPONENT_DERIVED( - SST::Serrano::SerranoPrinterUnit, - "serrano", - "SerranoPrinterUnit", - SST_ELI_ELEMENT_VERSION(1, 0, 0), - "Performs printing of a value", - SST::Serrano::SerranoCoarseUnit ) - - SST_ELI_DOCUMENT_PARAMS( - - ) - SST_ELI_DOCUMENT_STATISTICS() - - SerranoPrinterUnit( SST::ComponentId_t id, SST::Params& params ) : - SerranoCoarseUnit(id, params) { - - const int param_d_type = params.find("data_type", 0); - - switch( param_d_type ) { - case 1: d_type = TYPE_INT32; break; - case 2: d_type = TYPE_INT64; break; - case 4: d_type = TYPE_FP32; break; - case 8: d_type = TYPE_FP64; break; - } - - } - - virtual bool stillProcessing() { - return false; - } - - virtual void execute( const uint64_t current_cycle ) { - print(); - } - - virtual void checkRequiredQueues( SST::Output* output ) { - if( 0 == input_qs.size() ) { - output->fatal(CALL_INFO, -1, "Error - not enough input queues for a printer unit.\n"); - } - } - - virtual const char* getUnitTypeString() { - return "PRINTER"; - } - -protected: - SerranoStandardType d_type; - - void print() { - if(! input_qs[0]->empty() ) { - SerranoMessage* msg = input_qs[0]->pop(); - - switch(d_type) { - case TYPE_INT32: - output->verbose(CALL_INFO, 0, 0, "%" PRId32 "\n", extractValue(output, msg) ); break; - case TYPE_INT64: - output->verbose(CALL_INFO, 0, 0, "%" PRId64 "\n", extractValue(output, msg) ); break; - case TYPE_FP32: - output->verbose(CALL_INFO, 0, 0, "%f\n", extractValue(output, msg) ); break; - case TYPE_FP64: - output->verbose(CALL_INFO, 0, 0, "%f\n", extractValue(output, msg) ); break; - default: - output->fatal(CALL_INFO, -1, "Unknown data type.\n"); - break; - } - - delete msg; - } - } - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/serrano.cc b/src/sst/elements/serrano/serrano.cc deleted file mode 100644 index d0bf588c78..0000000000 --- a/src/sst/elements/serrano/serrano.cc +++ /dev/null @@ -1,256 +0,0 @@ - -#include "serrano.h" - -#include "sercgunit.h" -#include "serstdunit.h" -#include "seriterunit.h" -#include "serprintunit.h" - -#include - -using namespace SST::Serrano; - -SerranoComponent::SerranoComponent( SST::ComponentId_t id, SST::Params& params ) : - Component(id) { - - char* comp_prefix = new char[128]; - snprintf(comp_prefix, 128, "[Ser: %5d] ", (int) id); - - const int verbosity = params.find("verbose", 0); - output = new SST::Output( comp_prefix, verbosity, 0, Output::STDOUT ); - delete[] comp_prefix; - - const std::string clock = params.find("clock", "1GHz"); - output->verbose(CALL_INFO, 2, 0, "Configuring Serrano for clock of %s...\n", clock.c_str()); - registerClock( clock, new Clock::Handler( this, &SerranoComponent::tick ) ); - - constexpr int kernel_name_len = 128; - char* kernel_name = new char[kernel_name_len]; - for( int i = 0; i < std::numeric_limits::max(); ++i ) { - snprintf( kernel_name, kernel_name_len, "kernel%d", i); - std::string kernel_name_file = params.find( kernel_name, "" ); - - if( "" != kernel_name_file) { - output->verbose(CALL_INFO, 4, 0, "Found Kernel (%s): %s\n", kernel_name, kernel_name_file.c_str()); - kernel_queue.push_back( kernel_name_file ); - } else { - break; - } - } - delete[] kernel_name; - - if( kernel_queue.size() > 0 ) { - constructGraph( output, kernel_queue.front().c_str() ); - kernel_queue.pop_front(); - } - - registerAsPrimaryComponent(); - primaryComponentDoNotEndSim(); -} - -SerranoComponent::~SerranoComponent() { - delete output; -} - -bool SerranoComponent::tick( SST::Cycle_t currentCycle ) { - - output->verbose(CALL_INFO, 4, 0, "Clocking Serrano cycle %" PRIu64 "...\n", currentCycle ); - - // Tick all units - for( auto next_unit : units ) { - next_unit.second->execute( currentCycle ); - } - - bool units_continue = false; - bool queues_continue = false; - - // Do we have any units which want to continue processing - for( auto next_unit : units ) { - output->verbose(CALL_INFO, 16, 0, "Unit-ID: %" PRIu64 " status: %s\n", next_unit.first, - ( next_unit.second->stillProcessing() ? "keep-processing" : "completed" ) ); - units_continue |= next_unit.second->stillProcessing(); - } - - // Check that any queue is not empty - for( auto next_q : msg_queues ) { - queues_continue |= ( ! next_q.second->empty() ); - } - - if( units_continue ) { - output->verbose(CALL_INFO, 4, 0, "Work units are still processing, continue for another cycle\n"); - return false; - } else { - if( queues_continue ) { - output->verbose(CALL_INFO, 4, 0, "Queues contain entries that may need processing, continue for another cycle.\n"); - return false; - } else { - output->verbose(CALL_INFO, 4, 0, "Neither queues or units have no work, no need to continue processing.\n"); - primaryComponentOKToEndSim(); - return true; - } - } - -} - -void SerranoComponent::constructGraph( SST::Output* output, const char* kernel_file ) { - output->verbose(CALL_INFO, 4, 0, "Parsing kernel at: %s...\n", kernel_file); - FILE* graph_file = fopen( kernel_file, "rt" ); - - if( nullptr == graph_file ) { - output->fatal(CALL_INFO, -1, "Error: unable to open file: %s\n", kernel_file ); - } - - constexpr int buff_max = 1024; - char* line = new char[buff_max]; - int index = 0; - - Params empty_params; - - while( ! feof( graph_file ) ) { - read_line( graph_file, line, buff_max); - printf("Line[%s]\n", line); - - if( ( 0 == strcmp( line, "" ) ) || ( line[0] == '#') ) { - continue; - } - - char* token = strtok( line, " " ); - char* item_id = strtok( nullptr, " " ); - const uint64_t id = std::atoll( item_id ); - - if( 0 == strcmp( token, "NODE" ) ) { - char* unit_type = strtok( nullptr, " " ); - char* unit_data_type = strtok( nullptr, " " ); - - int iterator_type = 0; - SerranoStandardType node_op_type; - - if( 0 == strcmp( unit_data_type, "INT32" ) ) { - node_op_type = TYPE_INT32; - iterator_type = 1; - } else if( 0 == strcmp( unit_data_type, "INT64" ) ) { - node_op_type = TYPE_INT64; - iterator_type = 2; - } else if( 0 == strcmp( unit_data_type, "FP32" ) ) { - node_op_type = TYPE_FP32; - iterator_type = 4; - } else if( 0 == strcmp( unit_data_type, "FP64" ) ) { - node_op_type = TYPE_FP64; - iterator_type = 8; - } - - Params unit_params; - - char* verbose_param = new char[16]; - snprintf( verbose_param, 16, "%d", output->getVerboseLevel() ); - unit_params.insert( "verbose", verbose_param ); - delete[] verbose_param; - - char* param_name = strtok( nullptr, " " ); - while( nullptr != param_name ) { - char* value = strtok( nullptr, " " ); - output->verbose(CALL_INFO, 4, 0, "param: %s=%s\n", param_name, value); - unit_params.insert( param_name, value ); - param_name = strtok( nullptr, " "); - } - - SerranoCoarseUnit* new_unit = nullptr; - - output->verbose(CALL_INFO, 4, 0, "Creating a new coarse unit type: %s\n", unit_type); - - char* dtype_str = new char[16]; - snprintf( dtype_str, 16, "%d", iterator_type ); - unit_params.insert( "data_type", dtype_str ); - delete[] dtype_str; - - if( 0 == strcmp( unit_type, "ITERATOR" ) ) { - new_unit = loadAnonymousSubComponent( "serrano.SerranoIteratorUnit", "slot", 0, ComponentInfo::SHARE_NONE, unit_params ); - } else if( 0 == strcmp( unit_type, "ADD" ) ) { - new_unit = loadAnonymousSubComponent( "serrano.SerranoBasicUnit", "slot", 0, ComponentInfo::SHARE_NONE, unit_params ); - SerranoBasicUnit* new_unit_basic = (SerranoBasicUnit*) new_unit; - new_unit_basic->configureFunction( output, OP_ADD, node_op_type ); - } else if( 0 == strcmp( unit_type, "SUB" ) ) { - new_unit = loadAnonymousSubComponent( "serrano.SerranoBasicUnit", "slot", 0, ComponentInfo::SHARE_NONE, unit_params ); - SerranoBasicUnit* new_unit_basic = (SerranoBasicUnit*) new_unit; - new_unit_basic->configureFunction( output, OP_SUB, node_op_type ); - } else if( 0 == strcmp( unit_type, "PRINTER" ) ) { - new_unit = loadAnonymousSubComponent( "serrano.SerranoPrinterUnit", "slot", 0, ComponentInfo::SHARE_NONE, unit_params ); - } else { - output->fatal(CALL_INFO, -1, "Error: unable to parse node type (%s)\n", token ); - } - - units.insert( std::pair< uint64_t, SerranoCoarseUnit* >( id, new_unit ) ); - } else if( 0 == strcmp( token, "LINK" ) ) { - char* in_unit = strtok( nullptr, " " ); - char* out_unit = strtok( nullptr, " " ); - - const uint64_t u64_in_unit = std::atoll( in_unit ); - const uint64_t u64_out_unit = std::atoll( out_unit ); - - SerranoCircularQueue* new_q = new SerranoCircularQueue(2); - - if( ( units.find( u64_in_unit ) != units.end() ) && ( units.find( u64_out_unit ) != units.end() ) ) { - output->verbose(CALL_INFO, 4, 0, "Connecting %" PRIu64 " -> %" PRIu64 " (link-id: %" PRIu64 ")\n", - u64_in_unit, u64_out_unit, id); - // These are swapped, input to the link is the output of a unit and vice versa - units[ u64_in_unit ]->addOutputQueue( new_q ); - units[ u64_out_unit ]->addInputQueue( new_q ); - } else { - output->fatal(CALL_INFO, -1, "Error: link does not connect an existing input or output component.\n"); - } - } else { - - } - } - - delete[] line; - fclose( graph_file ); - - /* cycle over and check queues are good, these will fatal */ - for( auto next_unit : units ) { - next_unit.second->checkRequiredQueues( output ); - } -} - -int SerranoComponent::read_line( FILE* file_h, char* buffer, const size_t buffer_max ) { - int status = 0; - int index = 0; - bool keep_looping = true; - - while( keep_looping ) { - char nxt_c = (char) fgetc( file_h ); - - switch( nxt_c ) { - case EOF: - keep_looping = false; - break; - case '\n': - keep_looping = false; - break; - default: - buffer[index++] = nxt_c; - break; - } - } - - buffer[index] = '\0'; - return status; -} - -void SerranoComponent::clearGraph() { - output->verbose(CALL_INFO, 2, 0, "Clearing current graph...\n"); - - for( auto next_q : msg_queues ) { - delete next_q.second;; - } - - msg_queues.clear(); - - for( auto next_unit : units ) { - delete next_unit.second; - } - - units.clear(); - - output->verbose(CALL_INFO, 2, 0, "Graph clear done. Reset is complete\n"); -} diff --git a/src/sst/elements/serrano/serrano.h b/src/sst/elements/serrano/serrano.h deleted file mode 100644 index aaf21477be..0000000000 --- a/src/sst/elements/serrano/serrano.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _H_SST_SERRANO -#define _H_SST_SERRANO - -#include -#include -#include - -#include - -#include "smsg.h" -#include "scircq.h" -#include "sercgunit.h" - - -namespace SST { -namespace Serrano { - -class SerranoComponent : public SST::Component { - -public: - SerranoComponent( SST::ComponentId_t id, SST::Params& params ); - ~SerranoComponent(); - - bool tick( SST::Cycle_t currentCycle ); - - SST_ELI_REGISTER_COMPONENT( - SerranoComponent, - "serrano", - "Serrano", - SST_ELI_ELEMENT_VERSION( 1, 0, 0 ), - "High-Level CGRA Simulation Model", - COMPONENT_CATEGORY_PROCESSOR - ) - - SST_ELI_DOCUMENT_PARAMS( - - ) - - SST_ELI_DOCUMENT_STATISTICS( - - ) - - void clearGraph(); - void constructGraph( SST::Output* output, const char* kernel_file ); - -private: - int read_line( FILE* file_h, char* buffer, const size_t buffer_max ); - - SST::Output* output; - std::list< std::string > kernel_queue; - std::map< uint64_t, SerranoCoarseUnit* > units; - std::map< uint64_t, SerranoCircularQueue* > msg_queues; - - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/serstdunit.h b/src/sst/elements/serrano/serstdunit.h deleted file mode 100644 index b8127ae6cc..0000000000 --- a/src/sst/elements/serrano/serstdunit.h +++ /dev/null @@ -1,178 +0,0 @@ - -#ifndef _H_SERRANO_BINARY_OP_CG_UNIT -#define _H_SERRANO_BINARY_OP_CG_UNIT - -#include - -#include "smsg.h" -#include "sercgunit.h" -#include "scircq.h" - -namespace SST { -namespace Serrano { - -enum SerranoStandardOp { - OP_ADD, - OP_SUB, - OP_DIV, - OP_MUL, - OP_MOD, - OP_MSG_DUPLICATE, - OP_MSG_INTERLEAVE, - OP_CUSTOM -}; - -class SerranoBasicUnit : public SerranoCoarseUnit { - -public: - SST_ELI_REGISTER_SUBCOMPONENT_DERIVED( - SST::Serrano::SerranoBasicUnit, - "serrano", - "SerranoBasicUnit", - SST_ELI_ELEMENT_VERSION(1, 0, 0), - "Basic coarse-grained functional unit for simple operations", - SST::Serrano::SerranoCoarseUnit ) - - SST_ELI_DOCUMENT_PARAMS() - SST_ELI_DOCUMENT_STATISTICS() - - SerranoBasicUnit( SST::ComponentId_t id, Params& params ) : - SerranoCoarseUnit(id, params) { - - required_in_qs = 0; - required_out_qs = 0; - } - - ~SerranoBasicUnit() { - msgs_in.clear(); - } - - void configureFunction( SST::Output* output, SerranoStandardOp op, SerranoStandardType dt ) { - switch( op ) { - case OP_ADD: - switch( dt ) { - case TYPE_INT32: unit_func = std::bind( &SST::Serrano::SerranoBasicUnit::execute_i32_add, this, std::placeholders::_1, std::placeholders::_2 ); break; - case TYPE_INT64: unit_func = std::bind( &SST::Serrano::SerranoBasicUnit::execute_i64_add, this, std::placeholders::_1, std::placeholders::_2 ); break; - case TYPE_FP32: unit_func = std::bind( &SST::Serrano::SerranoBasicUnit::execute_f32_add, this, std::placeholders::_1, std::placeholders::_2 ); break; - case TYPE_FP64: unit_func = std::bind( &SST::Serrano::SerranoBasicUnit::execute_f64_add, this, std::placeholders::_1, std::placeholders::_2 ); break; - default: - output->fatal(CALL_INFO, -1, "Unknown data type supplied to an add operation.\n"); - break; - } - - required_in_qs = 2; - required_out_qs = 1; - - break; - default: - output->verbose(CALL_INFO, 2, 0, "Function was not decoded, and so will not be set. This will likely cause a fatal later in execution.\n"); - } - } - - void checkRequiredQueues( SST::Output* output ) { - if( ( required_in_qs >= input_qs.size() ) && - ( required_out_qs >= output_qs.size() ) ) { - - } else { - output->fatal(CALL_INFO, -1, "Error: required queues were not matched. in (req/av): %d/%d, out (req/av): %d/%d\n", - (int) required_in_qs, (int) input_qs.size(), (int) required_out_qs, (int) output_qs.size() ); - } - } - - virtual const char* getUnitTypeString() { - return "STD-UNIT"; - } - - virtual bool stillProcessing() { return false; } - - virtual void execute( const uint64_t current_cycle ) { - if( nullptr == unit_func ) { - output->fatal(CALL_INFO, -1, "Error: function to execute has not been defined or was not decoded correctly.\n"); - } - - bool all_ins_ready = true; - bool out_ready = (! output_qs[0]->full()); - - for( SerranoCircularQueue* in_q : input_qs ) { - all_ins_ready &= (!in_q->empty()); - } - - if( all_ins_ready & out_ready ) { - // We are good to go, all inputs have a message, output has a slot - for( SerranoCircularQueue* in_q : input_qs ) { - msgs_in.push_back( in_q->pop() ); - } - - // Execute the function - unit_func( output, msgs_in ); - - // Delete the messages from the incoming queues to free memory - for( SerranoMessage* in_msg : msgs_in ) { - delete in_msg; - } - - // Clear the vector this cycle - msgs_in.clear(); - } else { - output->verbose(CALL_INFO, 8, 0, "Unable to execute this cycle due to queue-check failing: in-q: %s / out-q: %s\n", - (all_ins_ready) ? "ready" : "not-ready", (out_ready) ? "ready" : "not-ready" ); - } - } - -protected: - template void execute_add( std::vector& msg_in, const T init_value ) { - T result = init_value; - - for( SerranoMessage* msg : msg_in ) { - result += extractValue( output, msg ); - } - - output_qs[0]->push( constructMessage( result ) ); - } - - template void execute_sub( std::vector& msg_in, const T init_value ) { - T result = init_value; - - for( SerranoMessage* msg : msg_in ) { - result -= extractValue( output, msg ); - } - - output_qs[0]->push( constructMessage( result ) ); - } - - void execute_i32_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0); - } - - void execute_u32_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0); - } - - void execute_i64_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0); - } - - void execute_u64_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0); - } - - void execute_f32_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0.0); - } - - void execute_f64_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0.0); - } - - std::vector msgs_in; - std::function< void( SST::Output*, std::vector& )> unit_func; - - size_t required_in_qs; - size_t required_out_qs; - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/smsg.h b/src/sst/elements/serrano/smsg.h deleted file mode 100644 index 03c02d18b4..0000000000 --- a/src/sst/elements/serrano/smsg.h +++ /dev/null @@ -1,74 +0,0 @@ - -#ifndef _H_SERRANO_MESSAGE -#define _H_SERRANO_MESSAGE - -#include -#include - -namespace SST { -namespace Serrano { - -class SerranoMessage { - -public: - SerranoMessage( const size_t size ) : msg_size(size) { - payload = new uint8_t[ msg_size ]; - } - - SerranoMessage( const size_t size, void* ptr ) : msg_size(size) { - payload = new uint8_t[ msg_size ]; - - uint8_t* ptr_u = (uint8_t*) ptr; - - for( size_t i = 0; i < msg_size; ++i ) { - payload[i] = ptr_u[i]; - } - } - - ~SerranoMessage() { - delete[] payload; - } - - size_t getSize() const { return msg_size; } - uint8_t* getPayload() { return payload; } - - void setPayload( const uint8_t* new_data ) { - for( size_t i = 0; i < msg_size; ++i ) { - payload[i] = new_data[i]; - } - } - - void setPayload( const uint8_t* new_data, const size_t new_size ) { - for( size_t i = 0; i < std::min( new_size, msg_size); ++i ) { - payload[i] = new_data[i]; - } - } - -protected: - const size_t msg_size; - uint8_t* payload; - -}; - -template SerranoMessage* constructMessage( T value ) { - SerranoMessage* new_msg = new SerranoMessage( sizeof(T) ); - new_msg->setPayload( (uint8_t*) &value ); - - return new_msg; -}; - -template T extractValue( SST::Output* output, SerranoMessage* msg ) { - if( sizeof(T) == msg->getSize() ) { - return *( (T*) msg->getPayload() ); - } else { - output->fatal(CALL_INFO, -1, "Error: tried to construct a value needing %d bytes from a message with %d bytes in payload.\n", - (int) sizeof(T), (int) msg->getSize()); - - return T(); - } -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/tests/graphs/sum.graph b/src/sst/elements/serrano/tests/graphs/sum.graph deleted file mode 100644 index 5a64f3c5b4..0000000000 --- a/src/sst/elements/serrano/tests/graphs/sum.graph +++ /dev/null @@ -1,9 +0,0 @@ - -NODE 0 ITERATOR INT32 start 0 step 1 end 100 -NODE 1 ITERATOR INT32 start 100 step 1 end 200 -NODE 2 ADD INT32 -NODE 3 PRINTER INT32 - -LINK 0 0 2 -LINK 1 1 2 -LINK 2 2 3 diff --git a/src/sst/elements/serrano/tests/test_serrano.py b/src/sst/elements/serrano/tests/test_serrano.py deleted file mode 100644 index cf21ccede0..0000000000 --- a/src/sst/elements/serrano/tests/test_serrano.py +++ /dev/null @@ -1,13 +0,0 @@ - -import os -import sst - -# Define SST core options -sst.setProgramOption("timebase", "1ps") -sst.setProgramOption("stopAtCycle", "0s") - -serr_comp = sst.Component("serrano", "serrano.Serrano") -serr_comp.addParams({ - "verbose" : 26, - "kernel0" : "test/graphs/sum.graph" - })