-
Notifications
You must be signed in to change notification settings - Fork 406
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8bb0e34
commit 0018bfa
Showing
69 changed files
with
5,744 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Minimal makefile for Sphinx documentation | ||
# | ||
|
||
# You can set these variables from the command line, and (for the two
# assigned with ?=) from the environment as well.
#
# SPHINXOPTS   extra options appended to every sphinx-build invocation.
# SPHINXBUILD  the sphinx-build executable to run.
# SOURCEDIR    source directory handed to sphinx-build.
# BUILDDIR     build directory handed to sphinx-build.
#
# Keep comments off the assignment lines: trailing text or whitespace on an
# assignment line becomes part of the variable's value.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build
|
||
# Put it first so that "make" without argument is like "make help".
# Runs sphinx-build in "make mode" (-M) with the target name "help".
# The recipe line must begin with a hard TAB.
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "help" is a command, not a file.  "Makefile" is listed too so that the
# catch-all pattern rule is never applied to the Makefile itself (GNU make
# skips implicit rule search for phony targets).
.PHONY: help Makefile
|
||
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
# $@ expands to whatever target name was requested and is passed through
# unchanged as the argument to -M.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,368 @@ | ||
@article{Gharachorloo:1990:MCE:325096.325102, | ||
author = {Gharachorloo, Kourosh and Lenoski, Daniel and Laudon, James and Gibbons, Phillip and Gupta, Anoop and Hennessy, John}, | ||
title = {Memory Consistency and Event Ordering in Scalable Shared-memory Multiprocessors}, | ||
journal = {SIGARCH Comput. Archit. News}, | ||
issue_date = {June 1990}, | ||
volume = {18}, | ||
number = {2SI}, | ||
month = may, | ||
year = {1990}, | ||
issn = {0163-5964}, | ||
pages = {15--26}, | ||
numpages = {12}, | ||
url = {http://doi.acm.org/10.1145/325096.325102}, | ||
doi = {10.1145/325096.325102}, | ||
acmid = {325102}, | ||
publisher = {ACM}, | ||
address = {New York, NY, USA}, | ||
} | ||
@inproceedings{seznec2002design, | ||
title={Design tradeoffs for the Alpha EV8 conditional branch predictor}, | ||
author={Seznec, Andr{\'e} and Felix, Stephen and Krishnan, Venkata and Sazeides, Yiannakis}, | ||
booktitle={Computer Architecture, 2002. Proceedings. 29th Annual International Symposium on}, | ||
pages={295--306}, | ||
year={2002}, | ||
organization={IEEE} | ||
} | ||
@article{seznec2006case, | ||
title={A case for (partially) TAgged GEometric history length branch prediction}, | ||
author={Seznec, Andr{\'e} and Michaud, Pierre}, | ||
journal={Journal of Instruction Level Parallelism}, | ||
volume={8}, | ||
pages={1--23}, | ||
year={2006} | ||
} | ||
|
||
@inproceedings{seznec2011new, | ||
title={A new case for the TAGE branch predictor}, | ||
author={Seznec, Andr{\'e}}, | ||
booktitle={Proceedings of the 44th Annual IEEE/ACM International Symposium on Microarchitecture}, | ||
pages={117--127}, | ||
year={2011}, | ||
organization={ACM} | ||
} | ||
|
||
@misc{hwacha, | ||
author = {}, | ||
title = "{The Hwacha Project}", | ||
year = {2015}, | ||
note = {http://hwacha.org} | ||
} | ||
|
||
@misc{gem5, | ||
author = {}, | ||
title = "{Gem5 Visualization}", | ||
year = {2014}, | ||
note = {http://www.m5sim.org/Visualization} | ||
} | ||
|
||
@misc{rocket, | ||
author = {}, | ||
title = "{Rocket Microarchitectural Implementation of RISC-V ISA}", | ||
year = {2016}, | ||
note = {https://github.com/ucb-bar/rocket} | ||
} | ||
|
||
@article{riscv_nature, | ||
title={Single-chip microprocessor that communicates directly using light}, | ||
author={Sun, Chen and Wade, Mark T and Lee, Yunsup and Orcutt, Jason S and Alloatti, Luca and Georgas, Michael S and Waterman, Andrew S and Shainline, Jeffrey M and Avizienis, Rimas R and Lin, Sen and others}, | ||
journal={Nature}, | ||
volume={528}, | ||
number={7583}, | ||
pages={534--538}, | ||
year={2015}, | ||
publisher={Nature Publishing Group} | ||
} | ||
@article{mipsr10k, | ||
author = {K.C. Yeager}, | ||
title = "{The MIPS R10000 Superscalar Microprocessor}", | ||
journal ={IEEE Micro}, | ||
volume = {16}, | ||
number = {2}, | ||
issn = {0272-1732}, | ||
year = {1996}, | ||
pages = {28-41}, | ||
url = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=491460}, | ||
publisher = {IEEE Computer Society} | ||
} | ||
|
||
|
||
|
||
|
||
@article{alpha21264, | ||
author = {R.E. Kessler}, | ||
title = "{The Alpha 21264 Microprocessor}", | ||
journal ={IEEE Micro}, | ||
volume = {19}, | ||
number = {2}, | ||
issn = {0272-1732}, | ||
year = {1999}, | ||
pages = {24-36}, | ||
url = {http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=755465}, | ||
publisher = {IEEE Computer Society} | ||
} | ||
|
||
|
||
|
||
@techreport{sam_thesis, | ||
author = {S. Williams}, | ||
title = "{Autotuning Performance on Multicore Computers, PhD thesis}", | ||
institution = {U.C. Berkeley}, | ||
year = {2008} | ||
} | ||
|
||
@techreport{asanovic_thesis, | ||
author = {K. Asanovic}, | ||
title = "{Vector Microprocessors, PhD thesis}", | ||
institution = {U.C. Berkeley}, | ||
year = {1998} | ||
} | ||
|
||
@article{ieee.vectorthread.2004, | ||
author = {Ronny Krashinsky and Christopher Batten and Mark Hampton and Steve Gerding and Brian Pharris and Jared Casper and Krste Asanovic}, | ||
title = {The Vector-Thread Architecture}, | ||
journal ={IEEE Micro}, | ||
volume = {24}, | ||
number = {6}, | ||
issn = {0272-1732}, | ||
year = {2004}, | ||
pages = {84-90}, | ||
doi = {10.1109/MM.2004.90}, | ||
publisher = {IEEE Computer Society}, | ||
address = {Los Alamitos, CA, USA}, | ||
} | ||
|
||
@article{roofline_cacm, | ||
author = {Williams, Samuel and Waterman, Andrew and Patterson, David}, | ||
title = "{Roofline: an insightful visual performance model for multicore architectures}", | ||
journal = {Commun. ACM}, | ||
volume = {52}, | ||
number = {4}, | ||
year = {2009}, | ||
issn = {0001-0782}, | ||
pages = {65--76}, | ||
doi = {10.1145/1498765.1498785}, | ||
publisher = {ACM}, | ||
address = {New York, NY, USA}, | ||
} | ||
|
||
@techreport{berkeley_view, | ||
Author = {Asanovic, Krste and Bodik, Ras and Catanzaro, Bryan Christopher and Gebis, Joseph James and Husbands, Parry and Keutzer, Kurt and Patterson, David A. and Plishker, William Lester and Shalf, John and Williams, Samuel Webb and Yelick, Katherine A.}, | ||
Title = "{The Landscape of Parallel Computing Research: A View from Berkeley}", | ||
Institution = {EECS Department, University of California, Berkeley}, | ||
Year = {2006}, | ||
Month = {Dec}, | ||
URL = {http://www.eecs.berkeley.edu/Pubs/TechRpts/2006/EECS-2006-183.html}, | ||
Number = {UCB/EECS-2006-183} | ||
} | ||
|
||
@article{simplescalar, | ||
author = {Austin, Todd and Larson, Eric and Ernst, Dan}, | ||
title = "{SimpleScalar: An Infrastructure for Computer System Modeling}", | ||
journal = {Computer}, | ||
volume = {35}, | ||
number = {2}, | ||
year = {2002}, | ||
issn = {0018-9162}, | ||
pages = {59--67}, | ||
doi = {10.1109/2.982917}, | ||
publisher = {IEEE Computer Society Press}, | ||
address = {Los Alamitos, CA, USA}, | ||
} | ||
|
||
@unpublished{sesc, | ||
author = {Ortego, P. M. and Sack, P. }, | ||
citeulike-article-id = {232792}, | ||
keywords = {bibtex-import}, | ||
month = {Dec}, | ||
posted-at = {2005-06-20 21:13:08}, | ||
priority = {2}, | ||
title = "{SESC: SuperESCalar Simulator}", | ||
year = {2004} | ||
} | ||
|
||
@misc{palladium, | ||
author = "{Cadence Design Systems}", | ||
title = "{Palladium Accelerator/Emulator}", | ||
note = {http://www.cadence.com/products/functional\_ver/palladium/} | ||
} | ||
|
||
@article{asim, | ||
author = {Joel Emer and Pritpal Ahuja and Eric Borch and Artur Klauser and Chi-Keung Luk and Srilatha Manne and Shubhendu S. Mukherjee and Harish Patil and Steven Wallace and Nathan Binkert and Roger Espasa and Toni Juan}, | ||
title = "{Asim: A Performance Model Framework}", | ||
journal ={Computer}, | ||
volume = {35}, | ||
number = {2}, | ||
issn = {0018-9162}, | ||
year = {2002}, | ||
pages = {68-76}, | ||
doi = {10.1109/2.982918}, | ||
publisher = {IEEE Computer Society}, | ||
address = {Los Alamitos, CA, USA}, | ||
} | ||
|
||
@misc{hasim, | ||
author = {Michael Pellauer and Joel Emer and Arvind}, | ||
title = "{HAsim: Implementing a Partitioned Performance Model on an FPGA}", | ||
year = {2007}, | ||
note = {http://publications.csail.mit.edu/abstracts/abstracts07/pellauer-abstract/hasim.html} | ||
} | ||
|
||
@INPROCEEDINGS{rsim, | ||
author = {Vijay S. Pai and Parthasarathy Ranganathan and Sarita V. Adve}, | ||
title = "{RSIM: An Execution-Driven Simulator for ILP-Based Shared-Memory Multiprocessors and Uniprocessors}", | ||
booktitle = {Proceedings of the Third Workshop on Computer Architecture Education}, | ||
year = {1997} | ||
} | ||
|
||
|
||
@article{simics, | ||
author = {Magnusson, P. S. and Christensson, M. and Eskilson, J. and Forsgren, D. and Hallberg, G. and Hogberg, J. and Larsson, F. and Moestedt, A. and Werner, B. }, | ||
citeulike-article-id = {1474011}, | ||
journal = {IEEE Computer}, | ||
keywords = {simulator}, | ||
posted-at = {2007-07-23 02:09:37}, | ||
priority = {2}, | ||
title = {Simics: A full system simulation platform}, | ||
volume = {35}, | ||
year = {2002} | ||
} | ||
|
||
@inproceedings{ramp_blue, | ||
author = {Alex Krasnov and Andrew Schultz and John Wawrzynek | ||
and Greg Gibeling and Pierre-Yves Droz}, | ||
title = "{RAMP Blue: A Message-Passing Manycore System in | ||
FPGAs}", | ||
booktitle = {International Conference on Field Programmable | ||
Logic and Applications}, | ||
month = {August}, | ||
year = {2007}, | ||
URL = {http://www.gigascale.org/pubs/1033.html} | ||
} | ||
|
||
@INPROCEEDINGS{ramp_red, | ||
author = {Njuguna Njoroge and Sewook Wee and Jared Casper and Justin Burdick and Yuriy Teslyar and Christos Kozyrakis and Kunle Olukotun}, | ||
title = "{Building and Using the ATLAS Transactional Memory System}", | ||
booktitle = {Proceedings of the Workshop on Architecture Research using FPGA Platforms, held at HPCA12}, | ||
year = {2006} | ||
} | ||
|
||
@article{bee2, | ||
author = {Chen Chang and John Wawrzynek and Robert W. Brodersen}, | ||
interHash = {93fdd4142452750074cecb2b7f2be032}, | ||
intraHash = {b751d16d1b4e578f1fdea08baf7b3920}, | ||
journal = {IEEE Design \& Test of Computers}, | ||
number = {2}, | ||
pages = {114-125}, | ||
title = "{BEE2: A High-End Reconfigurable Computing System.}", | ||
url = {http://dblp.uni-trier.de/db/journals/dt/dt22.html#ChangWB05}, | ||
volume = {22}, | ||
year = {2005}, | ||
ee = {http://doi.ieeecomputersociety.org/10.1109/MDT.2005.30}, | ||
date = {2006-04-27} | ||
} | ||
|
||
@misc{BEE3, | ||
author = "{Microsoft Research}", | ||
title = "{Berkeley Emulation Engine 3}", | ||
note = {http://research.microsoft.com/en-us/projects/BEE3/} | ||
} | ||
|
||
@inproceedings{lithe, | ||
author = {Heidi Pan and Benjamin Hindman and Krste Asanovic}, | ||
title = "{Lithe: Enabling Efficient Composition of Parallel | ||
Libraries}", | ||
booktitle = "{Workshop on Hot Topics in Parallelism (HotPar-09)}", | ||
organization = "{USENIX}", | ||
month = {March}, | ||
year = {2009}, | ||
abstract = {For the software industry to take advantage of | ||
multicore processors, we must allow programmers to | ||
arbitrarily compose parallel libraries without | ||
sacrificing performance. We argue that high-level | ||
task or thread abstractions and a common global | ||
scheduler cannot provide effective library | ||
composition. Instead, the operating system should | ||
expose unvirtualized processing resources that can | ||
be shared cooperatively between parallel libraries | ||
within an application. In this paper, we describe | ||
a system that standardizes and facilitates the | ||
exchange of these unvirtualized processing | ||
resources between libraries.}, | ||
URL = {http://www.gigascale.org/pubs/1870.html} | ||
} | ||
|
||
% Jae's QoS | ||
@inproceedings{lee08memqos, | ||
author = {Jae W. Lee and Man Cheuk Ng and Krste Asanovic}, | ||
title = "{Globally-Synchronized Frames for Guaranteed Quality-of-Service in On-Chip Networks}", | ||
booktitle = {ISCA '08: Proceedings of the 35th International Symposium on Computer Architecture}, | ||
year = {2008}, | ||
isbn = {978-0-7695-3174-8}, | ||
pages = {89--100}, | ||
doi = {10.1109/ISCA.2008.31}, | ||
publisher = {IEEE Computer Society}, | ||
address = {Washington, DC, USA}, | ||
} | ||
|
||
% Memory QoS | ||
@inproceedings{nesbit06queuing, | ||
author = {Kyle J. Nesbit and Nidhi Aggarwal and James Laudon and James E. Smith}, | ||
title = "{Fair Queuing Memory Systems}", | ||
booktitle = {MICRO 39: Proceedings of the 39th Annual IEEE/ACM International Symposium on Microarchitecture}, | ||
year = {2006}, | ||
isbn = {0-7695-2732-9}, | ||
pages = {208--222}, | ||
doi = {10.1109/MICRO.2006.24}, | ||
publisher = {IEEE Computer Society}, | ||
address = {Washington, DC, USA}, | ||
} | ||
@Article{statistical_sampling, | ||
abstract = {Current software-based micro architecture simulators are | ||
many orders of magnitude slower than the hardware they | ||
simulate. Hence, most microarchitecture design studies | ||
draw their conclusions from drastically truncated | ||
benchmark simulations that are often inaccurate and | ||
misleading. This article presents the Sampling | ||
Microarchitecture Simulation (SMARTS) framework as an | ||
approach to enable fast and accurate performance | ||
measurements of full-length benchmarks. SMARTS | ||
accelerates simulation by selectively measuring in detail | ||
only an appropriate benchmark subset. SMARTS prescribes a | ||
statistically sound procedure for configuring a | ||
systematic sampling simulation run to achieve a desired | ||
quantifiable confidence in estimates. Analysis of the | ||
SPEC CPU2000 benchmark suite shows that CPI and energy | ||
per instruction (EPI) can be estimated to within | ||
±3% with 99.7% confidence by measuring fewer than | ||
50 million instructions per benchmark. In practice, | ||
inaccuracy in microarchitectural state initialization | ||
introduces an additional uncertainty which we empirically | ||
bound to ∼2% for the tested benchmarks. Our | ||
implementation of SMARTS achieves an actual average error | ||
of only 0.64% on CPI and 0.59% on EPI for the tested | ||
benchmarks, running with average speedups of 35 and 60 | ||
over detailed simulation of 8-way and 16-way out-of-order | ||
processors, respectively. © 2006 ACM.}, | ||
affiliation = {OTHER}, | ||
author = {Wunderlich, Roland E. and Wenisch, Thomas F. and | ||
Falsafi, Babak and Hoe, James C.}, | ||
details = {http://infoscience.epfl.ch/record/135593}, | ||
doi = {10.1145/1147224.1147225}, | ||
issn = {10493301}, | ||
journal = {{ACM} {T}ransactions on {M}odeling and {C}omputer {S}imulation}, | ||
number = {3}, | ||
oai-id = {oai:infoscience.epfl.ch:135593}, | ||
oai-set = {article}, | ||
pages = {197 -- 224}, | ||
review = {REVIEWED}, | ||
status = {PUBLISHED}, | ||
title = {Statistical sampling of microarchitecture simulation}, | ||
unit = {PARSA}, | ||
volume = {16}, | ||
year = 2006 | ||
} |
Oops, something went wrong.