Permalink
Browse files

cleanup Wikipedia benchmark: removed genTrace

  • Loading branch information...
lmwnshn authored and apavlo committed Jan 4, 2018
1 parent 78d8ef6 commit 3f7e25016377d36a445dd9fcbfac28482835b64d
@@ -12,11 +12,6 @@
<!-- Scale factor is the number of wikipages *1000 -->
<scalefactor>100</scalefactor>
<!-- Wikipedia Trace Options -->
<tracefile>config/traces/wikipedia-100k.trace</tracefile>
<traceOut>10</traceOut>
<base_ip>10.1.</base_ip>
<!-- The workload -->
<terminals>100</terminals>
<works>
@@ -22,7 +22,6 @@
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.configuration.XMLConfiguration;
@@ -31,13 +30,9 @@
import com.oltpbenchmark.WorkloadConfiguration;
import com.oltpbenchmark.api.BenchmarkModule;
import com.oltpbenchmark.api.Loader;
import com.oltpbenchmark.api.TransactionGenerator;
import com.oltpbenchmark.api.Worker;
import com.oltpbenchmark.benchmarks.wikipedia.data.RevisionHistograms;
import com.oltpbenchmark.benchmarks.wikipedia.procedures.AddWatchList;
import com.oltpbenchmark.benchmarks.wikipedia.util.TraceTransactionGenerator;
import com.oltpbenchmark.benchmarks.wikipedia.util.TransactionSelector;
import com.oltpbenchmark.benchmarks.wikipedia.util.WikipediaOperation;
import com.oltpbenchmark.util.TextGenerator;
import com.oltpbenchmark.util.RandomDistribution.FlatHistogram;
@@ -48,36 +43,10 @@
protected final FlatHistogram<Integer> minorEdit;
private final FlatHistogram<Integer> revisionDeltas[];
private final File traceInput;
private final File traceOutput;
private final File traceOutputDebug;
private final int traceSize;
@SuppressWarnings("unchecked")
public WikipediaBenchmark(WorkloadConfiguration workConf) {
super("wikipedia", workConf, true);
XMLConfiguration xml = workConf.getXmlConfig();
if (xml != null && xml.containsKey("tracefile")) {
this.traceInput = new File(xml.getString("tracefile"));
} else {
this.traceInput = null;
}
if (xml != null && xml.containsKey("traceOut")) {
this.traceSize = xml.getInt("traceOut");
} else {
this.traceSize = 0;
}
if (xml != null && xml.containsKey("tracefile")) {
this.traceOutput = new File(xml.getString("tracefile"));
} else {
this.traceOutput = null;
}
if (xml != null && xml.containsKey("tracefiledebug")) {
this.traceOutputDebug = new File(xml.getString("tracefiledebug"));
} else {
this.traceOutputDebug = null;
}
this.commentLength = new FlatHistogram<Integer>(this.rng(), RevisionHistograms.COMMENT_LENGTH);
this.minorEdit = new FlatHistogram<Integer>(this.rng(), RevisionHistograms.MINOR_EDIT);
@@ -86,20 +55,6 @@ public WikipediaBenchmark(WorkloadConfiguration workConf) {
this.revisionDeltas[i] = new FlatHistogram<Integer>(this.rng(), RevisionHistograms.REVISION_DELTAS[i]);
} // FOR
}
/**
 * @return the input trace file configured via the {@code tracefile} XML key,
 *         or {@code null} when no trace file was configured
 */
public File getTraceInput() {
    return traceInput;
}
/**
 * @return the destination file for generated traces, or {@code null} when
 *         trace generation is disabled
 */
public File getTraceOutput() {
    return traceOutput;
}
/**
 * @return the debug-trace destination file configured via the
 *         {@code tracefiledebug} XML key, or {@code null} when unset
 */
public File getTraceOutputDebug() {
    return traceOutputDebug;
}
/**
 * @return the trace size (in thousands of operations) from the
 *         {@code traceOut} XML key; 0 when no trace output was requested
 */
public int getTraceSize() {
    return traceSize;
}
/**
* Special function that takes in a char field that represents the last
@@ -145,16 +100,12 @@ protected Package getProcedurePackageImpl() {
@Override
protected List<Worker<? extends BenchmarkModule>> makeWorkersImpl(boolean verbose) throws IOException {
LOG.info(String.format("Initializing %d %s using '%s' as the input trace file",
workConf.getTerminals(), WikipediaWorker.class.getSimpleName(), this.traceInput));
TransactionSelector transSel = new TransactionSelector(this.traceInput, workConf.getTransTypes());
List<WikipediaOperation> trace = Collections.unmodifiableList(transSel.readAll());
LOG.info("Total Number of Sample Operations: " + trace.size());
LOG.info(String.format("Initializing %d %s",
workConf.getTerminals(), WikipediaWorker.class.getSimpleName()));
List<Worker<? extends BenchmarkModule>> workers = new ArrayList<Worker<? extends BenchmarkModule>>();
for (int i = 0; i < workConf.getTerminals(); ++i) {
TransactionGenerator<WikipediaOperation> generator = new TraceTransactionGenerator(trace);
WikipediaWorker worker = new WikipediaWorker(this, i, generator);
WikipediaWorker worker = new WikipediaWorker(this, i);
workers.add(worker);
} // FOR
return workers;
@@ -16,16 +16,14 @@
package com.oltpbenchmark.benchmarks.wikipedia;
import java.io.File;
import java.io.PrintStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
@@ -35,11 +33,9 @@
import com.oltpbenchmark.benchmarks.wikipedia.data.PageHistograms;
import com.oltpbenchmark.benchmarks.wikipedia.data.TextHistograms;
import com.oltpbenchmark.benchmarks.wikipedia.data.UserHistograms;
import com.oltpbenchmark.benchmarks.wikipedia.util.TransactionSelector;
import com.oltpbenchmark.benchmarks.wikipedia.util.WikipediaUtil;
import com.oltpbenchmark.catalog.Table;
import com.oltpbenchmark.types.DatabaseType;
import com.oltpbenchmark.util.Pair;
import com.oltpbenchmark.util.RandomDistribution.Flat;
import com.oltpbenchmark.util.RandomDistribution.FlatHistogram;
import com.oltpbenchmark.util.RandomDistribution.Zipf;
import com.oltpbenchmark.util.SQLUtil;
@@ -74,11 +70,6 @@
*/
private final int page_last_rev_length[];
/**
* Pair<PageNamespace, PageTitle>
*/
private List<Pair<Integer, String>> titles = Collections.synchronizedList(new ArrayList<Pair<Integer, String>>());
/**
* Constructor
*
@@ -145,7 +136,7 @@ public void load(Connection conn) throws SQLException {
});
}
// WATCHLIST, REVISIONS and trace file depends on USERS and PAGES
// WATCHLIST and REVISIONS depend on USERS and PAGES
// WATCHLIST
threads.add(new LoaderThread() {
@@ -177,76 +168,9 @@ public void load(Connection conn) throws SQLException {
}
});
// generate trace file
// Trace-generation loader thread: must run after USERS and PAGES are fully
// loaded, since genTrace() samples from this.titles and this.num_users.
threads.add(new LoaderThread() {
@Override
public void load(Connection conn) throws SQLException {
try {
// Block until the USERS/PAGES loader threads count the latch down;
// re-interrupting is not done here — the interrupt is converted to a
// hard failure because a partial trace would be unusable.
userPageLatch.await();
} catch (InterruptedException e) {
e.printStackTrace();
throw new RuntimeException(e);
}
try {
// Qualified with the enclosing instance: genTrace() is a method of
// the outer WikipediaLoader, not of this anonymous LoaderThread.
WikipediaLoader.this.genTrace();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
});
return threads;
}
/**
 * Writes a synthetic workload trace of (user, page) operations to the file
 * configured on the benchmark, plus an optional debug trace.
 *
 * User ids are drawn from a flat distribution over [1, num_users] and page
 * ids from a Zipf distribution over [1, num_pages]; a fixed percentage of
 * entries are attributed to the anonymous user.
 *
 * @return the trace file written, or {@code null} when no trace output is
 *         configured or the requested trace size is 0
 * @throws Exception if the trace files cannot be created or written
 */
private File genTrace() throws Exception {
    WikipediaBenchmark b = this.benchmark;
    File file = b.getTraceOutput();
    File filedebug = b.getTraceOutputDebug();
    if (file == null || b.getTraceSize() == 0) {
        return (null);
    }
    assert (this.num_pages == this.titles.size());
    // Fixed: "Generating a %dk traces" was ungrammatical.
    LOG.info(String.format("Generating %dk traces to '%s'", b.getTraceSize(), file));
    Flat z_users = new Flat(this.rng(), 1, this.num_users);
    Zipf z_pages = new Zipf(this.rng(), 1, this.num_pages, WikipediaConstants.USER_ID_SIGMA);
    // try/finally guarantees both streams are closed even if a write or a
    // titles lookup throws mid-loop (the original leaked them on error).
    PrintStream ps = new PrintStream(file);
    PrintStream psdebug = null;
    try {
        if (filedebug != null) {
            psdebug = new PrintStream(filedebug);
        }
        for (int i = 0, cnt = (b.getTraceSize() * 1000); i < cnt; i++) {
            int user_id = -1;
            // Check whether this should be an anonymous update
            if (this.rng().nextInt(100) < WikipediaConstants.ANONYMOUS_PAGE_UPDATE_PROB) {
                user_id = WikipediaConstants.ANONYMOUS_USER_ID;
            }
            // Otherwise figure out what user is updating this page
            else {
                user_id = z_users.nextInt();
            }
            assert (user_id != -1);
            // Figure out what page they're going to update.
            // NOTE(review): z_pages is built over [1, num_pages] but is used
            // directly as a 0-based index into this.titles (the debug entry
            // writes page_id + 1) — confirm Zipf.nextInt() is 0-based here,
            // otherwise titles.get(num_pages) would be out of bounds.
            int page_id = z_pages.nextInt();
            Pair<Integer, String> p = this.titles.get(page_id);
            assert (p != null);
            TransactionSelector.writeEntry(ps, user_id, p.first, p.second);
            if (psdebug != null) {
                TransactionSelector.writeEntryDebug(psdebug, user_id, p.first, p.second, page_id + 1);
            }
        } // FOR
    } finally {
        ps.close();
        if (psdebug != null) {
            psdebug.close();
        }
    }
    return (file);
}
/**
* USERACCTS
*/
@@ -340,20 +264,16 @@ private void loadPages(Connection conn, int lo, int hi) throws SQLException {
String sql = SQLUtil.getInsertSQL(catalog_tbl, this.getDatabaseType());
PreparedStatement pageInsert = conn.prepareStatement(sql);
FlatHistogram<Integer> h_titleLength = new FlatHistogram<Integer>(this.rng(), PageHistograms.TITLE_LENGTH);
FlatHistogram<Integer> h_namespace = new FlatHistogram<Integer>(this.rng(), PageHistograms.NAMESPACE);
FlatHistogram<String> h_restrictions = new FlatHistogram<String>(this.rng(), PageHistograms.RESTRICTIONS);
int batchSize = 0;
int lastPercent = -1;
Random rand = new Random();
for (int i = lo; i <= hi; i++) {
// HACK: Always append the page id to the title so that it's
// guaranteed
// to be unique. Otherwise we can get collisions with larger scale
// factors.
int titleLength = h_titleLength.nextValue().intValue();
String title = TextGenerator.randomStr(this.rng(), titleLength) + " [" + i + "]";
int namespace = h_namespace.nextValue().intValue();
String title = WikipediaUtil.generatePageTitle(rand, i);
int namespace = WikipediaUtil.generatePageNamespace(rand, i);
String restrictions = h_restrictions.nextValue();
assert (restrictions.isEmpty() == false); // Check for Oracle
double pageRandom = this.rng().nextDouble();
@@ -372,7 +292,6 @@ private void loadPages(Connection conn, int lo, int hi) throws SQLException {
pageInsert.setInt(param++, 0); // page_latest
pageInsert.setInt(param++, 0); // page_len
pageInsert.addBatch();
this.titles.add(Pair.of(namespace, title));
if (++batchSize % WikipediaConstants.BATCH_SIZE == 0) {
pageInsert.executeBatch();
@@ -448,13 +367,14 @@ private void loadWatchlist(Connection conn) throws SQLException {
assert (pageId > 0);
userPages.add(pageId);
Pair<Integer, String> page = this.titles.get(pageId);
assert (page != null) : "Invalid PageId " + pageId;
Random rand = new Random();
Integer namespace = WikipediaUtil.generatePageNamespace(rand, pageId);
String title = WikipediaUtil.generatePageTitle(rand, pageId);
int param = 1;
watchInsert.setInt(param++, user_id); // wl_user
watchInsert.setInt(param++, page.first); // wl_namespace
watchInsert.setString(param++, page.second); // wl_title
watchInsert.setInt(param++, namespace); // wl_namespace
watchInsert.setString(param++, title); // wl_title
watchInsert.setNull(param++, java.sql.Types.VARCHAR); // wl_notificationtimestamp
watchInsert.addBatch();
batchSize++;
Oops, something went wrong.

0 comments on commit 3f7e250

Please sign in to comment.