Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Auto Tuning Feature #338

Merged
merged 1 commit into from
Feb 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
language: scala
sudo: false
sudo: true
jdk:
- oraclejdk8
- oraclejdk7
python: "2.6"
install:
- sudo pip install inspyred
- sudo pip install argparse

# only build PRs and master (not all branch pushes)
branches:
Expand Down
10 changes: 10 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -111,5 +111,15 @@ Copyright (c) 2016 Yehuda Katz, Tom Dale and Ember.js contributors
License: (https://github.com/emberjs/ember.js/blob/master/LICENSE)

------------------------------------------------------------------------------
Attribution for Python Libraries
------------------------------------------------------------------------------

inspyred (https://github.com/aarongarrett/inspyred/)
Copyright (c) 2017, Aaron Garrett
License: MIT License (https://github.com/aarongarrett/inspyred/blob/master/LICENSE)






67 changes: 67 additions & 0 deletions app-conf/AutoTuningConf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright 2016 LinkedIn Corp.

Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License.
-->

<!-- General configurations -->
<configuration>
<property>
<name>autotuning.enabled</name>
<value>false</value>
<description>Enable or disable auto tuning</description>
</property>
<property>
<name>autotuning.default.allowed_max_resource_usage_percent</name>
<value>150</value>
<description>Default value for maximum allowed overheard in resource usage (in percent) as compared to the average
value
</description>
</property>
<property>
<name>autotuning.default.allowed_max_execution_time_percent</name>
<value>150</value>
<description>Default value for maximum allowed overheard in execution time (in percent) as compared to the average
value
</description>
</property>
<property>
<name>autotuning.daemon.wait.interval.ms</name>
<value>60000</value>
<description>Auto tuning daemon wait interval</description>
</property>
<property>
<name>baseline.execution.count</name>
<value>30</value>
<description>Baseline to be done on recent number of execution</description>
</property>
<property>
<name>fitness.compute.wait_interval.ms</name>
<value>1800000</value>
<description>Wait time after the job is completed for fitness computation</description>
</property>
<property>
<name>dr.elephant.api.url</name>
<value>http://ltx1-holdemdre01.grid.linkedin.com:8080</value>
<description>API URL for Dr Elephant. Optional. This is needed only if you are using APIFitnessComputeUtil to get
fitness from Dr Elephant APIs
</description>
</property>
<!--The below property is optional-->
<!--<property>-->
<!--<name>python.path</name>-->
<!--<value>/path/to/python/binary</value>-->
<!--<description>Root directory for python</description>-->
<!--</property>-->
</configuration>
80 changes: 80 additions & 0 deletions app/com/linkedin/drelephant/AutoTuner.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright 2016 LinkedIn Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package com.linkedin.drelephant;

import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;

import com.linkedin.drelephant.analysis.HDFSContext;
import com.linkedin.drelephant.tuning.AzkabanJobCompleteDetector;
import com.linkedin.drelephant.tuning.BaselineComputeUtil;
import com.linkedin.drelephant.tuning.FitnessComputeUtil;
import com.linkedin.drelephant.tuning.JobCompleteDetector;
import com.linkedin.drelephant.tuning.PSOParamGenerator;
import com.linkedin.drelephant.tuning.ParamGenerator;
import com.linkedin.drelephant.util.Utils;

import controllers.AutoTuningMetricsController;


/**
*This class is the AutoTuner Daemon class which runs following thing in order.
* - BaselineComputeUtil: Baseline computation for new jobs which are auto tuning enabled
* - JobCompleteDetector: Detect if the current execution of the jobs is completed and update the status in DB
* - APIFitnessComputeUtil: Compute the recently succeeded jobs fitness
* - ParamGenerator : Generate the next set of parameters for suggestion
*/
public class AutoTuner implements Runnable {

public static final long ONE_MIN = 60 * 1000;
private static final Logger logger = Logger.getLogger(AutoTuner.class);
private static final long DEFAULT_METRICS_COMPUTATION_INTERVAL = ONE_MIN / 5;

public static final String AUTO_TUNING_DAEMON_WAIT_INTERVAL = "autotuning.daemon.wait.interval.ms";

public void run() {

logger.info("Starting Auto Tuning thread");
HDFSContext.load();
Configuration configuration = ElephantContext.instance().getAutoTuningConf();

Long interval =
Utils.getNonNegativeLong(configuration, AUTO_TUNING_DAEMON_WAIT_INTERVAL, DEFAULT_METRICS_COMPUTATION_INTERVAL);

try {
AutoTuningMetricsController.init();
BaselineComputeUtil baselineComputeUtil = new BaselineComputeUtil();
FitnessComputeUtil fitnessComputeUtil = new FitnessComputeUtil();
ParamGenerator paramGenerator = new PSOParamGenerator();
while (!Thread.currentThread().isInterrupted()) {
try {
baselineComputeUtil.computeBaseline();
JobCompleteDetector jobCompleteDetector = new AzkabanJobCompleteDetector();
jobCompleteDetector.updateCompletedExecutions();
fitnessComputeUtil.updateFitness();
paramGenerator.getParams();
} catch (Exception e) {
logger.error("Error in auto tuner thread ", e);
}
Thread.sleep(interval);
}
} catch (Exception e) {
logger.error("Error in auto tuner thread ", e);
}
logger.info("Auto tuning thread shutting down");
}
}
27 changes: 27 additions & 0 deletions app/com/linkedin/drelephant/DrElephant.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,52 @@

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;

import com.linkedin.drelephant.analysis.HDFSContext;


/**
* The main class which starts Dr. Elephant
*/
public class DrElephant extends Thread {
public static final String AUTO_TUNING_ENABLED = "autotuning.enabled";
private static final Logger logger = Logger.getLogger(DrElephant.class);

private ElephantRunner _elephant;
private AutoTuner _autoTuner;
private Thread _autoTunerThread;

private Boolean autoTuningEnabled;

public DrElephant() throws IOException {
HDFSContext.load();
Configuration configuration = ElephantContext.instance().getAutoTuningConf();
autoTuningEnabled = configuration.getBoolean(AUTO_TUNING_ENABLED, false);
logger.debug("Auto Tuning Configuration: " + configuration.toString());
_elephant = new ElephantRunner();
if (autoTuningEnabled) {
_autoTuner = new AutoTuner();
_autoTunerThread = new Thread(_autoTuner, "Auto Tuner Thread");
}
}

@Override
public void run() {
if (_autoTunerThread != null) {
logger.debug("Starting auto tuner thread ");
_autoTunerThread.start();
}
_elephant.run();
}

public void kill() {
if (_elephant != null) {
_elephant.kill();
}
if (_autoTunerThread != null) {
_autoTunerThread.interrupt();
}
}
}
43 changes: 31 additions & 12 deletions app/com/linkedin/drelephant/ElephantContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,21 @@ public class ElephantContext {
private static final String HEURISTICS_CONF = "HeuristicConf.xml";
private static final String JOB_TYPES_CONF = "JobTypeConf.xml";
private static final String GENERAL_CONF = "GeneralConf.xml";
private static final String AUTO_TUNING_CONF = "AutoTuningConf.xml";

private final Map<String, List<String>> _heuristicGroupedNames = new HashMap<String, List<String>>();
private List<HeuristicConfigurationData> _heuristicsConfData;
private List<FetcherConfigurationData> _fetchersConfData;
private Configuration _generalConf;

private Configuration _autoTuningConf;
private List<AggregatorConfigurationData> _aggregatorConfData;

private final Map<String, ApplicationType> _nameToType = new HashMap<String, ApplicationType>();
private final Map<ApplicationType, List<Heuristic>> _typeToHeuristics = new HashMap<ApplicationType, List<Heuristic>>();
private final Map<ApplicationType, HadoopMetricsAggregator> _typeToAggregator = new HashMap<ApplicationType, HadoopMetricsAggregator>();
private final Map<ApplicationType, List<Heuristic>> _typeToHeuristics =
new HashMap<ApplicationType, List<Heuristic>>();
private final Map<ApplicationType, HadoopMetricsAggregator> _typeToAggregator =
new HashMap<ApplicationType, HadoopMetricsAggregator>();
private final Map<ApplicationType, ElephantFetcher> _typeToFetcher = new HashMap<ApplicationType, ElephantFetcher>();
private final Map<String, Html> _heuristicToView = new HashMap<String, Html>();
private Map<ApplicationType, List<JobType>> _appTypeToJobTypes = new HashMap<ApplicationType, List<JobType>>();
Expand All @@ -96,20 +101,24 @@ private ElephantContext() {
loadConfiguration();
}

public Configuration getAutoTuningConf() {
return _autoTuningConf;
}

private void loadConfiguration() {
loadAggregators();
loadFetchers();
loadHeuristics();
loadJobTypes();

loadGeneralConf();
loadAutoTuningConf();

// It is important to configure supported types in the LAST step so that we could have information from all
// configurable components.
configureSupportedApplicationTypes();
}


private void loadAggregators() {
Document document = Utils.loadXMLDoc(AGGREGATORS_CONF);

Expand All @@ -119,8 +128,8 @@ private void loadAggregators() {
Class<?> aggregatorClass = Class.forName(data.getClassName());
Object instance = aggregatorClass.getConstructor(AggregatorConfigurationData.class).newInstance(data);
if (!(instance instanceof HadoopMetricsAggregator)) {
throw new IllegalArgumentException(
"Class " + aggregatorClass.getName() + " is not an implementation of " + HadoopMetricsAggregator.class.getName());
throw new IllegalArgumentException("Class " + aggregatorClass.getName() + " is not an implementation of "
+ HadoopMetricsAggregator.class.getName());
}

ApplicationType type = data.getAppType();
Expand All @@ -145,6 +154,7 @@ private void loadAggregators() {
}

}

/**
* Load all the fetchers configured in FetcherConf.xml
*/
Expand All @@ -157,8 +167,8 @@ private void loadFetchers() {
Class<?> fetcherClass = Class.forName(data.getClassName());
Object instance = fetcherClass.getConstructor(FetcherConfigurationData.class).newInstance(data);
if (!(instance instanceof ElephantFetcher)) {
throw new IllegalArgumentException(
"Class " + fetcherClass.getName() + " is not an implementation of " + ElephantFetcher.class.getName());
throw new IllegalArgumentException("Class " + fetcherClass.getName() + " is not an implementation of "
+ ElephantFetcher.class.getName());
}

ApplicationType type = data.getAppType();
Expand Down Expand Up @@ -198,8 +208,8 @@ private void loadHeuristics() {

Object instance = heuristicClass.getConstructor(HeuristicConfigurationData.class).newInstance(data);
if (!(instance instanceof Heuristic)) {
throw new IllegalArgumentException(
"Class " + heuristicClass.getName() + " is not an implementation of " + Heuristic.class.getName());
throw new IllegalArgumentException("Class " + heuristicClass.getName() + " is not an implementation of "
+ Heuristic.class.getName());
}
ApplicationType type = data.getAppType();
List<Heuristic> heuristics = _typeToHeuristics.get(type);
Expand Down Expand Up @@ -249,9 +259,8 @@ private void loadHeuristics() {
}

// Bind No_DATA heuristic to its helper pages, no need to add any real configurations
_heuristicsConfData.add(
new HeuristicConfigurationData(HeuristicResult.NO_DATA.getHeuristicName(),
HeuristicResult.NO_DATA.getHeuristicClassName(), "views.html.help.helpNoData", null, null));
_heuristicsConfData.add(new HeuristicConfigurationData(HeuristicResult.NO_DATA.getHeuristicName(),
HeuristicResult.NO_DATA.getHeuristicClassName(), "views.html.help.helpNoData", null, null));
}

/**
Expand Down Expand Up @@ -308,6 +317,16 @@ private void loadGeneralConf() {
_generalConf.addResource(this.getClass().getClassLoader().getResourceAsStream(GENERAL_CONF));
}

/**
* Load in the AutoTuningConf.xml file as a configuration object for other objects to access
*/
private void loadAutoTuningConf() {
logger.info("Loading configuration file " + AUTO_TUNING_CONF);

_autoTuningConf = new Configuration();
_autoTuningConf.addResource(this.getClass().getClassLoader().getResourceAsStream(AUTO_TUNING_CONF));
}

/**
* Given an application type, return the currently bound heuristics
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@
* the License.
*/

package com.linkedin.drelephant.exceptions;
package com.linkedin.drelephant.clients;

import java.io.File;
import java.util.Map;
import java.util.Set;

import com.linkedin.drelephant.exceptions.JobState;
import com.linkedin.drelephant.exceptions.LoggingEvent;


/**
* The interface WorkflowClient should be implemented by all the workflow client. The client should not
Expand Down
Loading