
Further work on Google Storage integration for Pilot-Data (PD)

1 parent 69824f9 commit 8573856916c0cc22da48a72e7755893912e8113c @drelu committed Aug 11, 2012
@@ -9,4 +9,6 @@ build/*
cli/bj-*
examples/pilot-api/work/bj-*
dump.rdb
-*.log
+*.log
+examples/pilot-api/gce.dat
+gce.dat
@@ -1 +1 @@
-0.4.89
+0.4.90
@@ -5,5 +5,5 @@ saga=saga
# Logging config
# logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL
# logging.level=logging.DEBUG
-logging.level=logging.FATAL
-#logging.level=logging.DEBUG
+#logging.level=logging.FATAL
+logging.level=logging.DEBUG
@@ -17,6 +17,7 @@
import logging
import shutil
from string import Template
+
logging.basicConfig(level=logging.DEBUG)
try:
@@ -27,8 +28,10 @@
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../ext/threadpool-1.2.7/src/")
logging.debug(str(sys.path))
from threadpool import *
-from bigjob import logger
+# BigJob/Pilot framework classes
+from bigjob import logger
+from pilot.impl.pilotdata_manager import DataUnit, PilotData
logger.debug("Python Version: " + str(sys.version_info))
if sys.version_info < (2, 5):
@@ -341,13 +344,14 @@ def execute_job(self, job_url, job_dict):
if not os.path.isabs(error):
error=os.path.join(workingdirectory, error)
+
# append job to job list
self.jobs.append(job_url)
# File Stage-In of dependent data units
- if job_dict.has_key("input_data"):
- self.__stage_in_data_units(job_dict["input_data"])
+ if job_dict.has_key("InputData"):
+ self.__stage_in_data_units(eval(job_dict["InputData"]), workingdirectory)
# File Stage-In - Move pilot-level files to working directory of sub-job
if self.pilot_description!=None:
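
The renamed InputData field arrives from the coordination backend as the string representation of a list of data-unit URLs (the example script added below sets "input_data": [data_unit.get_url()]), so the agent converts it back into a list with eval before staging. A minimal sketch of that round trip, using a hypothetical URL:

    # hypothetical DU URL for illustration; real URLs come from DataUnit.get_url()
    job_dict = {"InputData": "['redis://localhost:6379/bigjob:pd-1a2b:du-3c4d']"}
    input_data = eval(job_dict["InputData"])   # back to a Python list of DU URLs
    # each URL is then staged into the sub-job's working directory via
    # __stage_in_data_units(input_data, workingdirectory)
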
@@ -445,14 +449,9 @@ def execute_job(self, job_url, job_dict):
self.coordination.set_job_state(job_url, str(bigjob.state.Running))
except:
traceback.print_exc(file=sys.stderr)
+
- def __stage_in_data_units(self, input_data=[]):
- """ stage in data units specified in input_data field """
- for i in input_data:
- pass
-
-
-
+
def allocate_nodes(self, job_dict):
""" allocate nodes
allocated nodes will be written to machinefile advert-launcher-machines-<jobid>
@@ -711,6 +710,20 @@ def stop_background_thread(self):
self.stop=True
+ #############################################################################
+ # Private methods
+
+ def __stage_in_data_units(self, input_data=[], target_directory="."):
+ """ stage in data units specified in input_data field """
+ logger.debug("Stage in input files")
+ for i in input_data:
+ pd_url = self.__get_pd_url(i)
+ du_id = self.__get_du_id(i)
+ pd = PilotData(pd_url=pd_url)
+ du = pd.get_du(du_id)
+ du.export(target_directory)
+
+
def __expand_directory(self, directory):
""" expands directory name $HOME or ~ to the working directory
on the respective machine
@@ -727,8 +740,15 @@ def __expand_directory(self, directory):
pass
return directory
+
-
+ def __get_pd_url(self, du_url):
+ url = du_url[:du_url.index(":du-")]
+ return url
+
+ def __get_du_id(self, du_url):
+ du_id = du_url[du_url.index("du-"):]
+ return du_id
def __get_launch_method(self, requested_method):
""" returns desired execution method: ssh, aprun """
@@ -11,7 +11,7 @@
import time
from bigjob import logger
-from redis import *
+import redis
if sys.version_info < (2, 5):
sys.path.append(os.path.dirname( os.path.abspath( __file__) ) + "/../ext/uuid-1.30/")
@@ -71,14 +71,14 @@ def __init__(self, server=REDIS_SERVER, server_port=REDIS_SERVER_PORT, server_co
logger.debug("Connect to Redis: " + server + " Port: " + str(server_port))
if self.password==None:
- self.redis = Redis(host=server, port=server_port, db=0)
+ self.redis_client = redis.Redis(host=server, port=server_port, db=0)
else:
- self.redis = Redis(host=server, port=server_port, password=self.password, db=0)
- #self.redis_pubsub = self.redis.pubsub() # redis pubsub client
+ self.redis_client = redis.Redis(host=server, port=server_port, password=self.password, db=0)
+ #self.redis_client_pubsub = self.redis_client.pubsub() # redis pubsub client
#self.resource_lock = threading.RLock()
- self.pipe = self.redis.pipeline()
+ self.pipe = self.redis_client.pipeline()
try:
- self.redis.ping()
+ self.redis_client.ping()
except:
logger.error("Please start Redis server!")
raise Exception("Please start Redis server!")
@@ -92,41 +92,41 @@ def get_address(self):
def set_pilot_state(self, pilot_url, new_state, stopped=False):
logger.debug("update state of pilot job to: " + str(new_state)
+ " stopped: " + str(stopped))
- self.redis.hmset(pilot_url, {"state":str(new_state), "stopped":str(stopped)})
+ self.redis_client.hmset(pilot_url, {"state":str(new_state), "stopped":str(stopped)})
if stopped==True:
self.queue_job(pilot_url, "STOP")
def get_pilot_state(self, pilot_url):
- state = self.redis.hgetall(pilot_url)
+ state = self.redis_client.hgetall(pilot_url)
return state
#####################################################################################
# Pilot-Job State
def set_pilot_description(self, pilot_url, description):
logger.debug("update description of pilot job to: " + str(description))
- self.redis.hmset(pilot_url + ":description", {"description":description})
+ self.redis_client.hmset(pilot_url + ":description", {"description":description})
def get_pilot_description(self, pilot_url):
- description = self.redis.hgetall(pilot_url + ":description")
+ description = self.redis_client.hgetall(pilot_url + ":description")
return description
#def is_pilot_stopped(self,pilot_url):
- # state = self.redis.hgetall(pilot_url)
+ # state = self.redis_client.hgetall(pilot_url)
# if state==None or not state.has_key("stopped"):
# return True
# return state["stopped"]
def get_jobs_of_pilot(self, pilot_url):
""" returns array of job_url that are associated with a pilot """
- jobs = self.redis.keys(pilot_url+":jobs:*")
+ jobs = self.redis_client.keys(pilot_url+":jobs:*")
jobs_fqdn = [os.path.join(self.get_address(), i)for i in jobs]
return jobs_fqdn
def delete_pilot(self, pilot_url):
- items = self.redis.keys(pilot_url+"*")
+ items = self.redis_client.keys(pilot_url+"*")
for i in items:
self.pipe.delete(i)
self.pipe.execute()
@@ -136,51 +136,51 @@ def delete_pilot(self, pilot_url):
def set_job_state(self, job_url, new_state):
#self.resource_lock.acquire()
logger.debug("set job state to: " + str(new_state))
- self.redis.hset(job_url, "state", str(new_state))
+ self.redis_client.hset(job_url, "state", str(new_state))
if new_state=="Unknown":
- self.redis.hset(job_url,"start_time", str(time.time()))
+ self.redis_client.hset(job_url,"start_time", str(time.time()))
elif new_state=="Running":
- self.redis.hset(job_url,"end_queue_time", str(time.time()))
+ self.redis_client.hset(job_url,"end_queue_time", str(time.time()))
elif new_state=="Done":
- self.redis.hset(job_url, "end_time", str(time.time()))
+ self.redis_client.hset(job_url, "end_time", str(time.time()))
#self.resource_lock.release()
def get_job_state(self, job_url):
- return self.redis.hget(job_url, "state")
+ return self.redis_client.hget(job_url, "state")
#####################################################################################
# Sub-Job Description
def set_job(self, job_url, job_dict):
- self.redis.hmset(job_url, job_dict)
+ self.redis_client.hmset(job_url, job_dict)
self.set_job_state(job_url, "Unknown")
def get_job(self, job_url):
- return self.redis.hgetall(job_url)
+ return self.redis_client.hgetall(job_url)
def delete_job(self, job_url):
- self.redis.delete(job_url+"*")
+ self.redis_client.delete(job_url+"*")
#####################################################################################
# Distributed queue for sub-jobs
def queue_job(self, pilot_url, job_url):
""" queue new job to pilot """
queue_name = pilot_url + ":queue"
- self.redis.set(queue_name + ':last_in', pickle.dumps(datetime.datetime.now()))
- self.redis.lpush(queue_name, job_url)
+ self.redis_client.set(queue_name + ':last_in', pickle.dumps(datetime.datetime.now()))
+ self.redis_client.lpush(queue_name, job_url)
def dequeue_job(self, pilot_url):
""" deque to new job of a certain pilot """
queue_name = pilot_url + ":queue"
logger.debug("Dequeue sub-job from: " + queue_name
- + " number queued items: " + str(self.redis.llen(queue_name)))
- self.redis.set(queue_name + ':last_out', pickle.dumps(datetime.datetime.now()))
- job_url = self.redis.brpop(queue_name, 1)
+ + " number queued items: " + str(self.redis_client.llen(queue_name)))
+ self.redis_client.set(queue_name + ':last_out', pickle.dumps(datetime.datetime.now()))
+ job_url = self.redis_client.brpop(queue_name, 1)
if job_url==None:
return job_url
logger.debug("Dequeued: " + str(job_url))
@@ -19,9 +19,9 @@
tcp://* (ZMQ - listening to all interfaces)
"""
-COORDINATION_URL = "advert://localhost/?dbtype=sqlite3"
+#COORDINATION_URL = "advert://localhost/?dbtype=sqlite3"
#COORDINATION_URL = "tcp://*"
-#COORDINATION_URL = "redis://localhost:6379"
+COORDINATION_URL = "redis://localhost:6379"
# for running BJ from local dir
sys.path.insert(0, os.getcwd() + "/../")
@@ -0,0 +1,82 @@
+import sys
+import os
+import time
+import logging
+import uuid
+#logging.basicConfig(level=logging.DEBUG)
+
+sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
+from pilot import PilotComputeService, PilotDataService, ComputeDataService, State
+
+
+COORDINATION_URL = "redis://localhost:6379"
+
+if __name__ == "__main__":
+
+
+ # create pilot data service (factory for data pilots (physical, distributed storage))
+ # and pilot data
+ pilot_data_service = PilotDataService(coordination_url=COORDINATION_URL)
+ pilot_data_description={
+ "service_url": "gs://pilot-data-" + str(uuid.uuid1()),
+ "size": 100,
+ "affinity_datacenter_label": "us-google",
+ "affinity_machine_label": ""
+ }
+ pd = pilot_data_service.create_pilot(pilot_data_description=pilot_data_description)
+
+
+ # Create Data Unit Description
+ #base_dir = "../data1"
+ #url_list = os.listdir(base_dir)
+ # make absolute paths
+ #absolute_url_list = [os.path.join(base_dir, i) for i in url_list]
+ data_unit_description = {
+ "file_urls": [os.path.join(os.getcwd(), "test.txt")],
+ "affinity_datacenter_label": "us-google",
+ "affinity_machine_label": ""
+ }
+
+ # submit pilot data to a pilot store
+ data_unit = pd.submit_data_unit(data_unit_description)
+ data_unit.wait()
+ print("Data Unit URL: " + data_unit.get_url())
+
+ pilot_compute_service = PilotComputeService(coordination_url=COORDINATION_URL)
+
+ # create pilot job service and initiate a pilot job
+ pilot_compute_description = {
+ #"service_url": 'gce+ssh://api.google.com',
+ "service_url": 'fork://localhost',
+ "number_of_processes": 1,
+ 'affinity_datacenter_label': "us-google",
+ 'affinity_machine_label': ""
+ }
+
+ pilotjob = pilot_compute_service.create_pilot(pilot_compute_description=pilot_compute_description)
+
+ compute_data_service = ComputeDataService()
+ compute_data_service.add_pilot_compute_service(pilot_compute_service)
+ compute_data_service.add_pilot_data_service(pilot_data_service)
+
+
+ # start work unit
+ compute_unit_description = {
+ "executable": "/bin/cat",
+ "arguments": ["test.txt"],
+ "number_of_processes": 1,
+ "output": "stdout.txt",
+ "error": "stderr.txt",
+ "input_data": [data_unit.get_url()],
+ "output_data": []
+ }
+
+ compute_unit = compute_data_service.submit_compute_unit(compute_unit_description)
+ logging.debug("Finished setup of ComputeDataService. Waiting for scheduling of PD")
+ compute_data_service.wait()
+
+
+ logging.debug("Terminate Pilot Compute/Data Service")
+ compute_data_service.cancel()
+ pilot_data_service.cancel()
+ pilot_compute_service.cancel()
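
The new example creates a Google Storage backed pilot data store (gs://pilot-data-<uuid>), registers a local test.txt as a data unit, and runs /bin/cat test.txt in a local fork:// pilot; the agent's new __stage_in_data_units pulls the data unit into the job's working directory before execution. Building on the commented-out lines in the example, a sketch for staging a whole directory instead of a single file (the ../data1 directory is an assumption):

    # stage every file in ../data1 as one data unit
    base_dir = "../data1"
    absolute_url_list = [os.path.join(base_dir, f) for f in os.listdir(base_dir)]
    data_unit_description = {
        "file_urls": absolute_url_list,
        "affinity_datacenter_label": "us-google",
        "affinity_machine_label": ""
    }
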