1. Discard nova-manage host list
Reason: nova-manage service list can serve as a replacement.
Changes: nova-manage


2. Fix the inappropriate design of resource checking.
Reason:
    nova.scheduler.driver.has_enough_resource has an inappropriate design,
    so fix it. This method checked total memory rather than free memory.
    Free memory needs to be registered in the database (periodically).

    But periodic updates may flood the database with requests when there
    are many compute nodes. Currently, since memory information is used
    only by this feature, we choose to have administrators manually run
    nova-manage to make compute nodes update their own memory information.
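    For clarity, the corrected check compares the instance's memory
    requirement against the host's free memory (total minus used), as the
    scheduler change below does. A minimal sketch; the helper name is
    illustrative and not part of this commit:

        def has_enough_memory(service_ref, instance_ref):
            # Free memory = registered total minus what is already used.
            mem_avail = (int(service_ref['memory_mb'])
                         - int(service_ref['memory_mb_used']))
            # The destination must have more free memory than the
            # instance needs.
            return mem_avail > int(instance_ref['memory_mb'])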

Changes:
    nova.db.sqlalchemy.models
        - Adding vcpus_used, memory_mb_used and local_gb_used columns to
          Service. (local_gb_used and vcpus_used are just for admins'
          reference for now)

    nova.compute.manager
        - Changing nova.compute.manager.update_service:
          the Service table columns changed, so the update method must be
          changed accordingly.
        - Adding nova.compute.manager.update_available_resource,
          a responder to an administrator's request that makes compute
          nodes update their memory information.

    nova.virt.libvirt_conn
    nova.virt.xenapi_conn
    nova.virt.fake
        - Adding getter methods for memory_mb_used/local_gb_used/vcpu_used.

    nova-manage
        - Adding a request method that lets compute nodes update their own
          memory info (see the usage sketch below).
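    Illustrative usage after this commit (subcommand names are taken from
    the diff below; output formats are approximate):

        # Host listing is now served by the service subcommand:
        nova-manage service list

        # Ask a compute node to refresh its usage columns, then
        # inspect them:
        nova-manage service updateresource <host>
        nova-manage service describeresource <host>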
Kei Masumoto committed Jan 31, 2011
1 parent 7c80963 commit 09f2c47
Showing 13 changed files with 476 additions and 171 deletions.
92 changes: 47 additions & 45 deletions bin/nova-manage
@@ -499,50 +499,6 @@ class InstanceCommands(object):
print msg


class HostCommands(object):
"""Class for mangaging host(physical nodes)."""

def list(self):
"""describe host list."""

# To suppress msg: No handlers could be found for logger "amqplib"
logging.basicConfig()

service_refs = db.service_get_all(context.get_admin_context())
hosts = [h['host'] for h in service_refs]
hosts = list(set(hosts))
for host in hosts:
print host

def show(self, host):
"""describe cpu/memory/hdd info for host."""

result = rpc.call(context.get_admin_context(),
FLAGS.scheduler_topic,
{"method": "show_host_resource",
"args": {"host": host}})

# Checking the result msg format is necessary; that will be done
# when this feature is included in the API.
if type(result) != dict:
print 'Unexpected error occurred'
elif not result['ret']:
print '%s' % result['msg']
else:
cpu = result['phy_resource']['vcpus']
mem = result['phy_resource']['memory_mb']
hdd = result['phy_resource']['local_gb']

print 'HOST\t\tPROJECT\t\tcpu\tmem(mb)\tdisk(gb)'
print '%s\t\t\t%s\t%s\t%s' % (host, cpu, mem, hdd)
for p_id, val in result['usage'].items():
print '%s\t%s\t\t%s\t%s\t%s' % (host,
p_id,
val['vcpus'],
val['memory_mb'],
val['local_gb'])


class ServiceCommands(object):
"""Enable and disable running services"""

@@ -587,6 +543,53 @@ class ServiceCommands(object):
return
db.service_update(ctxt, svc['id'], {'disabled': True})

def describeresource(self, host):
"""describe cpu/memory/hdd info for host."""

result = rpc.call(context.get_admin_context(),
FLAGS.scheduler_topic,
{"method": "show_host_resource",
"args": {"host": host}})

# Checking the result msg format is necessary; that will be done
# when this feature is included in the API.
if type(result) != dict:
print 'Unexpected error occurred'
elif not result['ret']:
print '%s' % result['msg']
else:
cpu = result['phy_resource']['vcpus']
mem = result['phy_resource']['memory_mb']
hdd = result['phy_resource']['local_gb']
cpu_u = result['phy_resource']['vcpus_used']
mem_u = result['phy_resource']['memory_mb_used']
hdd_u = result['phy_resource']['local_gb_used']

print 'HOST\t\t\tPROJECT\t\tcpu\tmem(mb)\tdisk(gb)'
print '%s(total)\t\t\t%s\t%s\t%s' % (host, cpu, mem, hdd)
print '%s(used)\t\t\t%s\t%s\t%s' % (host, cpu_u, mem_u, hdd_u)
for p_id, val in result['usage'].items():
print '%s\t\t%s\t\t%s\t%s\t%s' % (host,
p_id,
val['vcpus'],
val['memory_mb'],
val['local_gb'])

def updateresource(self, host):
"""update available vcpu/memory/disk info for host."""

ctxt = context.get_admin_context()
service_refs = db.service_get_all_by_host(ctxt, host)
if len(service_refs) <= 0:
raise exception.Invalid(_('%s does not exist.') % host)

service_refs = [s for s in service_refs if s['topic'] == 'compute']
if len(service_refs) <= 0:
raise exception.Invalid(_('%s is not a compute node.') % host)

result = rpc.call(ctxt, db.queue_get_for(ctxt, FLAGS.compute_topic, host),
{"method": "update_available_resource"})


class LogCommands(object):
def request(self, request_id, logfile='/var/log/nova.log'):
@@ -606,7 +609,6 @@ CATEGORIES = [
('floating', FloatingIpCommands),
('network', NetworkCommands),
('instance', InstanceCommands),
('host', HostCommands),
('service', ServiceCommands),
('log', LogCommands)]

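For reference, a rough sketch of what describeresource prints with the new (total)/(used) rows; alignment is approximate and the values are invented for illustration:

    HOST            PROJECT     cpu  mem(mb)  disk(gb)
    host1(total)                16   32768    878
    host1(used)                 4    8192     64
    host1           project1    2    4096     20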
54 changes: 51 additions & 3 deletions nova/compute/manager.py
@@ -134,9 +134,12 @@ def update_service(self, ctxt, host, binary):
raise exception.Invalid(msg)

# Updating host information
vcpu = self.driver.get_vcpu_number()
memory_mb = self.driver.get_memory_mb()
local_gb = self.driver.get_local_gb()
vcpu = self.driver.get_vcpu_total()
memory_mb = self.driver.get_memory_mb_total()
local_gb = self.driver.get_local_gb_total()
vcpu_u = self.driver.get_vcpu_used()
memory_mb_u = self.driver.get_memory_mb_used()
local_gb_u = self.driver.get_local_gb_used()
hypervisor = self.driver.get_hypervisor_type()
version = self.driver.get_hypervisor_version()
cpu_info = self.driver.get_cpu_info()
@@ -146,10 +149,42 @@ def update_service(self, ctxt, host, binary):
{'vcpus': vcpu,
'memory_mb': memory_mb,
'local_gb': local_gb,
'vcpus_used': vcpu_u,
'memory_mb_used': memory_mb_u,
'local_gb_used': local_gb_u,
'hypervisor_type': hypervisor,
'hypervisor_version': version,
'cpu_info': cpu_info})

def update_available_resource(self, context):
"""
Update compute-node-specific info in the DB.
Although this might be a subset of update_service,
update_service() is used only when nova-compute is launched.
On the other hand, this method is used whenever an administrator's
request comes.
"""
try:
service_ref = self.db.service_get_by_args(context,
self.host,
'nova-compute')
except exception.NotFound:
msg = _(("""Cannot update resource info."""
""" Because no service record found."""))
raise exception.Invalid(msg)

# Updating host information
vcpu_u = self.driver.get_vcpu_used()
memory_mb_u = self.driver.get_memory_mb_used()
local_gb_u = self.driver.get_local_gb_used()

self.db.service_update(context,
service_ref['id'],
{'vcpus_used': vcpu_u,
'memory_mb_used': memory_mb_u,
'local_gb_used': local_gb_u})
return

def _update_state(self, context, instance_id):
"""Update the state of an instance from the driver info."""
# FIXME(ja): include other fields from state?
Expand Down Expand Up @@ -596,6 +631,19 @@ def compare_cpu(self, context, cpu_info):
""" Check the host cpu is compatible to a cpu given by xml."""
return self.driver.compare_cpu(cpu_info)

def mktmpfile(self, context):
"""make tmpfile under FLAGS.instance_path."""
return utils.mktmpfile(FLAGS.instances_path)

def exists(self, context, path):
"""Confirm existence of the tmpfile given by path."""
if not utils.exists(path):
raise exception.NotFound(_('%s not found') % path)

def remove(self, context, path):
"""remove the tmpfile given by path."""
return utils.remove(path)

def pre_live_migration(self, context, instance_id):
"""Any preparation for live migration at dst host."""

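The utils.mktmpfile/exists/remove helpers called by the three methods above are added elsewhere in this commit and are not shown in this excerpt. A minimal sketch of plausible implementations, assuming only the stdlib tempfile and os modules (the actual helpers may differ):

    import os
    import tempfile


    def mktmpfile(dir_path):
        """Create a uniquely named file under dir_path and return its
        path; the caller is responsible for removing it afterwards."""
        fd, path = tempfile.mkstemp(dir=dir_path)
        os.close(fd)
        return path


    def exists(path):
        """Return whether path exists on the local filesystem."""
        return os.path.exists(path)


    def remove(path):
        """Delete the file at path."""
        os.remove(path)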
5 changes: 4 additions & 1 deletion nova/db/sqlalchemy/models.py
@@ -164,7 +164,10 @@ class Service(BASE, NovaBase):
vcpus = Column(Integer, nullable=True)
memory_mb = Column(Integer, nullable=True)
local_gb = Column(Integer, nullable=True)
hypervisor_type = Column(String(128), nullable=True)
vcpus_used = Column(Integer, nullable=True)
memory_mb_used = Column(Integer, nullable=True)
local_gb_used = Column(Integer, nullable=True)
hypervisor_type = Column(Text(), nullable=True)
hypervisor_version = Column(Integer, nullable=True)
# Note(masumotok): Expected Strings example:
#
3 changes: 3 additions & 0 deletions nova/rpc.py
@@ -250,6 +250,9 @@ def msg_reply(msg_id, reply=None, failure=None):
try:
publisher.send({'result': reply, 'failure': failure})
except TypeError:
print '>>>>>>>>>>>>>>>>>>'
print reply
print '>>>>>>>>>>>>>>>>>>'
publisher.send(
{'result': dict((k, repr(v))
for k, v in reply.__dict__.iteritems()),
67 changes: 46 additions & 21 deletions nova/scheduler/driver.py
@@ -33,6 +33,7 @@
FLAGS = flags.FLAGS
flags.DEFINE_integer('service_down_time', 60,
'maximum time since last checkin for up service')
flags.DECLARE('instances_path', 'nova.compute.manager')


class NoValidHost(exception.Error):
@@ -163,6 +164,8 @@ def _live_migration_common_check(self, context, instance_ref, dest):
http://wiki.libvirt.org/page/TodoPreMigrationChecks
"""
# Checking shared storage connectivity
self.mounted_on_same_shared_storage(context, instance_ref, dest)

# Checking dest exists.
dservice_refs = db.service_get_all_by_host(context, dest)
@@ -207,38 +210,60 @@ def _live_migration_common_check(self, context, instance_ref, dest):
raise e

def has_enough_resource(self, context, instance_ref, dest):
"""Check if destination host has enough resource for live migration"""
"""
Check whether the destination host has enough resources for live migration.
Currently, only memory checking is done.
If storage migration (block migration, meaning live migration
without any shared storage) becomes available, local storage
checking will also be necessary.
"""

# Getting instance information
ec2_id = instance_ref['hostname']
vcpus = instance_ref['vcpus']
mem = instance_ref['memory_mb']
hdd = instance_ref['local_gb']

# Gettin host information
# Getting host information
service_refs = db.service_get_all_by_host(context, dest)
if len(service_refs) <= 0:
raise exception.Invalid(_('%s does not exist.') % dest)
service_ref = service_refs[0]

total_cpu = int(service_ref['vcpus'])
total_mem = int(service_ref['memory_mb'])
total_hdd = int(service_ref['local_gb'])
mem_total = int(service_ref['memory_mb'])
mem_used = int(service_ref['memory_mb_used'])
mem_avail = mem_total - mem_used
mem_inst = instance_ref['memory_mb']
if mem_avail <= mem_inst:
msg = _('%s is not capable to migrate %s(host:%s <= instance:%s)')
raise exception.NotEmpty(msg % (dest, ec2_id, mem_avail, mem_inst))

instances_refs = db.instance_get_all_by_host(context, dest)
for i_ref in instances_refs:
total_cpu -= int(i_ref['vcpus'])
total_mem -= int(i_ref['memory_mb'])
total_hdd -= int(i_ref['local_gb'])
def mounted_on_same_shared_storage(self, context, instance_ref, dest):
"""
Check if /nova-inst-dir/instances is mounted on the same shared storage
at both the live-migration src and dest hosts (a tmpfile created on
dest is visible from src only if both mount the same storage).
"""
src = instance_ref['host']
dst_t = db.queue_get_for(context, FLAGS.compute_topic, dest)
src_t = db.queue_get_for(context, FLAGS.compute_topic, src)

# Checking host has enough information
logging.debug(_('host(%s) remains vcpu:%s mem:%s hdd:%s,') %
(dest, total_cpu, total_mem, total_hdd))
logging.debug(_('instance(%s) has vcpu:%s mem:%s hdd:%s,') %
(ec2_id, vcpus, mem, hdd))
# create tmpfile at dest host
try:
filename = rpc.call(context, dst_t, {"method": 'mktmpfile'})
except rpc.RemoteError, e:
msg = _("Cannot create tmpfile at %s to confirm shared storage.")
logging.error(msg % FLAGS.instances_path)
raise e

if total_cpu <= vcpus or total_mem <= mem or total_hdd <= hdd:
raise exception.NotEmpty(_('%s is not capable to migrate %s') %
(dest, ec2_id))
# Make sure the file exists at the src host.
try:
rpc.call(context, src_t,
{"method": 'exists', "args":{'path':filename}})

except (rpc.RemoteError, exception.NotFound), e:
msg = (_("""Cannot comfirm %s at %s to confirm shared storage."""
"""Check if %s is same shared storage"""))
logging.error(msg % FLAGS.instance_path)
raise e

logging.debug(_('%s has_enough_resource() for %s') % (dest, ec2_id))
# then remove.
rpc.call(context, dst_t,
{"method": 'remove', "args":{'path':filename}})
11 changes: 7 additions & 4 deletions nova/scheduler/manager.py
@@ -84,7 +84,10 @@ def show_host_resource(self, context, host, *args):
# Getting physical resource information
h_resource = {'vcpus': service_ref['vcpus'],
'memory_mb': service_ref['memory_mb'],
'local_gb': service_ref['local_gb']}
'local_gb': service_ref['local_gb'],
'vcpus_used': service_ref['vcpus_used'],
'memory_mb_used': service_ref['memory_mb_used'],
'local_gb_used': service_ref['local_gb_used']}

# Getting usage resource information
u_resource = {}
@@ -108,8 +111,8 @@ def show_host_resource(self, context, host, *args):
hdd = db.instance_get_disk_sum_by_host_and_project(context,
host,
p_id)
u_resource[p_id] = {'vcpus': vcpus,
'memory_mb': mem,
'local_gb': hdd}
u_resource[p_id] = {'vcpus': int(vcpus),
'memory_mb': int(mem),
'local_gb': int(hdd)}

return {'ret': True, 'phy_resource': h_resource, 'usage': u_resource}
