1. Discard nova-manage host list
Reason: nova-manage service list can serve as a replacement.
Changes: nova-manage


2. Fix the inappropriate design of resource checking.
Reason:
    nova.scheduler.driver.has_enough_resource has an inappropriate design,
    so fix it. This method checked total memory rather than free memory.
    Free memory needs to be registered in the database (periodically).

    But periodic updates may flood the database with requests when there
    are many compute nodes. Currently, since memory information is used
    only by this feature, we choose to have administrators manually run
    nova-manage to make compute nodes update their own memory information.
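    For clarity, the corrected check compares the instance's memory
    requirement against the host's free memory (total minus used), as the
    scheduler change below does. A minimal sketch; the helper name is
    illustrative and not part of this commit:

        def has_enough_memory(service_ref, instance_ref):
            # Free memory = registered total minus what is already used.
            mem_avail = (int(service_ref['memory_mb'])
                         - int(service_ref['memory_mb_used']))
            # The destination must have more free memory than the
            # instance needs.
            return mem_avail > int(instance_ref['memory_mb'])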

Changes:
    nova.db.sqlalchemy.models
        - Adding vcpus_used, memory_mb_used and local_gb_used columns to
          Service. (local_gb_used and vcpus_used are just for admins'
          reference for now)

    nova.compute.manager
        - Changing nova.compute.manager.update_service:
          the Service table columns changed, so the update method must be
          changed accordingly.
        - Adding nova.compute.manager.update_available_resource,
          a responder to an administrator's request that makes compute
          nodes update their memory information.

    nova.virt.libvirt_conn
    nova.virt.xenapi_conn
    nova.virt.fake
        - Adding getter methods for memory_mb_used/local_gb_used/vcpu_used.

    nova-manage
        - Adding a request method that lets compute nodes update their own
          memory info (see the usage sketch below).
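    Illustrative usage after this commit (subcommand names are taken from
    the diff below; output formats are approximate):

        # Host listing is now served by the service subcommand:
        nova-manage service list

        # Ask a compute node to refresh its usage columns, then
        # inspect them:
        nova-manage service updateresource <host>
        nova-manage service describeresource <host>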
Kei Masumoto committed Jan 31, 2011
1 parent 7c80963 commit 09f2c47
Showing 13 changed files with 476 additions and 171 deletions.
92 changes: 47 additions & 45 deletions bin/nova-manage
@@ -499,50 +499,6 @@ class InstanceCommands(object):
print msg


class HostCommands(object):
"""Class for mangaging host(physical nodes)."""

def list(self):
"""describe host list."""

# To suppress msg: No handlers could be found for logger "amqplib"
logging.basicConfig()

service_refs = db.service_get_all(context.get_admin_context())
hosts = [h['host'] for h in service_refs]
hosts = list(set(hosts))
for host in hosts:
print host

def show(self, host):
"""describe cpu/memory/hdd info for host."""

result = rpc.call(context.get_admin_context(),
FLAGS.scheduler_topic,
{"method": "show_host_resource",
"args": {"host": host}})

# Checking the result msg format is necessary; that will be done
# when this feature is included in the API.
if type(result) != dict:
print 'Unexpected error occurred'
elif not result['ret']:
print '%s' % result['msg']
else:
cpu = result['phy_resource']['vcpus']
mem = result['phy_resource']['memory_mb']
hdd = result['phy_resource']['local_gb']

print 'HOST\t\tPROJECT\t\tcpu\tmem(mb)\tdisk(gb)'
print '%s\t\t\t%s\t%s\t%s' % (host, cpu, mem, hdd)
for p_id, val in result['usage'].items():
print '%s\t%s\t\t%s\t%s\t%s' % (host,
p_id,
val['vcpus'],
val['memory_mb'],
val['local_gb'])


class ServiceCommands(object):
"""Enable and disable running services"""

@@ -587,6 +543,53 @@ class ServiceCommands(object):
return
db.service_update(ctxt, svc['id'], {'disabled': True})

def describeresource(self, host):
"""describe cpu/memory/hdd info for host."""

result = rpc.call(context.get_admin_context(),
FLAGS.scheduler_topic,
{"method": "show_host_resource",
"args": {"host": host}})

# Checking the result msg format is necessary; that will be done
# when this feature is included in the API.
if type(result) != dict:
print 'Unexpected error occurred'
elif not result['ret']:
print '%s' % result['msg']
else:
cpu = result['phy_resource']['vcpus']
mem = result['phy_resource']['memory_mb']
hdd = result['phy_resource']['local_gb']
cpu_u = result['phy_resource']['vcpus_used']
mem_u = result['phy_resource']['memory_mb_used']
hdd_u = result['phy_resource']['local_gb_used']

print 'HOST\t\t\tPROJECT\t\tcpu\tmem(mb)\tdisk(gb)'
print '%s(total)\t\t\t%s\t%s\t%s' % (host, cpu, mem, hdd)
print '%s(used)\t\t\t%s\t%s\t%s' % (host, cpu_u, mem_u, hdd_u)
for p_id, val in result['usage'].items():
print '%s\t\t%s\t\t%s\t%s\t%s' % (host,
p_id,
val['vcpus'],
val['memory_mb'],
val['local_gb'])

def updateresource(self, host):
"""update available vcpu/memory/disk info for host."""

ctxt = context.get_admin_context()
service_refs = db.service_get_all_by_host(ctxt, host)
if len(service_refs) <= 0:
raise exception.Invalid(_('%s does not exist.') % host)

service_refs = [s for s in service_refs if s['topic'] == 'compute']
if len(service_refs) <= 0:
raise exception.Invalid(_('%s is not a compute node.') % host)

result = rpc.call(ctxt, db.queue_get_for(ctxt, FLAGS.compute_topic, host),
{"method": "update_available_resource"})


class LogCommands(object):
def request(self, request_id, logfile='/var/log/nova.log'):
@@ -606,7 +609,6 @@ CATEGORIES = [
('floating', FloatingIpCommands),
('network', NetworkCommands),
('instance', InstanceCommands),
('host', HostCommands),
('service', ServiceCommands),
('log', LogCommands)]

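For reference, a rough sketch of what describeresource prints with the new (total)/(used) rows; alignment is approximate and the values are invented for illustration:

    HOST            PROJECT     cpu  mem(mb)  disk(gb)
    host1(total)                16   32768    878
    host1(used)                 4    8192     64
    host1           project1    2    4096     20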
54 changes: 51 additions & 3 deletions nova/compute/manager.py
@@ -134,9 +134,12 @@ def update_service(self, ctxt, host, binary):
raise exception.Invalid(msg)

# Updating host information
vcpu = self.driver.get_vcpu_number()
memory_mb = self.driver.get_memory_mb()
local_gb = self.driver.get_local_gb()
vcpu = self.driver.get_vcpu_total()
memory_mb = self.driver.get_memory_mb_total()
local_gb = self.driver.get_local_gb_total()
vcpu_u = self.driver.get_vcpu_used()
memory_mb_u = self.driver.get_memory_mb_used()
local_gb_u = self.driver.get_local_gb_used()
hypervisor = self.driver.get_hypervisor_type()
version = self.driver.get_hypervisor_version()
cpu_info = self.driver.get_cpu_info()
@@ -146,10 +149,42 @@ def update_service(self, ctxt, host, binary):
{'vcpus': vcpu,
'memory_mb': memory_mb,
'local_gb': local_gb,
'vcpus_used': vcpu_u,
'memory_mb_used': memory_mb_u,
'local_gb_used': local_gb_u,
'hypervisor_type': hypervisor,
'hypervisor_version': version,
'cpu_info': cpu_info})

def update_available_resource(self, context):
"""
Update compute-node-specific info in the DB.
Although this might be a subset of update_service,
update_service() is used only when nova-compute is launched.
On the other hand, this method is used whenever an administrator's
request comes.
"""
try:
service_ref = self.db.service_get_by_args(context,
self.host,
'nova-compute')
except exception.NotFound:
msg = _(("""Cannot update resource info."""
""" Because no service record found."""))
raise exception.Invalid(msg)

# Updating host information
vcpu_u = self.driver.get_vcpu_used()
memory_mb_u = self.driver.get_memory_mb_used()
local_gb_u = self.driver.get_local_gb_used()

self.db.service_update(context,
service_ref['id'],
{'vcpus_used': vcpu_u,
'memory_mb_used': memory_mb_u,
'local_gb_used': local_gb_u})
return

def _update_state(self, context, instance_id):
"""Update the state of an instance from the driver info."""
# FIXME(ja): include other fields from state?
Expand Down Expand Up @@ -596,6 +631,19 @@ def compare_cpu(self, context, cpu_info):
""" Check the host cpu is compatible to a cpu given by xml."""
return self.driver.compare_cpu(cpu_info)

def mktmpfile(self, context):
"""make tmpfile under FLAGS.instance_path."""
return utils.mktmpfile(FLAGS.instances_path)

def exists(self, context, path):
"""Confirm existence of the tmpfile given by path."""
if not utils.exists(path):
raise exception.NotFound(_('%s not found') % path)

def remove(self, context, path):
"""remove the tmpfile given by path."""
return utils.remove(path)

def pre_live_migration(self, context, instance_id):
"""Any preparation for live migration at dst host."""

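The utils.mktmpfile/exists/remove helpers called by the three methods above are added elsewhere in this commit and are not shown in this excerpt. A minimal sketch of plausible implementations, assuming only the stdlib tempfile and os modules (the actual helpers may differ):

    import os
    import tempfile


    def mktmpfile(dir_path):
        """Create a uniquely named file under dir_path and return its
        path; the caller is responsible for removing it afterwards."""
        fd, path = tempfile.mkstemp(dir=dir_path)
        os.close(fd)
        return path


    def exists(path):
        """Return whether path exists on the local filesystem."""
        return os.path.exists(path)


    def remove(path):
        """Delete the file at path."""
        os.remove(path)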
5 changes: 4 additions & 1 deletion nova/db/sqlalchemy/models.py
@@ -164,7 +164,10 @@ class Service(BASE, NovaBase):
vcpus = Column(Integer, nullable=True)
memory_mb = Column(Integer, nullable=True)
local_gb = Column(Integer, nullable=True)
hypervisor_type = Column(String(128), nullable=True)
vcpus_used = Column(Integer, nullable=True)
memory_mb_used = Column(Integer, nullable=True)
local_gb_used = Column(Integer, nullable=True)
hypervisor_type = Column(Text(), nullable=True)
hypervisor_version = Column(Integer, nullable=True)
# Note(masumotok): Expected Strings example:
#
3 changes: 3 additions & 0 deletions nova/rpc.py
@@ -250,6 +250,9 @@ def msg_reply(msg_id, reply=None, failure=None):
try:
publisher.send({'result': reply, 'failure': failure})
except TypeError:
print '>>>>>>>>>>>>>>>>>>'
print reply
print '>>>>>>>>>>>>>>>>>>'
publisher.send(
{'result': dict((k, repr(v))
for k, v in reply.__dict__.iteritems()),
67 changes: 46 additions & 21 deletions nova/scheduler/driver.py
@@ -33,6 +33,7 @@
FLAGS = flags.FLAGS
flags.DEFINE_integer('service_down_time', 60,
'maximum time since last checkin for up service')
flags.DECLARE('instances_path', 'nova.compute.manager')


class NoValidHost(exception.Error):
@@ -163,6 +164,8 @@ def _live_migration_common_check(self, context, instance_ref, dest):
http://wiki.libvirt.org/page/TodoPreMigrationChecks
"""
# Checking shared storage connectivity
self.mounted_on_same_shared_storage(context, instance_ref, dest)

# Checking dest exists.
dservice_refs = db.service_get_all_by_host(context, dest)
@@ -207,38 +210,60 @@ def _live_migration_common_check(self, context, instance_ref, dest):
raise e

def has_enough_resource(self, context, instance_ref, dest):
"""Check if destination host has enough resource for live migration"""
"""
Check whether the destination host has enough resources for live migration.
Currently, only memory checking is done.
If storage migration (block migration, meaning live migration
without any shared storage) becomes available, local storage
checking will also be necessary.
"""

# Getting instance information
ec2_id = instance_ref['hostname']
vcpus = instance_ref['vcpus']
mem = instance_ref['memory_mb']
hdd = instance_ref['local_gb']

# Gettin host information
# Getting host information
service_refs = db.service_get_all_by_host(context, dest)
if len(service_refs) <= 0:
raise exception.Invalid(_('%s does not exist.') % dest)
service_ref = service_refs[0]

total_cpu = int(service_ref['vcpus'])
total_mem = int(service_ref['memory_mb'])
total_hdd = int(service_ref['local_gb'])
mem_total = int(service_ref['memory_mb'])
mem_used = int(service_ref['memory_mb_used'])
mem_avail = mem_total - mem_used
mem_inst = instance_ref['memory_mb']
if mem_avail <= mem_inst:
msg = _('%s is not capable to migrate %s(host:%s <= instance:%s)')
raise exception.NotEmpty(msg % (dest, ec2_id, mem_avail, mem_inst))

instances_refs = db.instance_get_all_by_host(context, dest)
for i_ref in instances_refs:
total_cpu -= int(i_ref['vcpus'])
total_mem -= int(i_ref['memory_mb'])
total_hdd -= int(i_ref['local_gb'])
def mounted_on_same_shared_storage(self, context, instance_ref, dest):
"""
Check if /nova-inst-dir/instances is mounted on the same shared storage
at both the live-migration src and dest hosts (a tmpfile created on
dest is visible from src only if both mount the same storage).
"""
src = instance_ref['host']
dst_t = db.queue_get_for(context, FLAGS.compute_topic, dest)
src_t = db.queue_get_for(context, FLAGS.compute_topic, src)

# Checking host has enough information
logging.debug(_('host(%s) remains vcpu:%s mem:%s hdd:%s,') %
(dest, total_cpu, total_mem, total_hdd))
logging.debug(_('instance(%s) has vcpu:%s mem:%s hdd:%s,') %
(ec2_id, vcpus, mem, hdd))
# create tmpfile at dest host
try:
filename = rpc.call(context, dst_t, {"method": 'mktmpfile'})
except rpc.RemoteError, e:
msg = _("Cannot create tmpfile at %s to confirm shared storage.")
logging.error(msg % FLAGS.instances_path)
raise e

if total_cpu <= vcpus or total_mem <= mem or total_hdd <= hdd:
raise exception.NotEmpty(_('%s is not capable to migrate %s') %
(dest, ec2_id))
# Make sure the file exists at the src host.
try:
rpc.call(context, src_t,
{"method": 'exists', "args":{'path':filename}})

except (rpc.RemoteError, exception.NotFound), e:
msg = (_("""Cannot comfirm %s at %s to confirm shared storage."""
"""Check if %s is same shared storage"""))
logging.error(msg % FLAGS.instance_path)
raise e

logging.debug(_('%s has_enough_resource() for %s') % (dest, ec2_id))
# then remove.
rpc.call(context, dst_t,
{"method": 'remove', "args":{'path':filename}})
11 changes: 7 additions & 4 deletions nova/scheduler/manager.py
@@ -84,7 +84,10 @@ def show_host_resource(self, context, host, *args):
# Getting physical resource information
h_resource = {'vcpus': service_ref['vcpus'],
'memory_mb': service_ref['memory_mb'],
'local_gb': service_ref['local_gb']}
'local_gb': service_ref['local_gb'],
'vcpus_used': service_ref['vcpus_used'],
'memory_mb_used': service_ref['memory_mb_used'],
'local_gb_used': service_ref['local_gb_used']}

# Getting usage resource information
u_resource = {}
@@ -108,8 +111,8 @@ def show_host_resource(self, context, host, *args):
hdd = db.instance_get_disk_sum_by_host_and_project(context,
host,
p_id)
u_resource[p_id] = {'vcpus': vcpus,
'memory_mb': mem,
'local_gb': hdd}
u_resource[p_id] = {'vcpus': int(vcpus),
'memory_mb': int(mem),
'local_gb': int(hdd)}

return {'ret': True, 'phy_resource': h_resource, 'usage': u_resource}
