Permalink
Browse files

Protected all xapi calls to rrdd against exceptions.

Signed-off-by: Rok Strniša <rok.strnisa@citrix.com>
  • Loading branch information...
1 parent 5bab7eb commit ae078b223ee00c72d6c769d43c62ec68006d908b @rokstrnisa committed Jul 5, 2012
@@ -30,7 +30,7 @@ let http_fwd_path = xmlrpc_path ^ ".forwarded"
external has_vm_rrd : vm_uuid:string -> bool = ""
external push_rrd : vm_uuid:string -> domid:int -> is_on_localhost:bool ->
- unit = ""
+ unit -> unit = ""
external remove_rrd : uuid:string -> unit = ""
external migrate_rrd : ?session_id:string -> remote_address:string ->
vm_uuid:string -> host_uuid:string -> unit -> unit = ""
@@ -47,11 +47,11 @@ external forget_vm_ds : vm_uuid:string -> ds_name:string -> unit = ""
external query_possible_vm_dss : vm_uuid:string -> Data_source.t list = ""
external query_vm_ds : vm_uuid:string -> ds_name:string -> float = ""
-external update_use_min_max : value:bool -> unit = ""
+external update_use_min_max : value:bool -> unit -> unit = ""
external update_vm_memory_target : domid:int -> target:int64 -> unit = ""
-external set_cache_sr : sr_uuid:string -> unit = ""
+external set_cache_sr : sr_uuid:string -> unit -> unit = ""
external unset_cache_sr : unit -> unit = ""
module Plugin = struct
@@ -66,7 +66,7 @@ end
module HA = struct
external enable_and_update :
statefile_latencies:Rrd.Statefile_latency.t list ->
- heartbeat_latency:float -> xapi_latency:float -> unit = ""
+ heartbeat_latency:float -> xapi_latency:float -> unit -> unit = ""
external disable : unit -> unit = ""
end
@@ -75,7 +75,7 @@ module Deprecated = struct
external get_full_update_last_rra_idx : unit -> int = ""
(* Could change timescale to a sum type, e.g. Slow | Fast. *)
external load_rrd : uuid:string -> domid:int -> is_host:bool ->
- timescale:int -> unit = ""
+ timescale:int -> unit -> unit = ""
(* external get_host_rrd : unit -> rrd_info option = "" *)
external get_host_stats : unit -> unit = ""
end
@@ -108,7 +108,7 @@ module Deprecated = struct
* assume that the RRDs were stored locally and fall back to asking the
* master if we can't find them. *)
let load_rrd _ ~(uuid : string) ~(domid : int) ~(is_host : bool)
- ~(timescale : int) : unit =
+ ~(timescale : int) () : unit =
try
let rrd =
try
@@ -288,7 +288,7 @@ end
(* Push function to push the archived RRD to the appropriate host
* (which might be us, in which case, pop it into the hashtbl). *)
-let push_rrd _ ~(vm_uuid : string) ~(domid : int) ~(is_on_localhost : bool)
+let push_rrd _ ~(vm_uuid : string) ~(domid : int) ~(is_on_localhost : bool) ()
: unit =
try
let path = Xapi_globs.xapi_rrd_location ^ "/" ^ vm_uuid in
@@ -420,7 +420,7 @@ let query_vm_ds _ ~(vm_uuid : string) ~(ds_name : string) : float =
Rrd.query_named_ds rrdi.rrd ds_name Rrd.CF_Average
)
-let update_use_min_max _ ~(value : bool) : unit =
+let update_use_min_max _ ~(value : bool) () : unit =
debug "Updating use_min_max: New value=%b" value;
use_min_max := value
@@ -431,7 +431,7 @@ let update_vm_memory_target _ ~(domid : int) ~(target : int64) : unit =
Mutex.execute memory_targets_m
(fun _ -> Hashtbl.replace memory_targets domid target)
-let set_cache_sr _ ~(sr_uuid : string) : unit =
+let set_cache_sr _ ~(sr_uuid : string) () : unit =
Mutex.execute cache_sr_lock (fun () -> cache_sr_uuid := Some sr_uuid)
let unset_cache_sr _ () =
@@ -667,7 +667,7 @@ end
module HA = struct
let enable_and_update _ ~(statefile_latencies : Rrd.Statefile_latency.t list)
- ~(heartbeat_latency : float) ~(xapi_latency : float) =
+ ~(heartbeat_latency : float) ~(xapi_latency : float) () =
Mutex.execute Rrdd_ha_stats.m (fun _ ->
Rrdd_ha_stats.enabled := true;
Rrdd_ha_stats.Statefile_latency.all := statefile_latencies;
@@ -240,8 +240,8 @@ let update_env __context sync_keys =
let cache_sr = Db.Host.get_local_cache_sr ~__context ~self:(Helpers.get_localhost ~__context) in
let cache_sr_uuid = Db.SR.get_uuid ~__context ~self:cache_sr in
Db.SR.set_local_cache_enabled ~__context ~self:cache_sr ~value:true;
- Rrdd.set_cache_sr cache_sr_uuid
- with _ -> Rrdd.unset_cache_sr ()
+ log_and_ignore_exn (Rrdd.set_cache_sr ~sr_uuid:cache_sr_uuid)
+ with _ -> log_and_ignore_exn Rrdd.unset_cache_sr
end;
(* Load the host rrd *)
@@ -29,7 +29,7 @@ let update_configuration_from_master () =
let oc = Db.Pool.get_other_config ~__context ~self:(Helpers.get_pool ~__context) in
let new_use_min_max = (List.mem_assoc Xapi_globs.create_min_max_in_new_VM_RRDs oc) &&
(List.assoc Xapi_globs.create_min_max_in_new_VM_RRDs oc = "true") in
- Rrdd.update_use_min_max ~value:new_use_min_max;
+ log_and_ignore_exn (Rrdd.update_use_min_max ~value:new_use_min_max);
let carrier = (List.mem_assoc Xapi_globs.pass_through_pif_carrier oc) &&
(List.assoc Xapi_globs.pass_through_pif_carrier oc = "true") in
if !Xapi_xenops.pass_through_pif_carrier <> carrier
@@ -200,13 +200,16 @@ let is_vm_on_localhost ~__context ~(vm_uuid : string) : bool =
let push_rrd ~__context ~(vm_uuid : string) : unit =
let is_on_localhost = is_vm_on_localhost ~__context ~vm_uuid in
let domid = vm_uuid_to_domid ~__context ~uuid:vm_uuid in
- Rrdd.push_rrd ~vm_uuid ~domid ~is_on_localhost
+ log_and_ignore_exn (Rrdd.push_rrd ~vm_uuid ~domid ~is_on_localhost)
let migrate_rrd ~__context ?remote_address ?session_id ~vm_uuid ~host_uuid () =
let remote_address = match remote_address with
| None -> Db.Host.get_address ~__context ~self:(Ref.of_string host_uuid)
| Some a -> a
- in Rrdd.migrate_rrd ~remote_address ?session_id ~vm_uuid ~host_uuid ()
+ in
+ log_and_ignore_exn (
+ Rrdd.migrate_rrd ~remote_address ?session_id ~vm_uuid ~host_uuid
+ )
module Deprecated = struct
let get_timescale ~__context =
@@ -222,5 +225,5 @@ module Deprecated = struct
| false -> vm_uuid_to_domid ~__context ~uuid
in
let timescale = get_timescale ~__context in
- Rrdd.Deprecated.load_rrd ~uuid ~domid ~is_host ~timescale
+ log_and_ignore_exn (Rrdd.Deprecated.load_rrd ~uuid ~domid ~is_host ~timescale)
end
@@ -27,7 +27,7 @@ let time f =
(* give xapi time to reply to API messages by means of a 10 second fuse! *)
let light_fuse_and_run ?(fuse_length = !Xapi_globs.fuse_time) () =
debug "light_fuse_and_run: calling Rrdd.backup_rrds to save current RRDs locally";
- let delay_so_far = time Rrdd.backup_rrds in
+ let delay_so_far = time (fun _ -> log_and_ignore_exn Rrdd.backup_rrds) in
let new_fuse_length = max 5. (fuse_length -. delay_so_far) in
debug "light_fuse_and_run: current RRDs have been saved";
ignore (Thread.create
@@ -66,7 +66,7 @@ let light_fuse_and_dont_restart ?(fuse_length = !Xapi_globs.fuse_time) () =
ignore (Thread.create
(fun () ->
debug "light_fuse_and_dont_restart: calling Rrdd.backup_rrds to save current RRDs locally";
- Rrdd.backup_rrds ();
+ log_and_ignore_exn Rrdd.backup_rrds;
Thread.delay fuse_length;
Db_cache_impl.flush_and_exit (Db_connections.preferred_write_db ()) 0) ());
(* This is a best-effort attempt to use the database. We must not block the flush_and_exit above, hence
@@ -347,7 +347,7 @@ module Monitor = struct
let heartbeat_latency = float_of_int local.Xha_interface.LiveSetInformation.RawStatus.heartbeat_latency /. 1000. -. (float_of_int timeouts.Timeouts.heart_beat_interval) in
let xapi_latency = float_of_int (local.Xha_interface.LiveSetInformation.RawStatus.xapi_healthcheck_latency) /. 1000. in
let statefile_latencies = List.map (fun vdi -> let open Rrd.Statefile_latency in {id = vdi.Static_vdis.uuid; latency = Some statefile}) statefiles in
- Rrdd.HA.enable_and_update ~statefile_latencies ~heartbeat_latency ~xapi_latency
+ log_and_ignore_exn (Rrdd.HA.enable_and_update ~statefile_latencies ~heartbeat_latency ~xapi_latency)
) liveset.Xha_interface.LiveSetInformation.raw_status_on_local_host;
(* All hosts: create alerts from per-host warnings (if available) *)
@@ -613,7 +613,7 @@ module Monitor = struct
(fun () -> Db_gc.use_host_heartbeat_for_liveness := true);
debug "Stopping reading per-host HA stats";
- Rrdd.HA.disable ();
+ log_and_ignore_exn Rrdd.HA.disable;
debug "HA background thread told to stop")
@@ -482,11 +482,11 @@ let shutdown_and_reboot_common ~__context ~host label description operation cmd
Xapi_ha.before_clean_shutdown_or_reboot ~__context ~host;
Remote_requests.stop_request_thread();
- (* Push the Host RRD to the master. Note there are no VMs running here so we don't have to worry about them. *)
- if not(Pool_role.is_master ())
- then Rrdd.send_host_rrd_to_master ();
- (* Also save the Host RRD to local disk for us to pick up when we return. Note there are no VMs running at this point. *)
- Rrdd.backup_rrds ();
+ (* Push the Host RRD to the master. Note there are no VMs running here so we don't have to worry about them. *)
+ if not(Pool_role.is_master ())
+ then log_and_ignore_exn Rrdd.send_host_rrd_to_master;
+ (* Also save the Host RRD to local disk for us to pick up when we return. Note there are no VMs running at this point. *)
+ log_and_ignore_exn Rrdd.backup_rrds;
(* This prevents anyone actually re-enabling us until after reboot *)
Localdb.put Constants.host_disabled_until_reboot "true";
@@ -908,8 +908,10 @@ let sync_data ~__context ~host =
Xapi_sync.sync_host __context host (* Nb, no attempt to wrap exceptions yet *)
let backup_rrds ~__context ~host ~delay =
- Xapi_periodic_scheduler.add_to_queue "RRD backup" Xapi_periodic_scheduler.OneShot
- delay (fun () -> Rrdd.backup_rrds ~save_stats_locally:(Pool_role.is_master ()) ())
+ Xapi_periodic_scheduler.add_to_queue "RRD backup" Xapi_periodic_scheduler.OneShot
+ delay (fun _ ->
+ log_and_ignore_exn (Rrdd.backup_rrds ~save_stats_locally:(Pool_role.is_master ()))
+ )
let get_servertime ~__context ~host =
Date.of_float (Unix.gettimeofday ())
@@ -1355,7 +1357,7 @@ let enable_local_storage_caching ~__context ~host ~sr =
if old_sr <> Ref.null then Db.SR.set_local_cache_enabled ~__context ~self:old_sr ~value:false;
Db.Host.set_local_cache_sr ~__context ~self:host ~value:sr;
Db.SR.set_local_cache_enabled ~__context ~self:sr ~value:true;
- Rrdd.set_cache_sr ~sr_uuid:(Db.SR.get_uuid ~__context ~self:sr);
+ log_and_ignore_exn (Rrdd.set_cache_sr ~sr_uuid:(Db.SR.get_uuid ~__context ~self:sr));
end else begin
raise (Api_errors.Server_error (Api_errors.sr_operation_not_supported,[]))
end
@@ -1364,7 +1366,7 @@ let disable_local_storage_caching ~__context ~host =
assert_bacon_mode ~__context ~host;
let sr = Db.Host.get_local_cache_sr ~__context ~self:host in
Db.Host.set_local_cache_sr ~__context ~self:host ~value:Ref.null;
- Rrdd.unset_cache_sr ();
+ log_and_ignore_exn Rrdd.unset_cache_sr;
try Db.SR.set_local_cache_enabled ~__context ~self:sr ~value:false with _ -> ()
(* Here's how we do VLAN resyncing:

0 comments on commit ae078b2

Please sign in to comment.