Skip to content

Commit 7488b92

Browse files
committed
fix: switch graph if db-worker-node server unreachable
1 parent 137dd7f commit 7488b92

11 files changed

Lines changed: 944 additions & 46 deletions

File tree

docs/agent-guide/db-worker/002-desktop-db-worker-request-cap-switch-graph.md

Lines changed: 345 additions & 0 deletions
Large diffs are not rendered by default.

src/electron/electron/db_worker.cljs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,12 @@
225225
[window-id]
226226
(ensure-window-stopped! manager window-id))
227227

228+
(defn release-runtime!
  "Releases the association between `window-id` and the db-worker runtime of
  `repo` by delegating to `ensure-stopped!`.

  Arities:
  - `[repo window-id]`     uses the namespace-level `manager`.
  - `[mgr repo window-id]` uses the given manager (handy for tests that build
    their own manager).

  Returns whatever `ensure-stopped!` returns (callers treat it as a promise —
  NOTE(review): confirm against `ensure-stopped!`)."
  ([repo window-id]
   (release-runtime! manager repo window-id))
  ([mgr repo window-id]
   (ensure-stopped! mgr repo window-id)))
233+
228234
(defn release-repo!
  "Delegates to `ensure-repo-stopped!` for `repo` on the namespace-level
  `manager` and returns its result."
  [repo]
  (ensure-repo-stopped! manager repo))

src/electron/electron/handler.cljs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,11 @@
234234
(p/rejected (ex-info "repo is required" {:code :missing-repo}))
235235
(db-worker/ensure-runtime! (canonical-repo repo) (.-id window))))
236236

237+
(defmethod handle :releaseDbWorkerRuntime [^js window [_ repo]]
  ;; IPC entry point: release this window's db-worker runtime for `repo`.
  ;; A blank repo is rejected with the same `:missing-repo` error shape used
  ;; by the runtime-acquiring handler above, so the renderer can treat both
  ;; failures uniformly.
  (if-not (string/blank? repo)
    (db-worker/release-runtime! (canonical-repo repo) (.-id window))
    (p/rejected (ex-info "repo is required" {:code :missing-repo}))))
241+
237242
(defmethod handle :db-export [window [_ repo force-backup?]]
238243
(when-let [repo (canonical-repo repo)]
239244
(db/ensure-graph-dir! repo)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
(ns frontend.handler.graph-failover
2+
"Switches away from an unavailable active graph without deleting graph data."
3+
(:require [frontend.context.i18n :refer [t]]
4+
[frontend.handler.notification :as notification]
5+
[frontend.state :as state]
6+
[frontend.util.text :as text-util]))
7+
8+
(defn- graph-name
  "Display name for `repo`, as computed by
  `text-util/get-graph-name-from-path`."
  [repo]
  (text-util/get-graph-name-from-path repo))
11+
12+
(defn- fallback-graph
  "Returns the `:url` of the first entry in `(state/get-repos)` whose url is
  truthy and different from `failed-repo`, or nil when there is none."
  [failed-repo]
  (->> (state/get-repos)
       (map :url)
       (filter (fn [candidate]
                 (and candidate (not= failed-repo candidate))))
       first))
18+
19+
(defn switch-away-from-current-repo!
  "Moves the UI off `failed-repo` when it is the current repo.

  Does nothing and returns nil when `failed-repo` is not the current repo.
  Otherwise the current repo is cleared first, then:
  - if another graph is known, a warning is shown, a non-persisted
    `:graph/switch` event is published and that graph's url is returned;
  - if not, a warning is shown and nil is returned.

  The second argument is accepted for callers that pass context (e.g. a
  `:reason`) but is currently ignored."
  ([failed-repo]
   (switch-away-from-current-repo! failed-repo {}))
  ([failed-repo _opts]
   (when (= failed-repo (state/get-current-repo))
     (state/set-current-repo! nil)
     (let [target (fallback-graph failed-repo)
           failed-name (graph-name failed-repo)]
       (if target
         (let [message (t :graph.switch/db-worker-unavailable-switching-warning
                          failed-name
                          (graph-name target))]
           (notification/show! message :warning)
           (state/pub-event! [:graph/switch target {:persist? false}])
           target)
         (let [message (t :graph.switch/db-worker-unavailable-no-fallback-warning
                          failed-name)]
           (notification/show! message :warning)
           nil))))))

src/main/frontend/persist_db.cljs

Lines changed: 102 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
(:require [electron.ipc :as ipc]
44
[frontend.config :as config]
55
[frontend.db.transact :as db-transact]
6+
[frontend.handler.graph-failover :as graph-failover]
67
[frontend.persist-db.browser :as browser]
78
[frontend.persist-db.protocol :as protocol]
89
[frontend.persist-db.remote :as remote]
@@ -13,12 +14,16 @@
1314
[logseq.db :as ldb]
1415
[promesa.core :as p]))
1516

17+
;; Failover threshold: once the `:request-failures` counter kept in
;; `remote-runtime-state` reaches this value, `record-active-request-failure!`
;; triggers the db-worker failover. `reset-active-request-failures!` puts the
;; counter back to 0.
(def max-db-worker-request-failures 3)
18+
1619
(defonce opfs-db (browser/->InBrowser))
1720
(defonce remote-db (atom nil))
1821
(defonce remote-repo (atom nil))
22+
(defonce remote-runtime-state (atom nil))
1923

2024
(defn- clear-remote-runtime!
  "Forgets the current remote runtime: resets the runtime bookkeeping, the
  remote client, the remote repo and `state/*db-worker` to nil, in that
  order. Returns nil."
  []
  (doseq [holder [remote-runtime-state remote-db remote-repo state/*db-worker]]
    (reset! holder nil)))
@@ -37,6 +42,78 @@
3742
(p/resolved true)))
3843
(p/resolved false)))
3944

45+
(defn- set-remote-runtime!
  "Installs `client` as the active remote runtime for `repo`.

  Records a fresh bookkeeping entry (zero failures, failover not yet
  triggered) tagged with `session-id`, then publishes the client, the repo
  and the client's `:wrapped-worker`. Returns the wrapped worker."
  [repo client session-id]
  (let [fresh-state {:repo repo
                     :client client
                     :session-id session-id
                     :request-failures 0
                     :failover-triggered? false}
        worker (:wrapped-worker client)]
    (reset! remote-runtime-state fresh-state)
    (reset! remote-db client)
    (reset! remote-repo repo)
    (reset! state/*db-worker worker)))
55+
56+
(defn- active-runtime-session?
  "True when `state` describes exactly the runtime identified by `repo` and
  `session-id`, i.e. both its `:repo` and `:session-id` match."
  [state repo session-id]
  (= [repo session-id]
     [(:repo state) (:session-id state)]))
60+
61+
(defn- reset-active-request-failures!
  "Zeroes the failure counter of the runtime identified by `repo` and
  `session-id`. State belonging to any other runtime/session is left
  untouched, so late callbacks from a replaced client cannot reset the
  counter of its successor."
  [repo session-id]
  (swap! remote-runtime-state
         (fn [current]
           (cond-> current
             (active-runtime-session? current repo session-id)
             (assoc :request-failures 0)))))
68+
69+
(defn- server-unavailable-error?
  "Decides whether `error` should count towards db-worker failover.

  True when the error carries no `ex-data` at all, when its `:code` is one of
  the known connectivity codes, or when its `:status` is 0."
  [error]
  (let [data (ex-data error)
        unavailable-codes #{:server-unavailable
                            :db-worker-unavailable
                            :connection-refused
                            :fetch-failed
                            :network-error}]
    (or (nil? data)
        (contains? unavailable-codes (:code data))
        (= 0 (:status data)))))
79+
80+
(defn- trigger-db-worker-failover!
  "Tears down the runtime for `repo` and switches the UI to another graph.

  Steps, in order:
  1. stop `remote-client` when one is given (failures are only logged);
  2. clear the local runtime bookkeeping if it still points at `repo`;
  3. ask the main process to release the runtime (failures are only logged);
  4. switch away from `repo`.

  Steps 1 and 3 are not awaited. Returns the result of
  `graph-failover/switch-away-from-current-repo!`."
  [repo remote-client]
  (when remote-client
    (p/catch (remote/stop! remote-client)
             (fn [error]
               (log/warn :db-worker-failover-stop-error {:repo repo
                                                         :error error}))))
  (when (= repo @remote-repo)
    (clear-remote-runtime!))
  (p/catch (ipc/ipc "releaseDbWorkerRuntime" repo)
           (fn [error]
             (log/warn :db-worker-failover-release-runtime-error {:repo repo
                                                                  :error error})))
  (graph-failover/switch-away-from-current-repo! repo {:reason :db-worker-request-failed}))
94+
95+
(defn- record-active-request-failure!
  "Counts one failed request against the runtime identified by `repo` and
  `session-id`, and triggers failover when the threshold is reached.

  Only errors accepted by `server-unavailable-error?` are counted. Failures
  reported for a runtime/session that is no longer the active one, or after
  failover has already been triggered, leave the state untouched.

  Returns the result of `trigger-db-worker-failover!` when this call crossed
  the threshold, otherwise nil."
  [repo session-id error]
  (when (server-unavailable-error? error)
    ;; The update fn is kept pure; whether *this* call crossed the threshold is
    ;; derived afterwards by comparing the state before and after the swap.
    (let [[before after]
          (swap-vals! remote-runtime-state
                      (fn [state]
                        (if (and (active-runtime-session? state repo session-id)
                                 (not (:failover-triggered? state)))
                          (let [failures (inc (or (:request-failures state) 0))]
                            (cond-> (assoc state :request-failures failures)
                              (>= failures max-db-worker-request-failures)
                              (assoc :failover-triggered? true)))
                          state)))]
      (when (and (:failover-triggered? after)
                 (not (:failover-triggered? before)))
        (trigger-db-worker-failover! repo (:client after))))))
116+
40117
(defn- node-runtime?
41118
[]
42119
(and (exists? js/process)
@@ -57,26 +134,31 @@
57134
[repo]
58135
(if (or (nil? repo) (= repo @remote-repo))
59136
(p/resolved @remote-db)
60-
(p/let [_ (when @remote-db
61-
(remote/stop! @remote-db))
62-
runtime (ipc/ipc "db-worker-runtime" repo)
63-
client (remote/start! (assoc runtime
64-
:repo repo
65-
:event-handler worker-handler/handle))]
66-
(reset! remote-db client)
67-
(reset! remote-repo repo)
68-
(reset! state/*db-worker (:wrapped-worker client))
69-
(p/let [_ (state/<invoke-db-worker :thread-api/set-db-sync-config
70-
(current-db-sync-config))]
71-
nil)
72-
(ldb/register-transact-fn!
73-
(fn remote-transact!
74-
[repo tx-data tx-meta]
75-
(db-transact/transact browser/transact!
76-
(if (string? repo) repo (state/get-current-repo))
77-
tx-data
78-
(assoc tx-meta :client-id (:client-id @state/state)))))
79-
client)))
137+
(let [session-id (str (random-uuid))]
138+
(p/let [_ (when @remote-db
139+
(remote/stop! @remote-db))
140+
runtime (ipc/ipc "db-worker-runtime" repo)
141+
client (remote/start! (assoc runtime
142+
:repo repo
143+
:event-handler worker-handler/handle
144+
:on-invoke-success (fn [_method _args _result]
145+
(reset-active-request-failures! repo session-id))
146+
:on-invoke-failure (fn [_method _args error]
147+
(record-active-request-failure! repo session-id error))
148+
:on-event-error (fn [error]
149+
(record-active-request-failure! repo session-id error))))]
150+
(set-remote-runtime! repo client session-id)
151+
(p/let [_ (state/<invoke-db-worker :thread-api/set-db-sync-config
152+
(current-db-sync-config))]
153+
nil)
154+
(ldb/register-transact-fn!
155+
(fn remote-transact!
156+
[repo tx-data tx-meta]
157+
(db-transact/transact browser/transact!
158+
(if (string? repo) repo (state/get-current-repo))
159+
tx-data
160+
(assoc tx-meta :client-id (:client-id @state/state)))))
161+
client))))
80162

81163
(defn <start-runtime!
82164
[]

src/main/frontend/persist_db/remote.cljs

Lines changed: 40 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -87,37 +87,49 @@
8787
{:close! (fn []
8888
(.close es))})
8989
{:close! (fn [] nil)}))]
90-
(assoc opts
91-
:fetch-fn (or fetch-fn default-fetch-fn)
92-
:open-sse-fn (or open-sse-fn default-open-sse-fn)
93-
:schedule-fn (or schedule-fn (fn [f delay-ms]
94-
(js/setTimeout f delay-ms)))
95-
:reconnect-delay-ms (or reconnect-delay-ms 1000))))
90+
(assoc opts
91+
:fetch-fn (or fetch-fn default-fetch-fn)
92+
:open-sse-fn (or open-sse-fn default-open-sse-fn)
93+
:schedule-fn (or schedule-fn (fn [f delay-ms]
94+
(js/setTimeout f delay-ms)))
95+
:reconnect-delay-ms (or reconnect-delay-ms 1000))))
9696

9797
(defn invoke!
  "Invokes `method` with `args` on the db-worker server.

  POSTs a JSON payload (`method` plus transit-encoded `args`) to
  `(invoke-url base-url)` using `fetch-fn`.

  Returns a promise that
  - resolves to the transit-decoded `:resultTransit` for a 2xx response;
  - rejects with an `ex-info` carrying `:status`, a normalized `:code` and,
    when present, the server's `:error` for any other status;
  - rejects with the underlying error when the request or decoding fails.

  Optional observers:
  - `on-invoke-success` is called with `method`, `args` and the result;
  - `on-invoke-failure` is called with `method`, `args` and the error.
  Observers are notification-only: an exception thrown by one is logged and
  never changes the outcome of the request."
  [{:keys [base-url auth-token fetch-fn on-invoke-success on-invoke-failure]} method args]
  (let [payload (js/JSON.stringify
                 (clj->js {:method method
                           :argsTransit (ldb/write-transit-str args)}))
        ;; Without this guard a throwing success observer would reject an
        ;; otherwise successful request (and be reported as a failure), and a
        ;; throwing failure observer would replace the original error.
        notify! (fn [callback & callback-args]
                  (when callback
                    (try
                      (apply callback callback-args)
                      (catch :default e
                        (js/console.error "db-worker invoke callback failed" e)))))]
    (-> (p/let [{:keys [status body]}
                (fetch-fn {:method "POST"
                           :url (invoke-url base-url)
                           :headers (base-headers auth-token)
                           :body payload})
                parsed (parse-response-body body)]
          (if (<= 200 status 299)
            (let [result (ldb/read-transit-str (:resultTransit parsed))]
              (notify! on-invoke-success method args result)
              result)
            (let [error (:error parsed)]
              (throw (ex-info (or (:message error) "db-worker invoke failed")
                              (cond-> {:status status
                                       :code (normalize-code (:code error))}
                                error (assoc :error error)))))))
        (p/catch (fn [error]
                   (notify! on-invoke-failure method args error)
                   ;; Re-throw so callers still observe the original rejection.
                   (throw error))))))
115123

116124
(defn connect-events!
117-
[{:keys [base-url auth-token event-handler open-sse-fn schedule-fn reconnect-delay-ms]} wrapped-worker]
125+
[{:keys [base-url auth-token event-handler open-sse-fn schedule-fn reconnect-delay-ms on-event-error]} wrapped-worker]
118126
(let [connected? (atom true)
119127
buffer (atom "")
120128
subscription (atom nil)
129+
close-subscription! (fn []
130+
(when-let [close! (:close! @subscription)]
131+
(close!))
132+
(reset! subscription nil))
121133
dispatch! (fn [event-str]
122134
(when-let [line (data-line event-str)]
123135
(let [event (parse-response-body line)
@@ -140,14 +152,17 @@
140152
(reset! buffer next-buffer)
141153
(dispatch! event-str)
142154
(recur))))))
143-
:on-error (fn [_error]
155+
:on-error (fn [error]
144156
(when @connected?
145-
(schedule-fn open! reconnect-delay-ms)))}))))]
157+
(close-subscription!)
158+
(when on-event-error
159+
(on-event-error error))
160+
(when @connected?
161+
(schedule-fn open! reconnect-delay-ms))))}))))]
146162
(open!)
147163
{:disconnect! (fn []
148164
(reset! connected? false)
149-
(when-let [close! (:close! @subscription)]
150-
(close!))
165+
(close-subscription!)
151166
nil)})))
152167

153168
(defn- method->str

src/resources/dicts/en.edn

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,8 @@
734734
:graph.page/title "Page graph"
735735

736736
:graph.switch/add-graph-action "Yes, add another graph"
737+
:graph.switch/db-worker-unavailable-no-fallback-warning "The database worker for graph \"{1}\" is unavailable, and no other graph is available."
738+
:graph.switch/db-worker-unavailable-switching-warning "The database worker for graph \"{1}\" is unavailable. Switching to graph \"{2}\"."
737739
:graph.switch/empty-desc "No matched graphs. Do you want to add another one?"
738740
:graph.switch/prompt "Switch to:"
739741
:graph.switch/select-prompt "Select a graph"

src/resources/dicts/zh-cn.edn

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,8 @@
731731
:graph.page/title "页面图谱"
732732

733733
:graph.switch/add-graph-action "再添加一份图谱"
734+
:graph.switch/db-worker-unavailable-no-fallback-warning "图谱“{1}”的数据库 worker 不可用,且没有其他可用图谱。"
735+
:graph.switch/db-worker-unavailable-switching-warning "图谱“{1}”的数据库 worker 不可用。正在切换到图谱“{2}”。"
734736
:graph.switch/empty-desc "没有匹配的图谱。你想再添加一份图谱吗?"
735737
:graph.switch/prompt "切换到:"
736738
:graph.switch/select-prompt "请选择一份图谱"

src/test/electron/db_worker_manager_test.cljs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,26 @@
7878
(is false (str "unexpected error: " e))))
7979
(p/finally (fn [] (done)))))))
8080

81+
(deftest release-runtime-detaches-only-requested-window-repo-association
  ;; Two windows share "graph-a". Releasing the runtime for one of them must
  ;; not stop the daemon and must leave the other window's association intact.
  (async done
    (let [stopped-repos (atom [])
          mgr (db-worker/create-manager
               {:start-daemon! (fn [repo] (p/resolved (runtime repo)))
                :stop-daemon! (fn [rt]
                                (swap! stopped-repos conj (:repo rt))
                                (p/resolved true))})]
      (-> (p/let [_ (db-worker/ensure-started! mgr "graph-a" :window-1)
                  _ (db-worker/ensure-started! mgr "graph-a" :window-2)
                  _ (db-worker/release-runtime! mgr "graph-a" :window-1)
                  {:keys [window->repo repos]} @(:state mgr)]
            (is (empty? @stopped-repos))
            (is (nil? (get window->repo :window-1)))
            (is (= "graph-a" (get window->repo :window-2)))
            (is (= #{:window-2} (get-in repos ["graph-a" :windows]))))
          (p/catch (fn [e]
                     (is false (str "unexpected error: " e))))
          (p/finally (fn [] (done)))))))
100+
81101
(deftest ensure-stopped-stale-repo-does-not-clear-new-window-mapping
82102
(async done
83103
(let [stop-calls (atom [])

0 commit comments

Comments
 (0)