Skip to content

Commit f4891ee

Browse files
smira and dsseng
authored and committed
feat: implement logs persistence
Implement a log persistence controller, rotate logs and bufferize writes. Fixes #11461 Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com> Co-authored-by: Dmitrii Sharshakov <dmitry.sharshakov@siderolabs.com> Signed-off-by: Dmitrii Sharshakov <dmitry.sharshakov@siderolabs.com> (cherry picked from commit a0cfc35)
1 parent c9a4f95 commit f4891ee

File tree

15 files changed

+782
-3
lines changed

15 files changed

+782
-3
lines changed

hack/release.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,13 @@ You can still expand the list of supported cipher suites via the `cluster.apiSer
209209
title = "Kernel Log"
210210
description = """\
211211
The kernel log (dmesg) is now also available as the service log named `kernel` (`talosctl logs kernel`).
212+
"""
213+
214+
[notes.persistent-logs]
215+
title = "Persistent logs"
216+
description = """\
217+
Talos now stores system component logs in /var/log, featuring automatic log rotation and keeping the two most
218+
recent log files. This change allows collecting logs from Talos like on any other Linux system.
212219
"""
213220

214221
[make_deps]

internal/app/machined/pkg/controllers/block/volume_config_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ func (suite *VolumeConfigSuite) TestReconcileDefaults() {
162162
})
163163

164164
ctest.AssertResources(suite, []resource.ID{
165-
"/var/log",
165+
constants.LogMountPoint,
166166
"/var/log/audit",
167167
"/var/log/containers",
168168
"/var/log/pods",
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
// Package logfile implements a buffered, rotating log file.
6+
package logfile
7+
8+
import (
9+
"bufio"
10+
"fmt"
11+
"io"
12+
"os"
13+
"sync"
14+
)
15+
16+
// LogFile is a buffered log file which rotates itself once it reaches a size threshold.
//
// The underlying file is opened lazily by the first Write and re-opened by the
// first Write after every rotation. Lines are accumulated in an in-memory buffer
// and are only guaranteed to reach the file on Flush, Close or rotation.
// All exported methods are serialized by an internal mutex.
type LogFile struct {
	mu sync.Mutex
	// file is the currently open log file, nil when no file is open.
	file *os.File
	// buf buffers writes to file.
	buf bufio.Writer

	// path is the location of the active log file; the rotated file is path+".1".
	path string
	// size is the length of the active file, including bytes still held in buf.
	size int64
	// rotationThreshold is the size at which the active file gets rotated.
	rotationThreshold int64
}

// NewLogFile creates a LogFile writing to path and rotating it to path+".1"
// once its size reaches rotationThreshold bytes.
//
// No file is opened or created until the first Write.
func NewLogFile(path string, rotationThreshold int64) *LogFile {
	return &LogFile{
		path:              path,
		rotationThreshold: rotationThreshold,
	}
}

// open makes sure the log file is open, positioned at its end and bound to the buffer.
//
// The caller must hold lf.mu.
func (lf *LogFile) open() error {
	if lf.file != nil {
		return nil
	}

	f, err := os.OpenFile(lf.path, os.O_CREATE|os.O_WRONLY, 0o640)
	if err != nil {
		return fmt.Errorf("error opening log file %q: %w", lf.path, err)
	}

	size, err := f.Seek(0, io.SeekEnd)
	if err != nil {
		// Do not keep a half-initialized file around: it would leak the descriptor
		// and the next Write would use a buffer which is not bound to any file.
		f.Close() //nolint:errcheck

		return fmt.Errorf("error determining log file %q length: %w", lf.path, err)
	}

	lf.file = f
	lf.size = size
	lf.buf.Reset(f)

	return nil
}

// Write appends a line to the end of file, handling file creation and rotation.
//
// A newline is appended after line; line itself is never modified. Once the file
// size (including buffered bytes) reaches the rotation threshold, the file is
// flushed, closed and renamed to path+".1", replacing any previously rotated file.
func (lf *LogFile) Write(line []byte) error {
	lf.mu.Lock()
	defer lf.mu.Unlock()

	if err := lf.open(); err != nil {
		return err
	}

	// The line and the terminator are written separately: append(line, '\n') could
	// overwrite bytes in the caller's backing array when line has spare capacity.
	n, err := lf.buf.Write(line)
	lf.size += int64(n)

	if err == nil {
		if err = lf.buf.WriteByte('\n'); err == nil {
			lf.size++
		}
	}

	if err != nil {
		// bufio.Writer errors are sticky, so the buffer is unusable from now on.
		// Drop it together with the file to let the next Write start from scratch;
		// the write error is the one worth reporting, so the close error is ignored.
		lf.release() //nolint:errcheck

		return fmt.Errorf("error writing log line to file %q: %w", lf.path, err)
	}

	if lf.size < lf.rotationThreshold {
		return nil
	}

	if err = lf.close(); err != nil {
		return err
	}

	if err = os.Rename(lf.path, lf.path+".1"); err != nil {
		return fmt.Errorf("error renaming log file %q: %w", lf.path, err)
	}

	return nil
}

// flush writes the buffered data to the file.
//
// The caller must hold lf.mu.
func (lf *LogFile) flush() error {
	if err := lf.buf.Flush(); err != nil {
		return fmt.Errorf("failed to flush log file %s buffer: %w", lf.path, err)
	}

	return nil
}

// Flush flushes the internal buffer to persist data to the filesystem.
func (lf *LogFile) Flush() error {
	lf.mu.Lock()
	defer lf.mu.Unlock()

	return lf.flush()
}

// release detaches the buffer from the file, discarding anything still buffered,
// and closes the file if it is open.
//
// The caller must hold lf.mu.
func (lf *LogFile) release() error {
	lf.buf.Reset(nil)

	if lf.file == nil {
		return nil
	}

	err := lf.file.Close()
	lf.file = nil

	if err != nil {
		return fmt.Errorf("failed to close log file %s: %w", lf.path, err)
	}

	return nil
}

// close flushes the buffer and closes the file.
//
// The file is closed even if the flush fails, so that a flush error neither leaks
// the file descriptor nor leaves the LogFile stuck; the flush error takes
// precedence over the close error.
//
// The caller must hold lf.mu.
func (lf *LogFile) close() error {
	flushErr := lf.flush()
	closeErr := lf.release()

	if flushErr != nil {
		return flushErr
	}

	return closeErr
}

// Close flushes and closes the underlying file.
//
// Closing a LogFile which has no open file is a no-op; a Write after Close
// re-opens the file.
func (lf *LogFile) Close() error {
	lf.mu.Lock()
	defer lf.mu.Unlock()

	return lf.close()
}
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
package logfile_test
5+
6+
import (
7+
"bytes"
8+
"os"
9+
"path/filepath"
10+
"strconv"
11+
"sync"
12+
"testing"
13+
14+
"github.com/stretchr/testify/require"
15+
16+
"github.com/siderolabs/talos/internal/app/machined/pkg/controllers/runtime/internal/logfile"
17+
)
18+
19+
func TestWrite(t *testing.T) {
20+
dir := t.TempDir()
21+
path := filepath.Join(dir, "test.log")
22+
23+
lf := logfile.NewLogFile(path, 1024)
24+
defer require.NoError(t, lf.Close())
25+
26+
err := lf.Write([]byte("hello world"))
27+
require.NoError(t, err)
28+
29+
// Expect write to retain data in the buffer
30+
st, err := os.Stat(path)
31+
require.NoError(t, err)
32+
require.Equal(t, int64(0), st.Size(), "file should be empty before flush")
33+
require.NoError(t, lf.Flush())
34+
35+
// After flush, check the data got written to the file
36+
content, err := os.ReadFile(path)
37+
require.NoError(t, err)
38+
require.Equal(t, "hello world\n", string(content))
39+
}
40+
41+
func TestWriteMultipleLines(t *testing.T) {
42+
dir := t.TempDir()
43+
path := filepath.Join(dir, "test.log")
44+
45+
lf := logfile.NewLogFile(path, 1024)
46+
defer require.NoError(t, lf.Close())
47+
48+
lines := []string{"line1", "line2", "line3"}
49+
for _, line := range lines {
50+
require.NoError(t, lf.Write([]byte(line)))
51+
}
52+
53+
require.NoError(t, lf.Flush())
54+
55+
content, err := os.ReadFile(path)
56+
require.NoError(t, err)
57+
require.Equal(t, "line1\nline2\nline3\n", string(content))
58+
}
59+
60+
func TestLogRotation(t *testing.T) {
61+
dir := t.TempDir()
62+
path := filepath.Join(dir, "test.log")
63+
expectedRotatedPath := path + ".1"
64+
65+
lf := logfile.NewLogFile(path, 50)
66+
defer require.NoError(t, lf.Close())
67+
68+
// We write 4 lines (indices 0-3)
69+
// expecting 0-2 to be written before rotation and 3 after rotation
70+
for i := range 4 {
71+
line := []byte("_20_character_line_" + strconv.Itoa(i))
72+
require.NoError(t, lf.Write(line))
73+
}
74+
75+
_, err := os.Stat(expectedRotatedPath)
76+
require.NoError(t, err)
77+
78+
// Verify the rotated file contains the written data
79+
rotatedContent, err := os.ReadFile(expectedRotatedPath)
80+
require.NoError(t, err)
81+
require.Len(t, rotatedContent, 63)
82+
require.Contains(t, string(rotatedContent), "_20_character_line_2")
83+
require.NoError(t, lf.Flush())
84+
85+
currentContent, err := os.ReadFile(path)
86+
require.NoError(t, err)
87+
require.Len(t, currentContent, 21)
88+
require.Contains(t, string(currentContent), "_20_character_line_3")
89+
}
90+
91+
func TestLogRotationMultipleTimes(t *testing.T) {
92+
dir := t.TempDir()
93+
path := filepath.Join(dir, "test.log")
94+
rotatedPath := path + ".1"
95+
96+
lf := logfile.NewLogFile(path, 40)
97+
defer require.NoError(t, lf.Close())
98+
99+
for i := range 10 {
100+
line := []byte("_20_character_line_" + strconv.Itoa(i))
101+
require.NoError(t, lf.Write(line))
102+
}
103+
104+
// Rotated file should exist and contain most recent events before the current
105+
rotatedContent, err := os.ReadFile(rotatedPath)
106+
require.NoError(t, err)
107+
require.Len(t, rotatedContent, 42)
108+
require.Contains(t, string(rotatedContent), "_20_character_line_8")
109+
require.Contains(t, string(rotatedContent), "_20_character_line_9")
110+
}
111+
112+
func TestFlushWithoutFile(t *testing.T) {
113+
dir := t.TempDir()
114+
path := filepath.Join(dir, "test.log")
115+
116+
lf := logfile.NewLogFile(path, 1024)
117+
defer require.NoError(t, lf.Close())
118+
119+
require.NoError(t, lf.Flush())
120+
}
121+
122+
func TestClose(t *testing.T) {
123+
dir := t.TempDir()
124+
path := filepath.Join(dir, "test.log")
125+
126+
lf := logfile.NewLogFile(path, 1024)
127+
128+
require.NoError(t, lf.Write([]byte("data")))
129+
130+
err := lf.Close()
131+
require.NoError(t, err)
132+
133+
// Expect Close to have flushed the buffer
134+
content, err := os.ReadFile(path)
135+
require.NoError(t, err)
136+
require.Contains(t, string(content), "data")
137+
}
138+
139+
func TestCloseWithoutWrite(t *testing.T) {
140+
dir := t.TempDir()
141+
path := filepath.Join(dir, "test.log")
142+
143+
lf := logfile.NewLogFile(path, 1024)
144+
require.NoError(t, lf.Close())
145+
}
146+
147+
func TestConcurrentWrites(t *testing.T) {
148+
dir := t.TempDir()
149+
path := filepath.Join(dir, "test.log")
150+
151+
// Do not rotate while the test runs
152+
lf := logfile.NewLogFile(path, 100000)
153+
defer require.NoError(t, lf.Close())
154+
155+
var wg sync.WaitGroup
156+
157+
numGoroutines := 10
158+
writesPerGoroutine := 100
159+
160+
for range numGoroutines {
161+
wg.Go(func() {
162+
for range writesPerGoroutine {
163+
require.NoError(t, lf.Write([]byte("goroutine write")))
164+
}
165+
})
166+
}
167+
168+
wg.Wait()
169+
170+
require.NoError(t, lf.Flush())
171+
172+
content, err := os.ReadFile(path)
173+
require.NoError(t, err)
174+
175+
// Count lines to verify all writes succeeded
176+
lineCount := bytes.Count(content, []byte("\n"))
177+
expectedLines := numGoroutines * writesPerGoroutine
178+
require.Equal(t, expectedLines, lineCount)
179+
}
180+
181+
func TestConcurrentWriteAndFlush(t *testing.T) {
182+
dir := t.TempDir()
183+
path := filepath.Join(dir, "test.log")
184+
185+
lf := logfile.NewLogFile(path, 10000)
186+
defer require.NoError(t, lf.Close())
187+
188+
var wg sync.WaitGroup
189+
190+
// Writer goroutines
191+
for range 5 {
192+
wg.Go(func() {
193+
for range 50 {
194+
require.NoError(t, lf.Write([]byte("concurrent data")))
195+
}
196+
})
197+
}
198+
199+
// Flusher goroutines
200+
for range 3 {
201+
wg.Go(func() {
202+
for range 10 {
203+
require.NoError(t, lf.Flush())
204+
}
205+
})
206+
}
207+
208+
wg.Wait()
209+
210+
require.NoError(t, lf.Flush())
211+
}
212+
213+
func TestConcurrentWritesWithRotation(t *testing.T) {
214+
dir := t.TempDir()
215+
path := filepath.Join(dir, "test.log")
216+
217+
// Small threshold to trigger rotation during concurrent writes
218+
lf := logfile.NewLogFile(path, 100)
219+
defer require.NoError(t, lf.Close())
220+
221+
var wg sync.WaitGroup
222+
223+
numGoroutines := 5
224+
writesPerGoroutine := 50
225+
226+
for range numGoroutines {
227+
wg.Go(func() {
228+
for range writesPerGoroutine {
229+
require.NoError(t, lf.Write([]byte("rotation test line")))
230+
}
231+
})
232+
}
233+
234+
wg.Wait()
235+
236+
_, err := os.Stat(path + ".1")
237+
require.NoError(t, err)
238+
}

0 commit comments

Comments
 (0)