forked from kubernetes/kubernetes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
conntrack.go
145 lines (123 loc) · 4.48 KB
/
conntrack.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package app
import (
"errors"
"io/ioutil"
"strconv"
"strings"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/util/sysctl"
)
// Conntracker is an interface to the global netfilter connection-tracking
// sysctls. Descriptions of the various sysctl fields can be found here:
//
// https://www.kernel.org/doc/Documentation/networking/nf_conntrack-sysctl.txt
type Conntracker interface {
	// SetMax adjusts nf_conntrack_max to the given value and returns an
	// error if the setting could not be applied.
	SetMax(max int) error
	// SetTCPEstablishedTimeout adjusts nf_conntrack_tcp_timeout_established
	// to the given number of seconds.
	SetTCPEstablishedTimeout(seconds int) error
	// SetTCPCloseWaitTimeout adjusts nf_conntrack_tcp_timeout_close_wait
	// to the given number of seconds.
	SetTCPCloseWaitTimeout(seconds int) error
}
// realConntracker is a stateless type whose methods apply conntrack settings
// through sysctl entries under net/netfilter/ and, in SetMax, the
// nf_conntrack hashsize parameter file under /sys.
type realConntracker struct{}
// readOnlySysFSError is the sentinel error returned when sysfs is found to be
// mounted without the 'rw' option, in which case the conntrack hashsize
// cannot be written. Callers are expected to handle it separately from other
// errors.
var readOnlySysFSError = errors.New("readOnlySysFS")
// SetMax sets the nf_conntrack_max sysctl to max and then, if the current
// conntrack hashsize is smaller than max/4, raises the hashsize to max/4.
//
// It returns readOnlySysFSError when the hashsize needs to grow but sysfs is
// not writable, and any other error encountered while reading or writing the
// settings.
func (rct realConntracker) SetMax(max int) error {
	const hashsizePath = "/sys/module/nf_conntrack/parameters/hashsize"

	if err := rct.setIntSysCtl("nf_conntrack_max", max); err != nil {
		return err
	}
	glog.Infof("Setting nf_conntrack_max to %d", max)

	// The hashsize parameter cannot be written by a process outside the
	// initial network namespace
	// (https://github.com/torvalds/linux/blob/v4.10/net/netfilter/nf_conntrack_core.c#L1795-L1796).
	// That is normally not a problem, but some setups (for example
	// github.com/kinvolk/kubeadm-nspawn) run kube-proxy in a different netns.
	// So read the current value first and skip the write entirely when it is
	// already large enough.
	wantHashsize := max / 4
	currentHashsize, err := readIntStringFile(hashsizePath)
	if err != nil {
		return err
	}
	if currentHashsize >= wantHashsize {
		return nil
	}

	// sysfs should be mounted 'rw', but docker may mount it 'ro' because of
	// a known issue (https://github.com/docker/docker/issues/24000). Writing
	// the hashsize would fail in that case, so bail out with the dedicated
	// readOnlySysFSError and let the caller decide how to react.
	switch writable, err := isSysFSWritable(); {
	case err != nil:
		return err
	case !writable:
		return readOnlySysFSError
	}

	// TODO: generify this and sysctl to a new sysfs.WriteInt()
	glog.Infof("Setting conntrack hashsize to %d", wantHashsize)
	return writeIntStringFile(hashsizePath, wantHashsize)
}
// SetTCPEstablishedTimeout sets the nf_conntrack_tcp_timeout_established
// sysctl to the given number of seconds.
func (rct realConntracker) SetTCPEstablishedTimeout(seconds int) error {
	const sysctlName = "nf_conntrack_tcp_timeout_established"
	return rct.setIntSysCtl(sysctlName, seconds)
}
// SetTCPCloseWaitTimeout sets the nf_conntrack_tcp_timeout_close_wait sysctl
// to the given number of seconds.
func (rct realConntracker) SetTCPCloseWaitTimeout(seconds int) error {
	const sysctlName = "nf_conntrack_tcp_timeout_close_wait"
	return rct.setIntSysCtl(sysctlName, seconds)
}
// setIntSysCtl writes value to the sysctl net/netfilter/<name>, skipping the
// write when the value read back from that sysctl already equals value.
// It returns the error from the write, if any.
func (realConntracker) setIntSysCtl(name string, value int) error {
	key := "net/netfilter/" + name
	ctl := sysctl.New()

	// The read error is intentionally dropped: whatever GetSysctl yields is
	// compared as-is, and any mismatch simply triggers the write below.
	current, _ := ctl.GetSysctl(key)
	if current == value {
		return nil
	}

	glog.Infof("Set sysctl '%v' to %v", key, value)
	return ctl.SetSysctl(key, value)
}
// isSysFSWritable lists the mount points and reports whether the first mount
// of type sysfs has 'rw' as its first mount option.
//
// It returns (true, nil) when that mount is writable, (false,
// readOnlySysFSError) when it is not, the listing error when mount points
// cannot be enumerated, and a distinct error when no sysfs mount is found.
func isSysFSWritable() (bool, error) {
	const (
		sysfsType = "sysfs"
		rwOption  = "rw"
	)

	entries, listErr := mount.New("" /* default mount path */).List()
	if listErr != nil {
		glog.Errorf("failed to list mount points: %v", listErr)
		return false, listErr
	}

	for _, entry := range entries {
		if entry.Type == sysfsType {
			// Only the first sysfs entry is examined, and only its first
			// option decides between read-write and read-only.
			opts := entry.Opts
			if len(opts) == 0 || opts[0] != rwOption {
				glog.Errorf("sysfs is not writable: %+v (mount options are %v)",
					entry, opts)
				return false, readOnlySysFSError
			}
			return true, nil
		}
	}

	return false, errors.New("No sysfs mounted")
}
// readIntStringFile reads the whole file at filename, trims surrounding
// whitespace, and parses the remainder as a decimal integer.
//
// If the file cannot be read it returns -1 and the read error; otherwise it
// returns whatever strconv.Atoi yields for the trimmed content.
func readIntStringFile(filename string) (int, error) {
	raw, readErr := ioutil.ReadFile(filename)
	if readErr != nil {
		return -1, readErr
	}
	text := strings.TrimSpace(string(raw))
	return strconv.Atoi(text)
}
// writeIntStringFile writes the decimal representation of value to filename,
// creating the file with mode 0640 if it does not exist and truncating it
// otherwise. It returns the error from the write, if any.
func writeIntStringFile(filename string, value int) error {
	payload := []byte(strconv.Itoa(value))
	return ioutil.WriteFile(filename, payload, 0640)
}