Skip to content

Commit

Permalink
bpf: SNAT of outer IP only if retuning to outer client
Browse files Browse the repository at this point in the history
When we generate an ICMP in a workload or at a host in response to
traffic that originated through a NP tunnel ammend the outer source IP
as if the reponse was from the original node as all the rest is internal
to the cluster.
  • Loading branch information
tomastigera committed Apr 6, 2020
1 parent a561c6a commit 82cf3e2
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 15 deletions.
2 changes: 2 additions & 0 deletions bpf-gpl/skb.h
Expand Up @@ -37,6 +37,8 @@

#define skb_len_dir_access(skb) skb_tail_len(skb, skb_start_ptr(skb))

#define skb_seen(skb) ((skb)->mark & CALI_SKB_MARK_SEEN)

#define IPV4_UDP_SIZE (sizeof(struct iphdr) + sizeof(struct udphdr))
#define ETH_IPV4_UDP_SIZE (sizeof(struct ethhdr) + IPV4_UDP_SIZE)

Expand Down
31 changes: 27 additions & 4 deletions bpf-gpl/tc.c
Expand Up @@ -691,6 +691,7 @@ static CALI_BPF_INLINE struct fwd calico_tc_skb_accepted(struct __sk_buff *skb,
bool ct_related = ct_result_is_related(state->ct_result.rc);
uint32_t seen_mark;
size_t l4_csum_off = 0, l3_csum_off;
uint32_t fib_flags = 0;

CALI_DEBUG("src=%x dst=%x\n", be32_to_host(state->ip_src), be32_to_host(state->ip_dst));
CALI_DEBUG("post_nat=%x:%d\n", be32_to_host(state->post_nat_ip_dst), state->post_nat_dport);
Expand Down Expand Up @@ -733,16 +734,31 @@ static CALI_BPF_INLINE struct fwd calico_tc_skb_accepted(struct __sk_buff *skb,
if (ct_related) {
if (ip_header->protocol == IPPROTO_ICMP) {
struct icmphdr *icmp;

/* if SNAT, fix the outer header IP first */
if (ct_rc == CALI_CT_ESTABLISHED_SNAT) {
bool outer_ip_snat;

/* if we do SNAT ... */
outer_ip_snat = ct_rc == CALI_CT_ESTABLISHED_SNAT;
/* ... there is a return path to the tunnel ... */
outer_ip_snat = outer_ip_snat && state->ct_result.tun_ret_ip;
/* ... and should do encap and it is not DSR or it is
* it is leaving host and either DSR from WEP or
* originated at host ... */
outer_ip_snat = outer_ip_snat &&
((dnat_return_should_encap() && !CALI_F_DSR) ||
(CALI_F_TO_HEP &&
((CALI_F_DSR && skb_seen(skb)) || !skb_seen(skb))));

/* ... then fix the outer header IP first */
if (outer_ip_snat) {
ip_header->saddr = state->ct_result.nat_ip;
int res = bpf_l3_csum_replace(skb, l3_csum_off,
state->ip_src, state->ct_result.nat_ip, 4);
if (res) {
reason = CALI_REASON_CSUM_FAIL;
goto deny;
}
CALI_DEBUG("ICMP related: outer IP SNAT to %x\n",
be32_to_host(state->ct_result.nat_ip));
}

if (!icmp_skb_get_hdr(skb, &icmp)) {
Expand All @@ -756,6 +772,14 @@ static CALI_BPF_INLINE struct fwd calico_tc_skb_accepted(struct __sk_buff *skb,
/* flip the direction, we need to reverse the original packet */
switch (ct_rc) {
case CALI_CT_ESTABLISHED_SNAT:
/* handle the DSR case, see CALI_CT_ESTABLISHED_SNAT where nat is done */
if (dnat_return_should_encap() && state->ct_result.tun_ret_ip) {
if (CALI_F_DSR) {
/* SNAT will be done after routing, when leaving HEP */
CALI_DEBUG("DSR enabled, skipping SNAT + encap\n");
goto allow;
}
}
ct_rc = CALI_CT_ESTABLISHED_DNAT;
break;
case CALI_CT_ESTABLISHED_DNAT:
Expand All @@ -772,7 +796,6 @@ static CALI_BPF_INLINE struct fwd calico_tc_skb_accepted(struct __sk_buff *skb,

int res = 0;
bool encap_needed = false;
uint32_t fib_flags = 0;

if (state->ip_proto == IPPROTO_ICMP && ct_related) {
/* do not fix up embedded L4 checksum for related ICMP */
Expand Down
2 changes: 1 addition & 1 deletion bpf/ut/icmp_related_test.go
Expand Up @@ -136,7 +136,7 @@ func TestICMPRelatedNATPodPod(t *testing.T) {
// we have a normal ct record, it is related, must be allowed
Expect(res.Retval).To(Equal(resTC_ACT_UNSPEC))

checkICMP(res.dataOut, ipv4.DstIP, ipv4.SrcIP, ipv4.SrcIP, ipv4.DstIP, ipv4.Protocol,
checkICMP(res.dataOut, hostIP, ipv4.SrcIP, ipv4.SrcIP, ipv4.DstIP, ipv4.Protocol,
uint16(udp.SrcPort), uint16(udp.DstPort))
})
}
Expand Down
22 changes: 12 additions & 10 deletions fv/bpf_test.go
Expand Up @@ -1159,7 +1159,7 @@ func describeBPFTests(opts ...bpfTestOpt) bool {
})
}

Context("with icmp blocked", func() {
Context("with icmp blocked from workloads, external client", func() {
var (
testSvc *v1.Service
testSvcNamespace string
Expand Down Expand Up @@ -1224,7 +1224,7 @@ func describeBPFTests(opts ...bpfTestOpt) bool {
tgtWorkload = deadWorkload
})

It("should get host unreachable from nodepot via node1->node0 fwd", func() {
It("should get host unreachable from nodeport via node1->node0 fwd", func() {
if testOpts.connTimeEnabled {
Skip("FIXME externalClient also does conntime balancing")
}
Expand All @@ -1234,16 +1234,17 @@ func describeBPFTests(opts ...bpfTestOpt) bool {

tcpdump := containers.AttachTCPDump(externalClient, "any")
tcpdump.SetLogEnabled(true)
tcpdump.AddMatcher("ICMP", regexp.MustCompile(
fmt.Sprintf("IP %s > %s: ICMP host %s unreachable",
felixes[1].IP, externalClient.IP, felixes[1].IP)))
matcher := fmt.Sprintf("IP %s > %s: ICMP host %s unreachable",
felixes[1].IP, externalClient.IP, felixes[1].IP)
tcpdump.AddMatcher("ICMP", regexp.MustCompile(matcher))
tcpdump.Start(testOpts.protocol, "port", strconv.Itoa(int(npPort)), "or", "icmp")
defer tcpdump.Stop()

cc.ExpectNone(externalClient, TargetIP(felixes[1].IP), npPort)
cc.CheckConnectivity()

Eventually(func() int { return tcpdump.MatchCount("ICMP") }).Should(BeNumerically(">", 0))
Eventually(func() int { return tcpdump.MatchCount("ICMP") }).
Should(BeNumerically(">", 0), matcher)
})
})

Expand Down Expand Up @@ -1272,15 +1273,16 @@ func describeBPFTests(opts ...bpfTestOpt) bool {

tcpdump := containers.AttachTCPDump(externalClient, "any")
tcpdump.SetLogEnabled(true)
tcpdump.AddMatcher("ICMP", regexp.MustCompile(
fmt.Sprintf("IP %s > %s: ICMP %s udp port %d unreachable",
felixes[1].IP, externalClient.IP, felixes[1].IP, npPort)))
matcher := fmt.Sprintf("IP %s > %s: ICMP %s udp port %d unreachable",
felixes[1].IP, externalClient.IP, felixes[1].IP, npPort)
tcpdump.AddMatcher("ICMP", regexp.MustCompile(matcher))
tcpdump.Start(testOpts.protocol, "port", strconv.Itoa(tgtPort), "or", "icmp")
defer tcpdump.Stop()

cc.ExpectNone(externalClient, TargetIP(felixes[1].IP), npPort)
cc.CheckConnectivity()
Eventually(func() int { return tcpdump.MatchCount("ICMP") }).Should(BeNumerically(">", 0))
Eventually(func() int { return tcpdump.MatchCount("ICMP") }).
Should(BeNumerically(">", 0), matcher)
})
})
})
Expand Down

0 comments on commit 82cf3e2

Please sign in to comment.