New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ICMP errors generated by tracked flows treated as related traffic #2247
Merged
Merged
Changes from all commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
b63ddac
bpf: ct_make_key() to make keys CT easily
tomastigera ee18268
bpf: icmp related lookup for non-NATed traffic
tomastigera a684d88
bpf: icmp_skb_get_hdr() returns the icmp header
tomastigera 1c8652b
bpf: icmp related for NAT
tomastigera c49f205
bpf: ut for NAT related from the host
tomastigera 290081d
fv: allow creating inactive workloads
tomastigera 57cf27a
fv: allow to start inactive workload
tomastigera 910c0f6
fv/bpf: addWorkload returns the workload
tomastigera 7fc04a8
bpf/fv: fix the IP size in makeICMPError() and test ports
tomastigera b483d5e
fv/bpf: check icmp related outer IP header
tomastigera cbf1998
bpf: tunneling from host and csum of icmp related
tomastigera 8739e6f
fv: tcpdump fails test if it never listened
tomastigera 43c8738
fv: tcpdump for containers without tcpdump installed
tomastigera 5dc2d48
bpf: nodeports and icmp related
tomastigera 65314c5
bpf: update conntrack vars only when icmp related success
tomastigera e4759c1
bpf: SNAT of outer IP only if returning to outer client
tomastigera 14b7721
fv: icmp related workload-workload
tomastigera 5c6a24e
bpf: fix comment in handling ttl
tomastigera 7cbe463
fv: icmp related workload-workload through service
tomastigera aa28d96
bpf: icmp related retunign from tunnel is fwd approved
tomastigera 0364cf6
bpf: fixed comment in icmp related SNAT
tomastigera File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
#include <linux/in.h> | ||
#include "nat.h" | ||
#include "bpf.h" | ||
#include "icmp.h" | ||
|
||
// Connection tracking. | ||
|
||
|
@@ -30,6 +31,23 @@ struct calico_ct_key { | |
uint16_t port_a, port_b; // HBO | ||
}; | ||
|
||
#define src_lt_dest(ip_src, ip_dst, sport, dport) \ | ||
((ip_src) < (ip_dst)) || (((ip_src) == (ip_dst)) && (sport) < (dport)) | ||
|
||
#define __ct_make_key(proto, ipa, ipb, porta, portb) \ | ||
(struct calico_ct_key) { \ | ||
.protocol = proto, \ | ||
.addr_a = ipa, .port_a = porta, \ | ||
.addr_b = ipb, .port_b = portb, \ | ||
} | ||
|
||
#define ct_make_key(sltd, p, ipa, ipb, pta, ptb) ({ \ | ||
struct calico_ct_key k; \ | ||
k = sltd ? __ct_make_key(p, ipa, ipb, pta, ptb) : __ct_make_key(p, ipb, ipa, ptb, pta); \ | ||
dump_ct_key(&k); \ | ||
k; \ | ||
}) | ||
|
||
enum cali_ct_type { | ||
CALI_CT_TYPE_NORMAL = 0x00, /* Non-NATted entry. */ | ||
CALI_CT_TYPE_NAT_FWD = 0x01, /* Forward entry for a DNATted flow, keyed on orig src/dst. | ||
|
@@ -148,21 +166,8 @@ static CALI_BPF_INLINE int calico_ct_v4_create_tracking(struct ct_ctx *ctx, | |
*/ | ||
CALI_DEBUG("CT-ALL Asked to create entry but packet is marked as " | ||
"from another endpoint, doing lookup\n"); | ||
bool srcLTDest = (ip_src < ip_dst) || ((ip_src == ip_dst) && sport < dport); | ||
if (srcLTDest) { | ||
*k = (struct calico_ct_key) { | ||
.protocol = ctx->proto, | ||
.addr_a = ip_src, .port_a = sport, | ||
.addr_b = ip_dst, .port_b = dport, | ||
}; | ||
} else { | ||
*k = (struct calico_ct_key) { | ||
.protocol = ctx->proto, | ||
.addr_a = ip_dst, .port_a = dport, | ||
.addr_b = ip_src, .port_b = sport, | ||
}; | ||
} | ||
dump_ct_key(k); | ||
bool srcLTDest = src_lt_dest(ip_src, ip_dst, sport, dport); | ||
*k = ct_make_key(srcLTDest, ctx->proto, ip_src, ip_dst, sport, dport); | ||
struct calico_ct_value *ct_value = bpf_map_lookup_elem(&cali_v4_ct, k); | ||
if (!ct_value) { | ||
CALI_VERB("CT Packet marked as from workload but got a conntrack miss!\n"); | ||
|
@@ -327,6 +332,14 @@ enum calico_ct_result_type { | |
CALI_CT_INVALID, | ||
}; | ||
|
||
#define CALI_CT_RELATED (1 << 8) | ||
|
||
#define ct_result_rc(rc) ((rc) & 0xff) | ||
#define ct_result_flags(rc) ((rc) & ~0xff) | ||
#define ct_result_set_flag(val, flags) ((val) |= (flags)) | ||
|
||
#define ct_result_is_related(rc) ((rc) & CALI_CT_RELATED) | ||
|
||
struct calico_ct_result { | ||
__s16 rc; | ||
__u16 flags; | ||
|
@@ -335,36 +348,68 @@ struct calico_ct_result { | |
__be32 tun_ret_ip; | ||
}; | ||
|
||
/* skb_is_icmp_err_unpack fills in ctx, but only what needs to be changed. For instance, keeps the | ||
* cxt->skb or ctx->nat_tun_src. It returns true if the original packet is an icmp error and all | ||
* checks went well. | ||
*/ | ||
static CALI_BPF_INLINE bool skb_is_icmp_err_unpack(struct __sk_buff *skb, struct ct_ctx *ctx) | ||
{ | ||
struct iphdr *ip; | ||
struct icmphdr *icmp; | ||
|
||
if (!icmp_skb_get_hdr(skb, &icmp)) { | ||
CALI_DEBUG("CT-ICMP: failed to get inner IP\n"); | ||
return false; | ||
} | ||
|
||
if (!icmp_type_is_err(icmp->type)) { | ||
CALI_DEBUG("CT-ICMP: type %d not an error\n", icmp->type); | ||
return false; | ||
} | ||
|
||
ip = (struct iphdr *)(icmp + 1); /* skip to inner ip */ | ||
CALI_DEBUG("CT-ICMP: proto %d\n", ip->protocol); | ||
|
||
ctx->proto = ip->protocol; | ||
ctx->src = ip->saddr; | ||
ctx->dst = ip->daddr; | ||
|
||
switch (ip->protocol) { | ||
case IPPROTO_TCP: | ||
{ | ||
struct tcphdr *tcp = (struct tcphdr *)(ip + 1); | ||
ctx->sport = be16_to_host(tcp->source); | ||
ctx->dport = be16_to_host(tcp->dest); | ||
ctx->tcp = tcp; | ||
} | ||
break; | ||
case IPPROTO_UDP: | ||
{ | ||
struct udphdr *udp = (struct udphdr *)(ip + 1); | ||
ctx->sport = be16_to_host(udp->source); | ||
ctx->dport = be16_to_host(udp->dest); | ||
} | ||
break; | ||
}; | ||
|
||
return true; | ||
} | ||
|
||
static CALI_BPF_INLINE void calico_ct_v4_tcp_delete( | ||
__be32 ip_src, __be32 ip_dst, __u16 sport, __u16 dport) | ||
{ | ||
CALI_DEBUG("CT-TCP delete from %x:%d\n", be32_to_host(ip_src), sport); | ||
CALI_DEBUG("CT-TCP delete to %x:%d\n", be32_to_host(ip_dst), dport); | ||
|
||
bool srcLTDest = (ip_src < ip_dst) || ((ip_src == ip_dst) && sport < dport); | ||
struct calico_ct_key k; | ||
if (srcLTDest) { | ||
k = (struct calico_ct_key) { | ||
.protocol = IPPROTO_TCP, | ||
.addr_a = ip_src, .port_a = sport, | ||
.addr_b = ip_dst, .port_b = dport, | ||
}; | ||
} else { | ||
k = (struct calico_ct_key) { | ||
.protocol = IPPROTO_TCP, | ||
.addr_a = ip_dst, .port_a = dport, | ||
.addr_b = ip_src, .port_b = sport, | ||
}; | ||
} | ||
|
||
dump_ct_key(&k); | ||
bool srcLTDest = src_lt_dest(ip_src, ip_dst, sport, dport); | ||
struct calico_ct_key k = ct_make_key(srcLTDest, IPPROTO_TCP, ip_src, ip_dst, sport, dport); | ||
|
||
int rc = bpf_map_delete_elem(&cali_v4_ct, &k); | ||
CALI_DEBUG("CT-TCP delete result: %d\n", rc); | ||
} | ||
|
||
#define CALI_CT_LOG(level, fmt, ...) \ | ||
CALI_LOG_IF_FLAG(level, CALI_COMPILE_FLAGS, "CT-%d "fmt, proto, ## __VA_ARGS__) | ||
CALI_LOG_IF_FLAG(level, CALI_COMPILE_FLAGS, "CT-%d "fmt, proto_orig, ## __VA_ARGS__) | ||
#define CALI_CT_DEBUG(fmt, ...) \ | ||
CALI_CT_LOG(CALI_LOG_LEVEL_DEBUG, fmt, ## __VA_ARGS__) | ||
#define CALI_CT_VERB(fmt, ...) \ | ||
|
@@ -376,7 +421,7 @@ static CALI_BPF_INLINE void ct_tcp_entry_update(struct tcphdr *tcp_header, | |
struct calico_ct_leg *src_to_dst, | ||
struct calico_ct_leg *dst_to_src) | ||
{ | ||
__u8 proto = IPPROTO_TCP; /* used by logging */ | ||
__u8 proto_orig = IPPROTO_TCP; /* used by logging */ | ||
|
||
if (tcp_header->rst) { | ||
CALI_CT_DEBUG("RST seen, marking CT entry.\n"); | ||
|
@@ -425,15 +470,15 @@ static CALI_BPF_INLINE void ct_tcp_entry_update(struct tcphdr *tcp_header, | |
} | ||
} | ||
|
||
|
||
static CALI_BPF_INLINE struct calico_ct_result calico_ct_v4_lookup(struct ct_ctx *ctx) | ||
{ | ||
__u8 proto = ctx->proto; | ||
__u8 proto_orig = ctx->proto; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ctx->proto changes once we decode the icmp payload |
||
__be32 ip_src = ctx->src; | ||
__be32 ip_dst = ctx->dst; | ||
__u16 sport = ctx->sport; | ||
__u16 dport = ctx->dport; | ||
struct tcphdr *tcp_header = ctx->tcp; | ||
bool related = false; | ||
|
||
CALI_CT_DEBUG("lookup from %x:%d\n", be32_to_host(ip_src), sport); | ||
CALI_CT_DEBUG("lookup to %x:%d\n", be32_to_host(ip_dst), dport); | ||
|
@@ -454,27 +499,38 @@ static CALI_BPF_INLINE struct calico_ct_result calico_ct_v4_lookup(struct ct_ctx | |
goto out_lookup_fail; | ||
} | ||
|
||
bool srcLTDest = (ip_src < ip_dst) || ((ip_src == ip_dst) && sport < dport); | ||
struct calico_ct_key k; | ||
if (srcLTDest) { | ||
k = (struct calico_ct_key) { | ||
.protocol = proto, | ||
.addr_a = ip_src, .port_a = sport, | ||
.addr_b = ip_dst, .port_b = dport, | ||
}; | ||
} else { | ||
k = (struct calico_ct_key) { | ||
.protocol = proto, | ||
.addr_a = ip_dst, .port_a = dport, | ||
.addr_b = ip_src, .port_b = sport, | ||
}; | ||
} | ||
dump_ct_key(&k); | ||
bool srcLTDest = src_lt_dest(ip_src, ip_dst, sport, dport); | ||
struct calico_ct_key k = ct_make_key(srcLTDest, ctx->proto, ip_src, ip_dst, sport, dport); | ||
|
||
struct calico_ct_value *v = bpf_map_lookup_elem(&cali_v4_ct, &k); | ||
if (!v) { | ||
CALI_CT_DEBUG("Miss.\n"); | ||
goto out_lookup_fail; | ||
if (ctx->proto != IPPROTO_ICMP) { | ||
CALI_CT_DEBUG("Miss.\n"); | ||
goto out_lookup_fail; | ||
} | ||
if (!skb_is_icmp_err_unpack(ctx->skb, ctx)) { | ||
CALI_CT_DEBUG("unrelated icmp\n"); | ||
goto out_lookup_fail; | ||
} | ||
|
||
CALI_CT_DEBUG("related lookup from %x:%d\n", be32_to_host(ctx->src), ctx->sport); | ||
CALI_CT_DEBUG("related lookup to %x:%d\n", be32_to_host(ctx->dst), ctx->dport); | ||
|
||
srcLTDest = src_lt_dest(ctx->src, ctx->dst, ctx->sport, ctx->dport); | ||
k = ct_make_key(srcLTDest, ctx->proto, ctx->src, ctx->dst, ctx->sport, ctx->dport); | ||
v = bpf_map_lookup_elem(&cali_v4_ct, &k); | ||
if (!v) { | ||
CALI_CT_DEBUG("Miss on ICMP related\n"); | ||
goto out_lookup_fail; | ||
} | ||
|
||
ip_src = ctx->src; | ||
ip_dst = ctx->dst; | ||
sport = ctx->sport; | ||
dport = ctx->dport; | ||
tcp_header = ctx->tcp; | ||
|
||
related = true; | ||
} | ||
|
||
__u64 now = bpf_ktime_get_ns(); | ||
|
@@ -514,7 +570,7 @@ static CALI_BPF_INLINE struct calico_ct_result calico_ct_v4_lookup(struct ct_ctx | |
result.tun_ret_ip = tracking_v->tun_ip; | ||
CALI_CT_DEBUG("fwd tun_ip:%x\n", be32_to_host(tracking_v->tun_ip)); | ||
|
||
if (proto == IPPROTO_ICMP) { | ||
if (ctx->proto == IPPROTO_ICMP) { | ||
result.rc = CALI_CT_ESTABLISHED_DNAT; | ||
result.nat_ip = tracking_v->orig_ip; | ||
} else if (CALI_F_TO_HOST) { | ||
|
@@ -536,9 +592,13 @@ static CALI_BPF_INLINE struct calico_ct_result calico_ct_v4_lookup(struct ct_ctx | |
dst_to_src = &v->a_to_b; | ||
} | ||
|
||
if (proto == IPPROTO_ICMP) { | ||
result.tun_ret_ip = v->tun_ip; | ||
CALI_CT_DEBUG("tun_ip:%x\n", be32_to_host(v->tun_ip)); | ||
|
||
if (ctx->proto == IPPROTO_ICMP || (related && proto_orig == IPPROTO_ICMP)) { | ||
result.rc = CALI_CT_ESTABLISHED_SNAT; | ||
result.nat_ip = v->orig_ip; | ||
result.nat_port = v->orig_port; | ||
break; | ||
} | ||
|
||
|
@@ -568,14 +628,10 @@ static CALI_BPF_INLINE struct calico_ct_result calico_ct_v4_lookup(struct ct_ctx | |
CALI_CT_DEBUG("Hit! NAT REV entry but not connection opener: ESTABLISHED.\n"); | ||
result.rc = CALI_CT_ESTABLISHED; | ||
} | ||
result.tun_ret_ip = v->tun_ip; | ||
CALI_CT_DEBUG("tun_ip:%x\n", be32_to_host(v->tun_ip)); | ||
break; | ||
|
||
case CALI_CT_TYPE_NORMAL: | ||
if (v->type == CALI_CT_TYPE_NORMAL) { | ||
CALI_CT_DEBUG("Hit! NORMAL entry.\n"); | ||
} | ||
CALI_CT_DEBUG("Hit! NORMAL entry.\n"); | ||
CALI_CT_VERB("Created: %llu.\n", v->created); | ||
if (tcp_header) { | ||
CALI_CT_VERB("Last seen: %llu.\n", v->last_seen); | ||
|
@@ -621,6 +677,19 @@ static CALI_BPF_INLINE struct calico_ct_result calico_ct_v4_lookup(struct ct_ctx | |
src_to_dst->whitelisted && | ||
!result.tun_ret_ip; | ||
|
||
if (related) { | ||
if (proto_orig == IPPROTO_ICMP) { | ||
/* flip src/dst as ICMP related carries the original ip/l4 headers in | ||
* opposite direction - it is a reaction on the original packet. | ||
*/ | ||
struct calico_ct_leg *tmp; | ||
|
||
tmp = src_to_dst; | ||
src_to_dst = dst_to_src; | ||
dst_to_src = tmp; | ||
} | ||
} | ||
|
||
if (CALI_F_TO_HOST && !ctx->nat_tun_src) { | ||
/* Source of the packet is the endpoint, so check the src whitelist. */ | ||
if (src_to_dst->whitelisted) { | ||
|
@@ -652,7 +721,7 @@ static CALI_BPF_INLINE struct calico_ct_result calico_ct_v4_lookup(struct ct_ctx | |
} | ||
} | ||
|
||
if (tcp_header) { | ||
if (tcp_header && !related) { | ||
if (ret_from_tun) { | ||
/* we returned from tunnel, we are after SNAT, unlike | ||
* with NAT on workload, we hit FWD entry in both | ||
|
@@ -666,7 +735,13 @@ static CALI_BPF_INLINE struct calico_ct_result calico_ct_v4_lookup(struct ct_ctx | |
ct_tcp_entry_update(tcp_header, src_to_dst, dst_to_src); | ||
} | ||
|
||
CALI_CT_DEBUG("result: %d.\n", result.rc); | ||
CALI_CT_DEBUG("result: %d\n", result.rc); | ||
|
||
if (related) { | ||
ct_result_set_flag(result.rc, CALI_CT_RELATED); | ||
CALI_CT_DEBUG("result: related\n"); | ||
} | ||
|
||
return result; | ||
|
||
out_lookup_fail: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i decided to pass the flag as part of the code (a) the result struct is part of the state and I did not want to change it and (b) it effectively creates a whole set of return values, which was another option, but they would only differ in related or not