In [258]:
import os

import pandas as pd
import sqlalchemy as sa

In [259]:
# Read the connection URL from the DATABASE_URL environment variable so real
# credentials never have to be written into the notebook; the literal below is
# only a placeholder fallback for local development.
# NOTE(review): the following cells create and query a TEMP table through this
# engine. PostgreSQL temp tables are visible only to the session that created
# them, so this presumably relies on the pool handing back the same connection
# for every call - confirm, or pin a single connection with `conn.connect()`.
conn = sa.create_engine(
    os.environ.get("DATABASE_URL", "postgresql://user:pwd@localhost:5432/db")
)

In [260]:
# Snapshot multi_addresses into a session-local temp table so that the flag
# columns added afterwards are never written to the source table.
# IF NOT EXISTS lets this cell be re-run in the same database session without
# raising "relation already exists"; an existing snapshot is kept as-is, so
# drop the temp table (or reconnect) to refresh it from multi_addresses.
temp_table_q = """
    CREATE TEMP TABLE IF NOT EXISTS temp_multiaddresses
    AS
    SELECT * from multi_addresses
"""
res = conn.execute(temp_table_q)


Add integer flag columns (`is_<type>`, 0 or 1) that indicate the type of each multiaddress:
- ip4_tcp
- ip4_quic
- ip6_tcp
- ip6_quic

In [261]:
# Add one integer flag column per address type, each defaulting to 0.
# IF NOT EXISTS keeps the cell re-runnable: without it a second execution
# fails because the columns are already present on the temp table.
alter_table_addcols_q = """
ALTER TABLE temp_multiaddresses ADD COLUMN IF NOT EXISTS is_ip4_tcp int DEFAULT 0;
ALTER TABLE temp_multiaddresses ADD COLUMN IF NOT EXISTS is_ip4_quic int DEFAULT 0;
ALTER TABLE temp_multiaddresses ADD COLUMN IF NOT EXISTS is_ip6_tcp int DEFAULT 0;
ALTER TABLE temp_multiaddresses ADD COLUMN IF NOT EXISTS is_ip6_quic int DEFAULT 0;
"""
res = conn.execute(alter_table_addcols_q)


In [262]:
# Set is_ip4_tcp to 1 on every row whose addr is IPv4 (family(addr)=4) and
# whose maddr contains "/tcp/"; all other rows are set to 0.
# NOTE(review): the doubled %% is presumably an escape for the driver's
# %-style parameter formatting - confirm.
update_ip4_tcp_q = """
    UPDATE temp_multiaddresses SET
    is_ip4_tcp = CASE
        WHEN family(addr)=4 AND maddr LIKE '%%/tcp/%%' THEN 
            1
        ELSE 
            0
    END;
"""
res = conn.execute(update_ip4_tcp_q)

In [263]:
# Set is_ip6_tcp to 1 on every row whose addr is IPv6 (family(addr)=6) and
# whose maddr contains "/tcp/"; all other rows are set to 0.
# NOTE(review): the doubled %% is presumably an escape for the driver's
# %-style parameter formatting - confirm.
update_ip6_tcp_q = """
    UPDATE temp_multiaddresses SET
    is_ip6_tcp = CASE
        WHEN family(addr)=6 AND maddr LIKE '%%/tcp/%%' THEN 
            1
        ELSE 
            0
    END;
"""
res = conn.execute(update_ip6_tcp_q)

In [264]:
# Set is_ip4_quic to 1 on every row whose addr is IPv4 (family(addr)=4) and
# whose maddr contains "/quic"; all other rows are set to 0.
# The pattern has no trailing slash, so it matches any component that merely
# starts with "/quic".
# NOTE(review): the doubled %% is presumably an escape for the driver's
# %-style parameter formatting - confirm.
update_ip4_quic_q = """
UPDATE temp_multiaddresses SET
    is_ip4_quic = CASE
        WHEN family(addr)=4 AND maddr LIKE '%%/quic%%' THEN
            1
        ELSE
            0
    END;
"""
res = conn.execute(update_ip4_quic_q)

In [265]:
# Set is_ip6_quic to 1 on every row whose addr is IPv6 (family(addr)=6) and
# whose maddr contains "/quic"; all other rows are set to 0.
# The pattern has no trailing slash, so it matches any component that merely
# starts with "/quic".
# NOTE(review): the doubled %% is presumably an escape for the driver's
# %-style parameter formatting - confirm.
update_ip6_quic_q = """
UPDATE temp_multiaddresses SET
    is_ip6_quic = CASE
        WHEN family(addr)=6 AND maddr LIKE '%%/quic%%' THEN
            1
        ELSE
            0
    END;
"""
res = conn.execute(update_ip6_quic_q)

We are only interested in holepunch results where both sides had an address relevant to the given protocol filter.

For example, if we applied a filter of \[ip6, quic\], we only want to look at results where both sides had at least one ip6_quic address.

In the query:
- The first subquery calculates, per result, the number of addresses of each type (ip4_tcp, ip6_tcp, ip4_quic, ip6_quic) for the local side.
- The second subquery calculates the same per-type address counts for the remote side.

In [288]:
# Aggregate hole punch results by outcome, protocol filter and the number of
# addresses of each type on the local and on the remote side.
#   * `hpr` subquery: per result, sums the is_* flags over the addresses of the
#     listen multi-address set (public, non-relay addresses only); results that
#     have any row in port_mappings are excluded.
#   * `remote_addrs` subquery: per result, sums the is_* flags over the
#     distinct (result, address) pairs reached through hole_punch_attempt
#     (public, non-relay addresses only).
# The INNER JOIN keeps only results that have at least one qualifying address
# on both sides; `count` is the number of distinct results per combination.
final_results_q = """
SELECT 
    hpr.outcome, 
    hpr.protocol_filters, 
    hpr.local_ip4_tcp_cnt,
    hpr.local_ip6_tcp_cnt,
    hpr.local_ip4_quic_cnt,
    hpr.local_ip6_quic_cnt,
    remote_addrs.ip4_tcp_cnt as remote_ip4_tcp_cnt,
    remote_addrs.ip6_tcp_cnt as remote_ip6_tcp_cnt,
    remote_addrs.ip4_quic_cnt as remote_ip4_quic_cnt,
    remote_addrs.ip6_quic_cnt as remote_ip6_quic_cnt,
    COUNT(DISTINCT hpr.id) as count
    FROM (
        (SELECT
            hpr.id, 
            hpr.outcome,
            hpr.protocol_filters,
            sum(ma.is_ip4_tcp) as local_ip4_tcp_cnt, 
            sum(ma.is_ip6_tcp) as local_ip6_tcp_cnt, 
            sum(ma.is_ip4_quic) as local_ip4_quic_cnt,
            sum(ma.is_ip6_quic) as local_ip6_quic_cnt
            FROM
                hole_punch_results hpr
                INNER JOIN multi_addresses_sets mas ON mas.id=hpr.listen_multi_addresses_set_id
                CROSS JOIN unnest(mas.multi_addresses_ids) lmids(mid)
                INNER JOIN temp_multiaddresses ma ON ma.id=lmids.mid
            WHERE 
                NOT ma.is_relay 
                AND ma.is_public
                AND NOT EXISTS (SELECT FROM port_mappings pm WHERE pm.hole_punch_result_id = hpr.id)
            GROUP BY hpr.id, hpr.outcome, hpr.protocol_filters
        ) hpr
        INNER JOIN (
             SELECT 
                result_id,
                sum(is_ip4_tcp) as ip4_tcp_cnt,
                sum(is_ip6_tcp) as ip6_tcp_cnt,
                sum(is_ip4_quic) as ip4_quic_cnt,
                sum(is_ip6_quic) as ip6_quic_cnt
            FROM (
                SELECT DISTINCT
                        hpa.hole_punch_result_id as result_id, 
                         ma.id as aid,
                         ma.is_ip4_tcp as is_ip4_tcp,
                         ma.is_ip6_tcp as is_ip6_tcp,
                         ma.is_ip4_quic as is_ip4_quic,
                         ma.is_ip6_quic as is_ip6_quic
                FROM hole_punch_attempt hpa
                INNER JOIN hole_punch_attempt_x_multi_addresses hpaxma on hpa.id = hpaxma.hole_punch_attempt
                INNER JOIN temp_multiaddresses ma on hpaxma.multi_address_id = ma.id
                WHERE ma.is_public AND NOT ma.is_relay
                ) remote
            GROUP BY remote.result_id
        ) remote_addrs ON remote_addrs.result_id=hpr.id
    )
    GROUP BY 
        outcome, 
        protocol_filters, 
        local_ip4_tcp_cnt,
        local_ip6_tcp_cnt,
        local_ip4_quic_cnt,
        local_ip6_quic_cnt,
        remote_ip4_tcp_cnt,
        remote_ip6_tcp_cnt,
        remote_ip4_quic_cnt,
        remote_ip6_quic_cnt
"""
# Load the aggregate into pandas: one row per combination of outcome,
# protocol_filters and the eight per-type address counts.
df = pd.read_sql(final_results_q, con=conn)

In [289]:
# Work on a copy: a plain `dff = df` only creates a second name for the same
# frame, so columns added to dff later would silently appear on the raw query
# result df as well.
dff = df.copy()

Add a string column describing the protocol filter, for easy grouping.

In [306]:
def pf_to_string(x):
    """Render a row's ``protocol_filters`` value as a short grouping label.

    Parameters
    ----------
    x : mapping-like (for example a DataFrame row)
        Must provide ``x['protocol_filters']``. When that value is truthy it
        is indexed at positions 0 (IP filter) and 1 (transport filter).

    Returns
    -------
    str
        ``"No Filter"`` when the value is falsy (``None`` or empty),
        otherwise ``"ip:<4|6>|transport:<tcp|quic>"``.
    """
    filters = x['protocol_filters']

    # Guard clause: nothing to decode when no filter was applied.
    if not filters:
        return "No Filter"

    # Only the value 4 is reported as ip 4; any other value is reported as 6.
    if filters[0] == 4:
        ip_version = 4
    else:
        ip_version = 6

    # Only the value 6 is reported as tcp; any other value is reported as quic.
    if filters[1] == 6:
        transport = "tcp"
    else:
        transport = "quic"

    return f"ip:{ip_version}|transport:{transport}"
# Label every row with its protocol-filter string; the function is passed
# directly because it already takes the row as its only argument.
dff['pf'] = dff.apply(pf_to_string, axis=1)


With the ip4 filters, holepunches fail far more often when the remote side has multiple ip4 addresses than when it has exactly one. This behaviour is not observed in the ip6 cases, so whatever is causing the ip6 errors is something else.

In [300]:
# ip4/tcp filter, one local ip4 tcp address, more than one on the remote side:
# total number of results per outcome.
(
    dff
    .query('pf == "ip:4|transport:tcp" & local_ip4_tcp_cnt==1 & remote_ip4_tcp_cnt>1')
    .groupby(['pf', 'outcome'])['count']
    .sum()
)


pf                  outcome            
ip:4|transport:tcp  CONNECTION_REVERSED      289
                    FAILED                 49471
                    NO_STREAM                930
                    SUCCESS                34651
Name: count, dtype: int64

In [301]:
# ip4/tcp filter, exactly one ip4 tcp address on each side:
# total number of results per outcome.
(
    dff
    .query('pf == "ip:4|transport:tcp" & local_ip4_tcp_cnt==1 & remote_ip4_tcp_cnt==1')
    .groupby(['pf', 'outcome'])['count']
    .sum()
)


pf                  outcome            
ip:4|transport:tcp  CONNECTION_REVERSED      1749
                    FAILED                  57277
                    NO_STREAM                1673
                    SUCCESS                277045
Name: count, dtype: int64

In [302]:
# ip4/quic filter, one local ip4 quic address, more than one on the remote
# side: total number of results per outcome.
(
    dff
    .query('pf == "ip:4|transport:quic" & local_ip4_quic_cnt==1 & remote_ip4_quic_cnt>1')
    .groupby(['pf', 'outcome'])['count']
    .sum()
)


pf                   outcome            
ip:4|transport:quic  CONNECTION_REVERSED     1137
                     FAILED                 61653
                     NO_STREAM               1399
                     SUCCESS                66183
Name: count, dtype: int64

In [303]:
# ip4/quic filter, exactly one ip4 quic address on each side:
# total number of results per outcome.
(
    dff
    .query('pf == "ip:4|transport:quic" & local_ip4_quic_cnt==1 & remote_ip4_quic_cnt==1')
    .groupby(['pf', 'outcome'])['count']
    .sum()
)


pf                   outcome            
ip:4|transport:quic  CONNECTION_REVERSED      1571
                     FAILED                  53468
                     NO_STREAM                1270
                     SUCCESS                192483
Name: count, dtype: int64

For ip6 tcp, most of the errors happen when both sides had exactly one address.

In [304]:
# ip6/tcp filter, exactly one ip6 tcp address on each side:
# total number of results per outcome.
(
    dff
    .query('pf == "ip:6|transport:tcp" & local_ip6_tcp_cnt==1 & remote_ip6_tcp_cnt==1')
    .groupby(['pf', 'outcome'])['count']
    .sum()
)


pf                  outcome            
ip:6|transport:tcp  CONNECTION_REVERSED      364
                    FAILED                 20830
                    NO_STREAM                333
                    SUCCESS                 8732
Name: count, dtype: int64

In [305]:
# ip6/tcp filter, one local ip6 tcp address, more than one on the remote side:
# total number of results per outcome.
(
    dff
    .query('pf == "ip:6|transport:tcp" & local_ip6_tcp_cnt==1 & remote_ip6_tcp_cnt>1')
    .groupby(['pf', 'outcome'])['count']
    .sum()
)

pf                  outcome            
ip:6|transport:tcp  CONNECTION_REVERSED     24
                    FAILED                 580
                    NO_STREAM                9
                    SUCCESS                751
Name: count, dtype: int64