## ECN Spider data input

First, load prerequisites and configure things

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import bz2
from ipaddress import ip_address

# Directory holding the input files read below (results0.csv, results0.ipfix
# and the ecnqof.hdf5 cache); change this to point at the raw data files.
DATAPATH="."
# Directory the resulting CSV tables (cc_df.csv, qq_df.csv) are written to.
TABLESDIR="."


Now read in data from ecnspider from each vantage point and massage it into a useful format

In [2]:
# Address prefixes treated as garbage by load_es_df: IPv6 addresses starting
# with "::", fe80:, fc00: and 64:ff9b:, and the IPv4 prefixes 0., 10., 127.,
# 169.254., 172.16. through 172.31., and 192.168.
_GARBAGE_PREFIXES = (
    ("[::", "[fe80:", "[fc00:", "[64:ff9b:",
     "0.", "10.", "127.", "169.254.")
    + tuple("172.%d." % second_octet for second_octet in range(16, 32))
    + ("192.168.",)
)


def load_es_df(filename, vp, trial):
    """Load one ecnspider result CSV into a dataframe indexed by IP address.

    filename -- path or file-like object accepted by pandas.read_csv; the
                file has no header row, and columns 0-7, 16 and 19 are read
    vp       -- vantage point label, stored in the "vp" column of every row
    trial    -- trial identifier, stored in the "trial" column of every row

    Returns a dataframe indexed by address string (index name "ip") with the
    columns time, rank, site, ecn0rv, ecn0sp, ecn1rv, ecn1sp, ecn0http,
    ecn1http, ip6, ecn0ok, ecn1ok, ecndep, vp and trial.  Rows whose address
    starts with one of _GARBAGE_PREFIXES are dropped.
    """
    # raw dataframe
    df = pd.read_csv(filename, names=["time", "rank", "site", "ip",
                                      "ecn0rv","ecn0sp","ecn1rv","ecn1sp",
                                      "ecn0http","ecn1http"],
                     usecols=[0,1,2,3,4,5,6,7,16,19])

    # cast IP address to string (the builtin str; the old np.str alias no
    # longer exists in current NumPy releases)
    df['ip'] = df['ip'].astype(str)

    # drop all rows with garbage addresses in one pass: str.startswith
    # accepts a tuple of prefixes.  Copy the result so the column
    # assignments below operate on an independent frame, not on a slice.
    garbage = df['ip'].apply(lambda x: x.startswith(_GARBAGE_PREFIXES)).astype(bool)
    df = df[~garbage].copy()

    # tag IPv6 addresses (the ones written in brackets)
    df["ip6"] = df['ip'].apply(lambda x: x.startswith("["))

    # cast timestamp to datetime; the value is scaled by 1e9 before the
    # conversion (presumably epoch seconds -> nanoseconds; confirm against
    # the ecnspider output format)
    df['time'] = pd.to_datetime(df['time'] * 1e9)

    # rank is an integer
    df['rank'] = df['rank'].astype(np.uint32)

    # site is a string
    df['site'] = df['site'].astype(str)

    # cast ports
    df["ecn0sp"] = df["ecn0sp"].astype(np.uint16)
    df["ecn1sp"] = df["ecn1sp"].astype(np.uint16)

    # categorize errors: an empty result field means the connection succeeded
    df["ecn0rv"] = pd.Categorical(df['ecn0rv'].fillna("Success"))
    df["ecn1rv"] = pd.Categorical(df['ecn1rv'].fillna("Success"))
    df["ecn0ok"] = (df['ecn0rv'] == "Success")
    df["ecn1ok"] = (df['ecn1rv'] == "Success")

    # cast HTTP status (missing status becomes 0)
    df["ecn0http"] = df["ecn0http"].fillna(0).astype(np.uint16)
    df["ecn1http"] = df["ecn1http"].fillna(0).astype(np.uint16)

    # annotate mismatch between error states
    # (the error codes are less interesting; the fact that the status is different moreso)
    df["ecndep"] = (df["ecn0ok"] != df["ecn1ok"])

    # annotate with vp and trial, in case we want pivot/select on these later
    df["vp"] = vp
    df["trial"] = trial

    # and now build the index
    df.index = pd.Index(df['ip'], name="ip")
    del df['ip']

    return df

# Per-run ECN Spider dataframes, keyed by "<vantage point>-<trial>"
es_df = {}
%time es_df["ams-0"] = load_es_df(DATAPATH+"/results0.csv", "ams", 0)

CPU times: user 10.8 s, sys: 540 ms, total: 11.4 s
Wall time: 11.4 s


## Table Merging

Now pull the ECN Spider data from all runs into a single dataframe, so that subsequent column comparisons can show how ECN-dependent connectivity varies over time (between trials) and space (between vantage points).

In [3]:
def index_intersect(dfs):
    """Return the intersection of the indices of passed-in dataframes.

    dfs -- non-empty sequence of dataframes

    The result holds each label present in every dataframe exactly once and
    carries the index name of the first dataframe.
    """
    idx = dfs[0].index
    for df in dfs[1:]:
        # Use the explicit set operation: the `&` operator on Index objects
        # is not a set intersection in current pandas.
        idx = idx.intersection(df.index)
    # unique() also covers the single-dataframe case, where no intersection
    # has been taken and duplicates may remain
    return pd.Index(idx.unique(), name=dfs[0].index.name)

def connmatrix(es_dfs, vps, trials):
    """Merge per-run ECN Spider dataframes into one connectivity matrix.

    es_dfs -- list of dataframes as returned by load_es_df
    vps    -- vantage point label for each dataframe, in the same order
    trials -- trial identifier for each dataframe, in the same order

    Returns a dataframe restricted to the addresses present in every input,
    with the columns rank, site and ip6 taken from the first dataframe and,
    for run i, the boolean columns "<vp>-<trial>-e0" (ecn0ok) and
    "<vp>-<trial>-e1" (ecn1ok).
    """
    def colname(i, suffix):
        # e.g. "ams-0-e0"
        return "-".join([str(vps[i]), str(trials[i]), suffix])

    # use only items in every dataframe
    idx = index_intersect(es_dfs)

    # make an initial dataframe from the first run; copy it so the columns
    # added below are not written to a slice of the caller's frame
    cat_df = es_dfs[0].loc[idx, ["rank", "site", "ip6", "ecn0ok", "ecn1ok"]].copy()
    cat_df.columns = ["rank", "site", "ip6", colname(0, "e0"), colname(0, "e1")]

    # now add the columns of the remaining runs; select row labels and the
    # column in a single .loc call instead of materialising every column
    for i in range(1, len(es_dfs)):
        cat_df[colname(i, "e0")] = es_dfs[i].loc[idx, "ecn0ok"]
        cat_df[colname(i, "e1")] = es_dfs[i].loc[idx, "ecn1ok"]

    # NOTE: the notebook previously carried commented-out code for further
    # derived columns, which are not produced here:
    #   per run:  eq  = (e0 & e1) | (~e0 & ~e1)
    #             dep = e0 & ~e1
    #             odd = ~e0 & e1
    #   overall:  all-eq (all eq columns true), eq-sum, e0-sum, e1-sum,
    #             dep-sum, odd-sum (row-wise counts), all-conn (all e0/e1
    #             columns true) and no-conn (no e0/e1 column true)

    return cat_df

# Connectivity matrix for the single loaded run (vantage point "ams", trial 0)
cc_df = connmatrix([es_df["ams-0"]], ["ams"], [0])

## QoF flow data input

Now we load IPFIX flow data generated by QoF. We'll link this to the ECN spider connectivity data by destination address.

Because python-ipfix is relatively slow, we can either reload the flows from a previously stored HDF5 table or load them directly from raw IPFIX; both possibilities are shown below.

In [4]:
import ipfix
import panfix

# configure IPFIX information model: the IANA defaults, the "5103" defaults
# (presumably the RFC 5103 reverse information elements -- confirm) and the
# QoF-specific elements listed in qof.iespec
ipfix.ie.use_iana_default()
ipfix.ie.use_5103_default()
ipfix.ie.use_specfile("qof.iespec")

# Define flags
# TCP flag combinations built from the single-flag constants in panfix
S = panfix.TCP_SYN
R = panfix.TCP_RST
SA = panfix.TCP_SYN | panfix.TCP_ACK
# SYN+ECE+CWR: pattern load_qof_df looks for on the last forward SYN to
# decide that ECN was attempted
SEW = (panfix.TCP_SYN | panfix.TCP_ECE | panfix.TCP_CWR)
# SYN+ACK+ECE: pattern load_qof_df looks for on the last reverse SYN to
# decide that ECN was negotiated
SAE = (panfix.TCP_SYN | panfix.TCP_ECE | panfix.TCP_ACK)
# all four bits: used as the comparison mask for the two tests above, and as
# the "flags reflected" pattern in flowmatrix
SAEW = (panfix.TCP_SYN | panfix.TCP_ECE | panfix.TCP_ACK | panfix.TCP_CWR)
# bits tested against reverseQofTcpCharacteristics in load_qof_df
QECT = (panfix.QOF_ECT0 | panfix.QOF_ECT1)
QECT0 = panfix.QOF_ECT0
QECT1 = panfix.QOF_ECT1
QCE = panfix.QOF_CE

# iain's last syn qof characteristics flags
# (tested against reverseQofTcpCharacteristics in flowmatrix)
# NOTE(review): hard-coded here rather than taken from panfix -- confirm the
# bit values match the QoF build that produced the data
QSYNECT0 = 0x0100
QSYNECT1 = 0x0200
QSYNCE   = 0x0400

def load_qof_df(filename, ipv6_mode=False, open_fn=open, spider_idx=None, count=None):
    """Load QoF IPFIX flows to port 80 into a dataframe indexed by destination IP.

    filename   -- IPFIX file, passed to panfix.dataframe_from_ipfix
    ipv6_mode  -- if true, read destinationIPv6Address and write addresses
                  in brackets; otherwise read destinationIPv4Address
    open_fn    -- passed through unchanged to panfix.dataframe_from_ipfix
    spider_idx -- optional pandas Index of address strings; when given, only
                  flows whose destination address is in it are kept
    count      -- passed through unchanged to panfix.dataframe_from_ipfix

    Only flows with destination port 80 and a SYN in the initial TCP flags
    are kept.  The returned dataframe is indexed by address string (index
    name "ip") and carries the boolean columns ip6, ecnAttempted,
    ecnNegotiated, ecnCapable, ecnECT1, ecnCE, didEstablish and isUniflow in
    addition to the remaining information elements.
    """
    # select destination address IE
    if ipv6_mode:
        dip_ie = "destinationIPv6Address"
    else:
        dip_ie = "destinationIPv4Address"

    # raw dataframe
    df = panfix.dataframe_from_ipfix(filename, open_fn=open_fn, count=count,
               ienames=(  "flowStartMilliseconds",
                          "octetDeltaCount",
                          "reverseOctetDeltaCount",
                          "transportOctetDeltaCount",
                          "reverseTransportOctetDeltaCount",
                          "tcpSequenceCount",
                          "reverseTcpSequenceCount",
                          dip_ie,
                          "sourceTransportPort",
                          "destinationTransportPort",
                          "initialTCPFlags",
                          "reverseInitialTCPFlags",
                          "unionTCPFlags",
                          "reverseUnionTCPFlags",
                          "lastSynTcpFlags",
                          "reverseLastSynTcpFlags",
                          "tcpSynTotalCount",
                          "reverseTcpSynTotalCount",
                          "qofTcpCharacteristics",
                          "reverseQofTcpCharacteristics",
                          "reverseMinimumTTL",
                          "reverseMaximumTTL"))

    # turn timestamps into pandas-friendly types
    df = panfix.coerce_timestamps(df)

    # cast flags down to reduce memory consumption
    for flag_col in ("initialTCPFlags", "reverseInitialTCPFlags",
                     "unionTCPFlags", "reverseUnionTCPFlags",
                     "lastSynTcpFlags", "reverseLastSynTcpFlags"):
        df[flag_col] = df[flag_col].astype(np.uint8)

    # drop all flows without dport == 80
    df = df[df["destinationTransportPort"] == 80]
    del df["destinationTransportPort"]

    # drop all flows without an initial SYN; copy so the column assignments
    # below operate on an independent frame rather than on a slice
    df = df[np.bitwise_and(df["initialTCPFlags"], S) > 0].copy()

    # cast addresses to strings to match ecnspider data
    # (IPv6 addresses are bracketed there)
    if ipv6_mode:
        df[dip_ie] = df[dip_ie].apply(lambda x: "["+str(x)+"]")
    else:
        df[dip_ie] = df[dip_ie].apply(str)

    # mark IPv6 mode
    df['ip6'] = ipv6_mode

    # now build the index
    df.index = pd.Index(df[dip_ie], name="ip")
    del df[dip_ie]

    # filter on index if requested.  Use the explicit set operation: the `&`
    # operator on Index objects is not a set intersection in current pandas.
    if spider_idx is not None:
        qof_idx = pd.Index(spider_idx.intersection(df.index).unique(),
                           name=spider_idx.name)
        df = df.loc[qof_idx]

    # Now annotate the dataframe with ECN and establishment columns
    # ECN attempted: last forward SYN carries exactly SYN+ECE+CWR (no ACK)
    df["ecnAttempted"] = np.bitwise_and(df["lastSynTcpFlags"],SAEW) == SEW
    # ECN negotiated: last reverse SYN carries exactly SYN+ACK+ECE (no CWR)
    df["ecnNegotiated"] = np.bitwise_and(df["reverseLastSynTcpFlags"],SAEW) == SAE
    # ECN codepoint bits in the reverse QoF characteristics
    df["ecnCapable"] = np.bitwise_and(df["reverseQofTcpCharacteristics"],QECT0) > 0
    df["ecnECT1"] = np.bitwise_and(df["reverseQofTcpCharacteristics"],QECT1) > 0
    df["ecnCE"] = np.bitwise_and(df["reverseQofTcpCharacteristics"],QCE) > 0
    # established: SYN forward and SYN+ACK in reverse
    df["didEstablish"] = ((np.bitwise_and(df["lastSynTcpFlags"], S) == S) &
                          (np.bitwise_and(df["reverseLastSynTcpFlags"], SA) == SA))
    # a reverse maximum TTL of 0 is taken to mean no reverse packets were seen
    df["isUniflow"] = (df["reverseMaximumTTL"] == 0)

    return df

**Reload flow dataframes from HDF5, or...**

In [6]:
%%time
# Load from HDF5: every key "<vp>-<trial>-ip4" / "<vp>-<trial>-ip6" in the
# store becomes an entry "<vp>-<trial>" in qof4_df / qof6_df
qof4_df = {}
qof6_df = {}
# pd.get_store() no longer exists in pandas; HDFStore is itself a context
# manager and closes the file on exit
with pd.HDFStore(DATAPATH+"/ecnqof.hdf5") as qofstore:
    for key in qofstore.keys():
        # store keys are reported with a leading "/"
        if key.startswith("/"):
            key = key[1:]
        (vp, trial, ipv) = key.split("-")
        if ipv == "ip4":
            qof4_df[vp+"-"+trial] = qofstore[key]
        elif ipv == "ip6":
            qof6_df[vp+"-"+trial] = qofstore[key]

CPU times: user 4.12 s, sys: 700 ms, total: 4.82 s
Wall time: 4.82 s


**...load them from raw IPFIX. This is much slower; it is only necessary after QoF has been rerun and can be skipped otherwise.**

In [6]:
# (re)load IPv4 QoF dataframes
# Only flows whose destination address appears in the connectivity matrix
# index are kept (spider_idx).
qof4_df = {}
%time qof4_df['ams-0'] = load_qof_df(filename=DATAPATH+"/results0.ipfix", spider_idx=cc_df.index)

CPU times: user 1min 41s, sys: 3.38 s, total: 1min 45s
Wall time: 1min 45s


In [7]:
# (re)load IPv6 QoF dataframes
# Same IPFIX file as the IPv4 load, read again with ipv6_mode=True so the
# IPv6 destination address element is selected instead.
qof6_df = {}
%time qof6_df['ams-0'] = load_qof_df(filename=DATAPATH+"/results0.ipfix", spider_idx=cc_df.index, ipv6_mode=True)

CPU times: user 3.72 s, sys: 147 ms, total: 3.87 s
Wall time: 3.87 s


In [8]:
# and store to HDF5 so we can retrieve them later
# (keys are "<vp>-<trial>-ip4" and "<vp>-<trial>-ip6")
# pd.get_store() no longer exists in pandas; HDFStore is itself a context
# manager and closes the file on exit
with pd.HDFStore(DATAPATH+"/ecnqof.hdf5") as qofstore:
    for key, frame in qof4_df.items():
        qofstore[key+"-ip4"] = frame
    for key, frame in qof6_df.items():
        qofstore[key+"-ip6"] = frame



Now split the QoF dataframes based on whether an ECN attempt was seen or not, and merge these back together as with the ECN Spider data frames.

In [9]:
def split_qof_df(df):
    """Split a QoF flow dataframe into non-ECN and ECN-attempt flows.

    df -- dataframe as returned by load_qof_df (indexed by address, with
          the columns ecnAttempted and reverseTransportOctetDeltaCount)

    Returns (qe0_df, qe1_df): one row per address for flows without and with
    an ECN attempt respectively, both restricted to the addresses that occur
    in both halves.
    """
    # split on attempt
    attempted = df['ecnAttempted']
    qe0_df = df[~attempted]
    qe1_df = df[attempted]

    # take only the biggest object HACK HACK HACK
    # (DataFrame.sort() no longer exists in pandas; sort_values() is its
    # replacement)
    # NOTE(review): groupby(...).first() takes the first non-null value per
    # column, so a row with missing fields may be filled from smaller flows
    # of the same address.
    size_col = "reverseTransportOctetDeltaCount"
    qe0_df = qe0_df.sort_values(size_col, ascending=False).groupby(level=0).first()
    qe1_df = qe1_df.sort_values(size_col, ascending=False).groupby(level=0).first()

    # take only rows appearing in both
    qof_idx = index_intersect([qe0_df, qe1_df])
    qe0_df = qe0_df.loc[qof_idx]
    qe1_df = qe1_df.loc[qof_idx]

    return (qe0_df, qe1_df)
    
def flowmatrix_columns(mq_df):
    """Return an iterator over the column names of mq_df, skipping "ip6"."""
    return (name for name in mq_df.columns if name != "ip6")
    
def flowmatrix(qof_dfs, labels):
    """Build the per-address flow matrix from a list of QoF flow dataframes.

    qof_dfs -- list of dataframes as returned by load_qof_df
    labels  -- one string label per dataframe, used as column prefix

    Each dataframe is split into its non-ECN and ECN-attempt flows, which
    are merged side by side into one row per address.  The merged frames are
    restricted to the addresses common to all of them and joined column-wise
    under "<label>-<column>" names ("ip6" is kept once, unprefixed).  For
    every label the columns "<label>-ect" and "<label>-negok" are derived,
    and for a fixed list of boolean columns a "<column>-sum" column counts
    the true values across all labels.
    """
    def has_bits(series, bits):
        # true where every bit of `bits` is set
        return np.bitwise_and(series, bits) == bits

    merged = []
    for flows in qof_dfs:
        # split on ecn attempted
        no_ecn, with_ecn = split_qof_df(flows)
        no_ecn_chars = no_ecn["reverseQofTcpCharacteristics"]
        with_ecn_chars = with_ecn["reverseQofTcpCharacteristics"]

        # and merge back together, starting from the non-ECN side
        wide = no_ecn.loc[:, ["ip6", "didEstablish", "ecnCapable", "ecnECT1", "ecnCE",
                              "lastSynTcpFlags", "reverseLastSynTcpFlags",
                              "reverseUnionTCPFlags", "reverseMaximumTTL"]]
        wide.columns = ["ip6", "e0", "e0ect0", "e0ect1", "e0ce",
                        "e0f", "e0rf", "e0ruf", "ttl"]
        wide["z0"] = (no_ecn["reverseTransportOctetDeltaCount"] == 0)
        wide["z1"] = (with_ecn["reverseTransportOctetDeltaCount"] == 0)
        wide["e1"] = with_ecn["didEstablish"]
        wide["neg"] = with_ecn["ecnNegotiated"]

        # markings on ECN negotiated flows
        for target, source in (("ect0", "ecnCapable"),
                               ("ect1", "ecnECT1"),
                               ("ce", "ecnCE")):
            wide[target] = with_ecn[source]
        wide["synect0"] = has_bits(with_ecn_chars, QSYNECT0)
        wide["synect1"] = has_bits(with_ecn_chars, QSYNECT1)
        wide["synce"] = has_bits(with_ecn_chars, QSYNCE)

        # markings on non-negotiated flows
        for target, source in (("e0ect0", "ecnCapable"),
                               ("e0ect1", "ecnECT1"),
                               ("e0ce", "ecnCE")):
            wide[target] = no_ecn[source]
        wide["e0synect0"] = has_bits(no_ecn_chars, QSYNECT0)
        wide["e0synect1"] = has_bits(no_ecn_chars, QSYNECT1)
        wide["e0synce"] = has_bits(no_ecn_chars, QSYNCE)

        # flags reflected: the reverse SYN carries all of SYN, ACK, ECE, CWR
        wide["refl"] = has_bits(with_ecn["reverseLastSynTcpFlags"], SAEW)

        # add to list of merged dataframes
        merged.append(wide)

    # use only items in every dataframe
    shared = index_intersect(merged)

    # make an initial dataframe from the first
    pfx = labels[0]+"-"
    cat_df = merged[0].loc[shared]
    cat_df.columns = ["ip6"] + [pfx + name for name in flowmatrix_columns(merged[0])]
    cat_df[pfx+"ect"] = cat_df[pfx+"ect0"] | cat_df[pfx+"ect1"]
    cat_df[pfx+"negok"] = cat_df[pfx+"neg"] & cat_df[pfx+"ect"]

    # now add columns to the catdf
    for pos, frame in enumerate(merged[1:], start=1):
        pfx = labels[pos]+"-"
        aligned = frame.loc[shared]
        for name in flowmatrix_columns(frame):
            cat_df[pfx+name] = aligned[name]
        cat_df[pfx+"ect"] = cat_df[pfx+"ect0"] | cat_df[pfx+"ect1"]
        cat_df[pfx+"negok"] = cat_df[pfx+"neg"] & cat_df[pfx+"ect"]

    # now some sums
    for stem in ("negok", "neg", "ect", "refl",
                 "ect0", "ect1", "ce",
                 "synect0", "synect1", "synce",
                 "e0ect0", "e0ect1", "e0ce",
                 "e0synect0", "e0synect1", "e0synce",
                 "e1", "e0", "z1", "z0"):
        members = [label+"-"+stem for label in labels]
        cat_df[stem+"-sum"] = cat_df.loc[:, members].sum(axis=1)

    return cat_df

In [10]:
%%time

import itertools

# one label per (vantage point, trial) pair, e.g. "ams-0"
vps = ["ams"]
trials = list(map(str, [0]))
labels = ["-".join(pair) for pair in itertools.product(vps, trials)]

# IPv4 flow matrix, annotated with rank and site from the connectivity matrix
qq4_df = flowmatrix([qof4_df[name] for name in labels], labels)
qq4_df["rank"] = cc_df.loc[qq4_df.index, "rank"]
qq4_df["site"] = cc_df.loc[qq4_df.index, "site"]

# IPv6 flow matrix, annotated the same way
qq6_df = flowmatrix([qof6_df[name] for name in labels], labels)
qq6_df["rank"] = cc_df.loc[qq6_df.index, "rank"]
qq6_df["site"] = cc_df.loc[qq6_df.index, "site"]

# stack both address families into one table
qq_df = pd.concat([qq4_df, qq6_df])



CPU times: user 11.3 s, sys: 1.68 s, total: 13 s
Wall time: 13 s


In [11]:
# Show the columns of the connectivity matrix
cc_df.columns

Index(['rank', 'site', 'ip6', 'ams-0-e0', 'ams-0-e1'], dtype='object')

In [12]:
# Show the columns of the combined (IPv4 + IPv6) flow matrix
qq_df.columns

Index(['ip6', 'ams-0-e0', 'ams-0-e0ect0', 'ams-0-e0ect1', 'ams-0-e0ce',
       'ams-0-e0f', 'ams-0-e0rf', 'ams-0-e0ruf', 'ams-0-ttl', 'ams-0-z0',
       'ams-0-z1', 'ams-0-e1', 'ams-0-neg', 'ams-0-ect0', 'ams-0-ect1',
       'ams-0-ce', 'ams-0-synect0', 'ams-0-synect1', 'ams-0-synce',
       'ams-0-e0synect0', 'ams-0-e0synect1', 'ams-0-e0synce', 'ams-0-refl',
       'ams-0-ect', 'ams-0-negok', 'negok-sum', 'neg-sum', 'ect-sum',
       'refl-sum', 'ect0-sum', 'ect1-sum', 'ce-sum', 'synect0-sum',
       'synect1-sum', 'synce-sum', 'e0ect0-sum', 'e0ect1-sum', 'e0ce-sum',
       'e0synect0-sum', 'e0synect1-sum', 'e0synce-sum', 'e1-sum', 'e0-sum',
       'z1-sum', 'z0-sum', 'rank', 'site'],
      dtype='object')

## Dataframe CSV output

Output the `cc_df` table, which has the following columns:

- `ip`: index, IP address as string
- `rank`: Alexa website rank
- `site`: Website hostname
- `ip6`: True if address is IPv6 (for convenience in splitting v4 and v6 analyses without string munging)
- *vantage*`-`*trial*`-e0`: True if connection succeeded for *trial* at *vantage* without ECN negotiation attempt
- *vantage*`-`*trial*`-e1`: True if connection succeeded for *trial* at *vantage* with ECN negotiation attempt


In [13]:
# Write the connectivity matrix to TABLESDIR/cc_df.csv
cc_df.to_csv(TABLESDIR+"/cc_df.csv")

Output the `qq_df` table:

- `ip`: index, IP address as string
- `rank`: Alexa website rank
- `site`: Website hostname
- `ip6`: True if address is IPv6 (for convenience in splitting v4 and v6 analyses without string munging)
- *vantage*`-`*trial*`-e0`: True if connection established for *trial* at *vantage* without ECN negotiation attempt
- *vantage*`-`*trial*`-e0ect0`: True if non-ECN flow set ECT0 flag on downstream for *trial* at *vantage*
- *vantage*`-`*trial*`-e0ect1`: True if non-ECN flow set ECT1 flag on downstream for *trial* at *vantage*
- *vantage*`-`*trial*`-e0ce`: True if non-ECN flow set CE flag on downstream for *trial* at *vantage*
- *vantage*`-`*trial*`-e0f`: Flags on last SYN of upstream non-ECN flow for *trial* at *vantage*
- *vantage*`-`*trial*`-e0rf`: Flags on last SYN of downstream non-ECN flow for *trial* at *vantage*
- *vantage*`-`*trial*`-e0ruf`: Union flags on downstream non-ECN flow for *trial* at *vantage*
- *vantage*`-`*trial*`-ttl`: Max TTL received on downstream non-ECN flow for *trial* at *vantage*
- *vantage*`-`*trial*`-z0`: True if no payload received for non-ECN flow for *trial* at *vantage*
- *vantage*`-`*trial*`-z1`: True if no payload received for ECN flow for *trial* at *vantage*
- *vantage*`-`*trial*`-e1`: True if connection established for *trial* at *vantage* with ECN negotiation attempt
- *vantage*`-`*trial*`-neg`: True if ECN negotiated for *trial* at *vantage*
- *vantage*`-`*trial*`-ect0`: True if ECN flow set ECT0 flag on downstream for *trial* at *vantage*
- *vantage*`-`*trial*`-ect1`: True if ECN flow set ECT1 flag on downstream for *trial* at *vantage*
- *vantage*`-`*trial*`-ce`: True if ECN flow set CE flag on downstream for *trial* at *vantage*
- *vantage*`-`*trial*`-synect0`: True if ECN flow set ECT0 flag on downstream SYN ACK for *trial* at *vantage*
- *vantage*`-`*trial*`-synect1`: True if ECN flow set ECT1 flag on downstream SYN ACK for *trial* at *vantage*
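- *vantage*`-`*trial*`-synce`: True if ECN flow set CE flag on downstream SYN ACK for *trial* at *vantage*
- *vantage*`-`*trial*`-e0synect0`: True if non-ECN flow set ECT0 flag on downstream SYN ACK for *trial* at *vantage*
- *vantage*`-`*trial*`-e0synect1`: True if non-ECN flow set ECT1 flag on downstream SYN ACK for *trial* at *vantage*
- *vantage*`-`*trial*`-e0synce`: True if non-ECN flow set CE flag on downstream SYN ACK for *trial* at *vantage*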
- *vantage*`-`*trial*`-refl`: True if ECN TCP flags were reflected (SEW -> SAEW) for *trial* at *vantage*
- *vantage*`-`*trial*`-ect`: True if ECT0 or ECT1 set on downstream ECN flow for *trial* at *vantage*
- *vantage*`-`*trial*`-negok`: True if negotiation succeeded (resulted in ECT marking) for *trial* at *vantage*

The `-sum` columns for each of the boolean columns above simply count True values across all trials and vantage points.

In [14]:
# Write the flow matrix to TABLESDIR/qq_df.csv
qq_df.to_csv(TABLESDIR+"/qq_df.csv")