In [None]:
from IPython.display import clear_output
from collections import Counter
from collections import defaultdict
from dtrace import DTraceConsumerThread
from graphviz import Digraph
import copy
import dtrace
import functools
import json
import math
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pyflamegraph
import statistics as stats
import subprocess

In [None]:
# Benchmark sizing: 2**24 (16777216) in total, handed over as one buffer of
# the same size. Both values are interpolated into the ipc-benchmark command
# line (-t and -b) built in get_data().
totalsize = 2 ** 24
buffersize = totalsize
(totalsize, buffersize)

In [None]:
# configure this once per boot
# Rule 1 routes outgoing loopback TCP traffic with source port 10141 through
# pipe 1; rule 2 routes outgoing loopback TCP traffic with destination port
# 10141 through pipe 2. The pipes' delay is configured later, per run, with
# `ipfw pipe config`.
!ipfw add 1 pipe 1 tcp from any 10141 to any out via lo0
!ipfw add 2 pipe 2 tcp from any to any 10141 out via lo0

In [None]:
# Set the MTU of the loopback interface lo0 to 1500.
# NOTE(review): presumably chosen to mimic an Ethernet-sized MTU — confirm.
!ifconfig lo0 mtu 1500

In [None]:
def dtrace_synchronous(script, walker, out, cmdline):
    """Run `cmdline` to completion while a DTrace consumer thread is tracing.

    Parameters:
        script:  D script source handed to DTraceConsumerThread.
        walker:  callback passed as ``walk_func``.
        out:     callback passed as ``out_func``.
        cmdline: command to execute; it is split on single spaces, so
                 arguments must not contain spaces or quoting.

    Returns:
        None. Progress and the outcome (finished / invalid command / failed
        with exit code) are reported with print().

    The consumer thread is always stopped and joined, even when launching the
    command raises (e.g. the executable does not exist), so a failed run does
    not leave a tracing thread behind.
    """
    dtrace_thread = DTraceConsumerThread(script,
                                     walk_func=walker,
                                     out_func=out,
                                     chew_func=lambda v: None,
                                     chewrec_func=lambda v: None,
                                     sleep=1)

    dtrace_thread.start()
    print("## Starting ", cmdline)

    try:
        output_dtrace = subprocess.run(cmdline.split(" "))
    finally:
        # Tear the consumer down on every path; previously an exception from
        # subprocess.run() skipped stop()/join() and leaked the thread.
        dtrace_thread.stop()
        dtrace_thread.join()
        del dtrace_thread

    if output_dtrace.returncode == 0:
        print("## Finished ", cmdline)
    elif output_dtrace.returncode == 64: # EX_USAGE
        print("## Invalid command", cmdline)
    else:
        print("## Failed with the exit code {}".format(output_dtrace.returncode))

# Bit position (least significant bit = 0) -> name of the six classic TCP
# control bits: FIN=0x01, SYN=0x02, RST=0x04, PSH=0x08, ACK=0x10, URG=0x20.
# Bit 5 was previously mislabelled 'RST' (a duplicate of bit 2); it is URG.
flags_to_str_map={
    0:'FIN',
    1:'SYN',
    2:'RST',
    3:'PUSH',
    4:'ACK',
    5:'URG'
}
def flags_to_str(flags):
    """Decode a TCP flags byte into a list of flag names.

    Parameters:
        flags: integer value of the TCP header flags field.

    Returns:
        List of names from ``flags_to_str_map`` for every set bit among the
        low six bits, ordered from bit 0 (FIN) to bit 5 (URG). Higher bits
        are ignored. An input of 0 yields an empty list.
    """
    return [
        flags_to_str_map[bit]
        for bit in range(6)
        if (flags >> bit) & 1
    ]

In [None]:
# D script handed to dtrace_synchronous() for every benchmark run.
#
# Each printf() emits one whitespace-separated record. get_data() uses the
# first two fields to pick the bucket and stores the remaining fields:
#   "<client|server> tcp_state_change <current state> <new state>"
#   "<client|server> tcp_do_segment <state> <flags> <seq> <ack> <timestamp - start_time>"
#   "soclose soclose <timestamp - start_time>"
#
# Port 10141 decides the label: for tcp_state_change, foreign port 10141 is
# "client" and local port 10141 is "server"; for tcp_do_segment, source port
# 10141 is "client" and destination port 10141 is "server".
#
# tcp_do_segment records are limited to the first segment seen plus segments
# whose seq (client) / ack (server) is less than 8000 past that first value or
# more than 16777216 - 8000 past it.
# NOTE(review): 16777216 equals totalsize (1<<24) but is hard-coded in the
# predicates — keep the two in sync if totalsize changes.
# NOTE(review): the predicates join comparisons with bitwise & and | rather
# than && and ||; presumably equivalent for 0/1 operands — confirm.
#
# The stack_traces aggregation is keyed by (label, timestamp, stack()); its
# keys are what the walker callback in get_data() receives.
script="""
#pragma D option cleanrate=800hz

BEGIN {
    self->first_segment_client=0;
    self->first_segment_server=0;
    start_time=timestamp;
}
fbt::tcp_state_change:entry
/args[0]->t_inpcb->inp_inc.inc_ie.ie_fport== htons(10141)/
{
    printf("client %s %s %s",
            probefunc,
            tcp_state_string[args[0]->t_state],
            tcp_state_string[args[1]]);
    @stack_traces["client",timestamp,stack()]=count();
}
fbt::tcp_state_change:entry
/args[0]->t_inpcb->inp_inc.inc_ie.ie_lport== htons(10141)/
{
    printf("server %s %s %s",
            probefunc,
            tcp_state_string[args[0]->t_state],
            tcp_state_string[args[1]]);
    @stack_traces["server",timestamp,stack()]=count();
}

fbt::tcp_do_segment:entry
/(args[1]->th_sport == htons(10141))&(self->first_segment_client==0)/
{
    self->first_segment_client=1;
    self->first_byte_client=args[1]->th_seq;
    printf("client %s %s %u %u %u %d",
            probefunc,
            tcp_state_string[args[3]->t_state],
            (unsigned int)args[1]->th_flags,
            (unsigned int)args[1]->th_seq,
            (unsigned int)args[1]->th_ack,
            timestamp-start_time
        );
}
fbt::tcp_do_segment:entry
/(args[1]->th_sport == htons(10141))&(self->first_segment_client==1)&((args[1]->th_seq - self->first_byte_client < 8000)|(args[1]->th_seq - self->first_byte_client > 16777216 - 8000))/
{
    printf("client %s %s %u %u %u %d",
            probefunc,
            tcp_state_string[args[3]->t_state],
            (unsigned int)args[1]->th_flags,
            (unsigned int)args[1]->th_seq,
            (unsigned int)args[1]->th_ack,
            timestamp-start_time
            );

}

fbt::tcp_do_segment:entry
/(args[1]->th_dport == htons(10141))&(self->first_segment_server==0)/
{
    self->first_segment_server=1;
    self->first_byte_server=args[1]->th_ack;
    printf("server %s %s %u %u %u %d",
            probefunc,
            tcp_state_string[args[3]->t_state],
            (unsigned int)args[1]->th_flags,
            (unsigned int)args[1]->th_seq,
            (unsigned int)args[1]->th_ack,
            timestamp-start_time
            );

}
fbt::tcp_do_segment:entry
/(args[1]->th_dport == htons(10141))&(self->first_segment_server==1)&((args[1]->th_ack - self->first_byte_server < 8000)|(args[1]->th_ack - self->first_byte_server > 16777216 - 8000))/
{
    printf("server %s %s %u %u %u %d",
            probefunc,
            tcp_state_string[args[3]->t_state],
            (unsigned int)args[1]->th_flags,
            (unsigned int)args[1]->th_seq,
            (unsigned int)args[1]->th_ack,
            timestamp-start_time
            );

}
fbt::soclose:entry
{
    printf("%s %s %d",
        probefunc,
        probefunc,
        timestamp-start_time
        );
}
/*
*/
"""

In [None]:
def get_data():
    """Trace one ipc-benchmark TCP run and return the collected records.

    Runs the benchmark under the module-level D ``script`` and sorts what
    DTrace reports into a nested dict:

        data['server' | 'client']['tcp_state_change'] -> lists of printed fields
        data['server' | 'client']['tcp_do_segment']   -> lists of printed fields,
                                                         with field 1 (the flags
                                                         number) replaced by a
                                                         list of flag names
        data['server' | 'client']['stack']            -> sorted aggregation keys
                                                         (everything after the
                                                         side label)
        data['soclose']['soclose']                    -> lists of printed fields

    Returns:
        The nested dict described above.
    """
    endpoints = ('server', 'client')

    data = {
        side: {
            'tcp_state_change': [],
            'stack': [],
            'tcp_do_segment': [],
        }
        for side in endpoints
    }
    data['soclose'] = {'soclose': []}

    def record_line(value):
        # First field selects the side, second the probe; the rest is payload.
        fields = value.decode().split()
        data[fields[0]][fields[1]].append(fields[2:])

    def record_stack(action, identifier, keys, value):
        # The first aggregation key is the side label, stored as bytes.
        owner = keys[0].decode()
        data[owner]['stack'].append(keys[1:])

    workload = f'ipc/ipc-benchmark -i tcp -t {totalsize} -v -j -g -b {buffersize} -n 1 -q 2thread'
    dtrace_synchronous(script, record_stack, record_line, workload)

    for side in endpoints:
        per_side = data[side]
        per_side['stack'].sort()
        for segment in per_side['tcp_do_segment']:
            # segment[1] is the numeric flags field printed by the D script.
            segment[1] = flags_to_str(int(segment[1]))

    return data

In [None]:
# Run the traced benchmark once per simulated delay: 0 to 40 in steps of 5
# (the write-up describes these values as milliseconds). The same delay is
# applied to both pipes before each run, and results are keyed by delay.
all_data={}
for delay in range(0,41,5):
    !ipfw pipe config 1 delay $delay
    !ipfw pipe config 2 delay $delay
    all_data[delay]=get_data()

In [None]:
# Display the raw collected records for every delay setting.
all_data

In [None]:
# Figure 1: measured TCP state machine with no added delay.
# States are listed in the order they are added to the graph.
fig1_states = (
    'CLOSED',
    'SYN SENT',
    'LISTEN',
    'SYN RCVD',
    'ESTAB',
    'FINWAIT-1',
    'FINWAIT-2',
    'CLOSE WAIT',
    'LAST ACK',
    'TIME WAIT',
)

# Each edge: (from state, to state, side, triggering call or received flags).
fig1_edges = (
    # server transitions
    ('CLOSED',     'LISTEN',     'server', 'solisten()'),
    ('CLOSED',     'SYN RCVD',   'server', 'syncache_expand()'),
    ('SYN RCVD',   'ESTAB',      'server', 'rcv: ACK'),
    ('LISTEN',     'CLOSED',     'server', 'soclose()'),
    ('ESTAB',      'CLOSE WAIT', 'server', 'rcv: FIN,ACK'),
    ('CLOSE WAIT', 'LAST ACK',   'server', 'soclose()'),
    ('LAST ACK',   'CLOSED',     'server', 'rcv: ACK'),
    # client transitions
    ('CLOSED',     'SYN SENT',   'client', 'soconnectat()'),
    ('SYN SENT',   'ESTAB',      'client', 'rcv: SYN,ACK'),
    ('ESTAB',      'FINWAIT-1',  'client', 'soclose()'),
    ('FINWAIT-1',  'FINWAIT-2',  'client', 'rcv: ACK'),
    ('FINWAIT-2',  'TIME WAIT',  'client', 'rcv: FIN,ACK'),
)

dot = Digraph()
for state in fig1_states:
    dot.node(state)
for src, dst, side, trigger in fig1_edges:
    # Label format: side, blank line, trigger.
    dot.edge(src, dst, label=side + '\n\n' + trigger)

print('Figure 1: state transition diagram with no delay')
dot

In [None]:
# Figure 2: measured TCP state machine with added delay.
# States are listed in the order they are added to the graph.
fig2_states = (
    'CLOSED',
    'SYN SENT',
    'LISTEN',
    'SYN RCVD',
    'ESTAB',
    'FINWAIT-1',
    'TIME WAIT',
    'CLOSING',
)

# Each edge: (from state, to state, side, triggering call or received flags).
# The close path here is shared by both sides; the separate server/client
# close edges drawn in Figure 1 are intentionally absent.
fig2_edges = (
    # server transitions
    ('CLOSED',    'LISTEN',    'server', 'solisten()'),
    ('CLOSED',    'SYN RCVD',  'server', 'syncache_expand()'),
    ('SYN RCVD',  'ESTAB',     'server', 'rcv: ACK'),
    ('LISTEN',    'CLOSED',    'server', 'soclose()'),
    # client transitions
    ('CLOSED',    'SYN SENT',  'client', 'soconnectat()'),
    ('SYN SENT',  'ESTAB',     'client', 'rcv: SYN,ACK'),
    # shared transitions
    ('ESTAB',     'FINWAIT-1', 'both',   'soclose()'),
    ('FINWAIT-1', 'CLOSING',   'both',   'rcv: FIN,ACK'),
    ('CLOSING',   'TIME WAIT', 'both',   'rcv: FIN,ACK'),
)

dot = Digraph()
for state in fig2_states:
    dot.node(state)
for src, dst, side, trigger in fig2_edges:
    # Label format: side, blank line, trigger.
    dot.edge(src, dst, label=side + '\n\n' + trigger)

print('Figure 2: state transition diagram with delay')
dot

### Experimental questions: Latency and the TCP state machine
#### With no synthetic latency introduced, plot an effective (i.e., as measured) TCP state-transition diagram for the two directions of a single TCP connection: states will be nodes, and transitions will be edges. Where state transitions diverge between the two directions, be sure to label edges indicating ‘client’ vs. ‘server’.
#### Extend the diagram to indicate, for each edge, the TCP header flags of the received packet triggering the transition, or the local system call (or other event – e.g., timer) that triggers the transition.
#### Compare the graphs you have drawn with the TCP state diagram in RFC 793, describing and explaining any differences.
It is easier to follow the state diagram for the client and server separately. For this section, refer to the state diagram in Figure 1.

Following the state transitions taken by the client, most of the transitions match what is specified in RFC 793. 

The transition from FINWAIT-2 to TIME WAIT should be on rcv FIN according to RFC 793, but the header flags turn out to be FIN, ACK experimentally.

Lastly, we do not observe the transition from TIME WAIT to CLOSED. This may be explained by the fact that on timeout, the TCB is deleted, so there is no point in changing state and tcp_state_change is not called.

Following the state transitions taken by the server, the first difference is that in RFC 793, a TCP state should progress from CLOSED to LISTEN on passive OPEN, and LISTEN to SYN RCVD on rcv SYN. However, experimentally we see that the server has two separate state sequences, one for each of its two sockets.

- The first state goes from CLOSED to LISTEN on solisten(), and then from LISTEN to CLOSED on soclose(). 
    - This represents the server socket's state, which continuously listens on the port for any incoming connections, and creates a new socket for each connection.
    - in the ipc.c code, the server socket is closed after the client connects.
- The second state goes from CLOSED to SYN RCVD on syncache_expand().
    - This represents the state of the actual socket that the server is using to send information to the client. syncache_expand(), as the name suggests, is called when a TCP packet with SYN in its header is received.
    - This corresponds to the transition from LISTEN to SYN RCVD on rcv SYN in RFC 793.
    
The difference between specification and experiment seems to be justified by the fact that connections often occur between a server and client, and it is more efficient to use this interface. This is the Berkeley socket interface, which is at odds with the TCP specification.

Continuing on, the last difference is that in RFC 793, ESTAB transitions to CLOSE WAIT on rcv FIN. However, the TCP flags that are observed are FIN, ACK.
    
#### Using DUMMYNET, explore the effects of simulated latency at 5ms intervals between 0ms and 40ms. What observations can we make about state-machine transitions as latency increases, and why do any differences arise?

From my testing, the main difference occurs when the simulated latency increases from 0ms to 5ms or more. The new state transition diagram is shown in Figure 2.

The main differences are the transitions after the ESTAB state. For both client and server, the transitions now go from ESTAB to FINWAIT-1 on soclose(), from FINWAIT-1 to CLOSING on rcv FIN,ACK, and finally from CLOSING to TIME WAIT on rcv FIN,ACK.

This difference can be explained as follows:

In the case when there is no simulated latency, the client calls soclose() first (this can be verified by reading ipc.c - the code frees the read file descriptor first). As there is no latency, a FIN is sent to and received by the server socket, before the server calls soclose(). This causes the server to transition from ESTAB to CLOSE WAIT on rcv FIN, and the rest of the transitions follow.

In the case when there is simulated latency, the client still calls soclose() and sends a FIN before the server calls soclose(). However, due to the latency, the segment carrying the FIN flag arrives after the soclose() call for the server has executed. Thus, the server will be in FINWAIT-1 when it receives FIN (which is the same state as the client), and the rest of the transitions follow.

To justify this, we may print the walltimestamp in the calls to tcp_do_segment() and soclose(). The DTrace script is below this markdown block.

#### Describe any apparent simulation or probe effects that may be affecting the fidelity of the state machines you have measured and plotted.