In [280]:
# importing pandas as pd
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns

In [281]:
# Notebook-level on/off toggles.
# NOTE(review): neither flag is read by the cells visible here; presumably they
# select per-device vs. combined processing further down — confirm before changing.
separateDevices, allDevices = True, True

#### Loading CSV data

In [282]:
# Flow-result file stems for this MUD flow, keyed by the device name used in
# the rest of the notebook. Declaring each (device, file) pair on one line keeps
# the two derived lists below aligned by construction.
# NOTE(review): the original cell carried a bare "DUP" tag; its meaning is not
# evident from this notebook — confirm.
_FLOW_RESULT_FILE_BY_DEVICE = {
    'awairAirQuality': "AwairAirQuality_70886b100fc6.pcap",
    'lifxbulb': "LiFXBulb_d073d5018308.pcap",
    'pixstarphotoframe': "PixStarPhotoFrame_e076d033bb85.pcap",
    'ringdoorbell': "RingDoorBell_884aea31669d.pcap",
    'tplinkcamera': "TPLinkCamera_f4f26D9351f1.pcap",
    'tribyspeaker': "TribySpeaker_18B79E022044.pcap",
    'amazonEcho': "AmazonEcho/DUP_CONCATANATED_AmazonEcho_44650d56ccd3",
    'nestsmokesensor': "NestProtect/DUP_CONCATANATED_NestProtect_18b43025bee4",
    'withingssleepsensor': "WithingsSleepSensor/DUP_CONCATANATED_WithingsSleepSensor_0024e42028c6",
}

# File stems, in declaration order (dicts preserve insertion order).
all_files = list(_FLOW_RESULT_FILE_BY_DEVICE.values())

# Device names corresponding to the entries of all_files, in the same order.
all_devices = list(_FLOW_RESULT_FILE_BY_DEVICE.keys())


In [283]:
# Load every per-device flowResult CSV into a DataFrame whose cells are all strings.
filePath = 'flowResults/V4/noMetadata/'
fileType = '_flowResult.csv'

# all_files and all_devices are paired by position; zip() would silently drop
# the unmatched tail if the two lists ever drifted apart, so fail loudly instead.
assert len(all_files) == len(all_devices), "all_files and all_devices must have the same length"

flowResult_dfs = {}  # {'device': flowResult_df}

for filename, device in zip(all_files, all_devices):
    # dtype=str keeps every field exactly as written in the file. Letting pandas
    # infer dtypes first would turn a numeric column containing a blank cell into
    # floats, so a port would stringify as '443.0' and never equal '443' in the
    # string comparisons done later in the notebook.
    df = pd.read_csv("../../../../data/" + filePath + filename + fileType, dtype=str)
    # Missing cells are still NaN at this point; astype(str) renders them as the
    # literal 'nan' so every cell is a plain str.
    df = df.astype(str)
    flowResult_dfs[device] = df

# Sanity check: column names and row count of one loaded frame.
flowResult_dfs['pixstarphotoframe'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6239 entries, 0 to 6238
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   srcMac       6239 non-null   object
 1    dstMac      6239 non-null   object
 2    ethType     6239 non-null   object
 3    srcIp       6239 non-null   object
 4    dstIp       6239 non-null   object
 5    ipProto     6239 non-null   object
 6    srcPort     6239 non-null   object
 7    dstPort     6239 non-null   object
 8    protocol    6239 non-null   object
 9    allMatches  6239 non-null   object
dtypes: object(10)
memory usage: 487.5+ KB


In [284]:
# Read the MUDFlow --> FDCP lookup table into a DataFrame.
# Each 'FDCPs' cell holds single-quoted names (e.g. "'tls'"); the rest of the
# notebook parses it with .replace('\'', '').split(' | ') to get a list of names.
filePath = 'MUD/MUD_FDCP_map/'
fileName = 'MUDFlowtoFDCP.csv'

MUDFlowtoFDCP_df = pd.read_csv(f"../../../../results/{filePath}{fileName}")
MUDFlowtoFDCP_df.head()

Unnamed: 0,MUDFlow,FDCPs
0,ipv4-6-443,'tls'
1,ipv4-17-123,'ntp'
2,ipv4-17-1900,'ssdp'
3,ipv4-17-67,'dhcp'
4,ipv4-6-80,'http'


In [285]:
# Read the device --> MUDFlow table into a DataFrame.
# Each 'Flows_Used' cell is a ' | '-separated list of single-quoted MUD flow
# labels (e.g. "'ipv4-17-67' | 'ipv4-17-53'"); the rest of the notebook parses it
# with .replace('\'', '').split(' | ').
filePath = 'MUD/classifyMUDFlowsByDevices/'
fileName = 'devices_MUDFlows.csv'

devices_MUDFlows_df = pd.read_csv(f"../../../../results/{filePath}{fileName}")
devices_MUDFlows_df.head()

Unnamed: 0,Device,Flows_Used
0,amazonEcho,'ipv4-6-443' | 'ipv4-17-123' | 'ipv4-2-*' | 'i...
1,awairAirQuality,'ipv4-17-67' | 'ipv4-17-53' | 'ipv4-6-443' | '...
2,lifxbulb,'ipv4-17-123' | 'ipv4-17-56700' | 'ipv4-17-67'...
3,nestsmokesensor,'ipv4-6-11095' | 'ipv4-17-67' | 'ipv4-17-53' |...
4,pixstarphotoframe,'ipv4-17-67' | 'ipv4-17-137' | 'ipv4-6-80' | '...


#### Creating the data structure to store the TP, FP, TN and FN counts

In [286]:
def _split_quoted_list(cell):
    """Parse a cell such as "'a' | 'b'" into ['a', 'b'].

    Single quotes are removed and the remainder is split on ' | '.
    """
    return cell.replace('\'', '').split(' | ')


def _mud_flow_match_fields(mud_flow):
    """Translate a MUD flow label into the (ethType, ipProto, port) filter values.

    'ipv4-<proto>-<port>' and 'ipv6-<proto>-<port>' map to the EtherType strings
    '0x0800' and '0x86DD'; 'ethernet-<ethType>' carries the EtherType itself.
    Any other label uses its first '-'-separated part as the EtherType.
    Fields the label does not specify are returned as '*'.

    Raises IndexError if an ipv4/ipv6 label has fewer than three parts or an
    ethernet label has fewer than two.
    """
    parts = mud_flow.split('-')
    kind = parts[0]
    if kind == 'ipv4':
        return '0x0800', parts[1], parts[2]
    if kind == 'ipv6':
        return '0x86DD', parts[1], parts[2]
    if kind == 'ethernet':
        return parts[1], '*', '*'
    return kind, '*', '*'


def _count_flow_outcomes(rows_df, correctMatches):
    """Count [tp, fp, tn, fn, totalMatches] over the rows of one MUD flow.

    rows_df        -- flowResult rows already filtered to a single MUD flow; its
                      ' protocol' and ' allMatches' columns are read.
    correctMatches -- list of FDCP names expected for the flow, or the string
                      'none' when the flow has no entry in the FDCP map.

    Counting rules:
      * a row whose ' protocol' is 'none' counts as one match: a true negative
        when no FDCP is expected, otherwise a false negative;
      * any other row contributes one count per '|'-separated entry of
        ' allMatches': an entry is a true positive if it is an expected FDCP
        and a false positive otherwise.
    """
    tp = fp = tn = fn = totalMatches = 0
    for protocol, matches_cell in zip(rows_df[' protocol'], rows_df[' allMatches']):
        if protocol == 'none':
            totalMatches += 1
            if correctMatches == 'none':
                tn += 1
            else:
                fn += 1
            continue

        # Drop empty tokens instead of blindly deleting the last element: the
        # result is the same for cells ending in '|', and a cell without the
        # trailing separator no longer loses its final (real) match.
        allMatches = [match for match in matches_cell.split('|') if match]
        totalMatches += len(allMatches)

        if correctMatches == 'none':
            # nothing was expected, so every reported match is a false positive
            fp += len(allMatches)
            continue

        for correctMatch in correctMatches:
            if correctMatch in allMatches:
                tp += 1
                # remove it so that only unexpected matches remain
                allMatches.remove(correctMatch)
        # whatever is left was reported but not expected
        fp += len(allMatches)
    return [tp, fp, tn, fn, totalMatches]


# {device: {MUDFlow: [tp, fp, tn, fn, totalMatches]}}
device_MUDFlow_data = {}
for device in all_devices:
    # flowResult of the current device
    currFlowResult_df = flowResult_dfs[device]

    # MUD flows used by the current device (IndexError if the device is not listed)
    print('Running for device:', device)
    currMUDFlows = _split_quoted_list(
        devices_MUDFlows_df.loc[devices_MUDFlows_df['Device'] == device, 'Flows_Used'].iloc[0]
    )

    MUDFlow_data = {}

    # iterating for each MUDFlow of the current device
    for currMUDFlow in currMUDFlows:
        etherType, ipProto, port = _mud_flow_match_fields(currMUDFlow)

        # Rows of this device that belong to the current MUD flow. The comparison
        # is literal string equality, so a '*' field only selects rows whose
        # column contains the text '*'.
        filtered_df = currFlowResult_df.loc[
            ((currFlowResult_df[' srcPort'] == port) | (currFlowResult_df[' dstPort'] == port))
            & (currFlowResult_df[' ipProto'] == ipProto)
            & (currFlowResult_df[' ethType'] == etherType.lower())
        ]

        # Expected FDCPs depend only on the MUD flow, so look them up once per
        # flow rather than once per row.
        fdcp_cells = MUDFlowtoFDCP_df.loc[MUDFlowtoFDCP_df['MUDFlow'] == currMUDFlow, 'FDCPs']
        if len(fdcp_cells):
            correctMatches = _split_quoted_list(fdcp_cells.iloc[0])
        else:
            correctMatches = 'none'  # matching FDCP not available

        tp, fp, tn, fn, totalMatches = _count_flow_outcomes(filtered_df, correctMatches)

        # storing the accuracy parameters for this MUDFlow
        MUDFlow_data[currMUDFlow] = [tp, fp, tn, fn, totalMatches]
        # sanity check: the four outcome counts must add up to the total
        if tp + fp + tn + fn != totalMatches:
            print('wrong')
    device_MUDFlow_data[device] = MUDFlow_data

Running for device: awairAirQuality
Running for device: lifxbulb
Running for device: pixstarphotoframe
Running for device: ringdoorbell
Running for device: tplinkcamera
Running for device: tribyspeaker
Running for device: amazonEcho
Running for device: nestsmokesensor
Running for device: withingssleepsensor


In [287]:
# Display the nested result: {device: {MUDFlow: [tp, fp, tn, fn, totalMatches]}}
device_MUDFlow_data

{'awairAirQuality': {'ipv4-17-67': [2, 4, 0, 0, 6],
  'ipv4-17-53': [1118, 1118, 0, 0, 2236],
  'ipv4-6-443': [647, 0, 0, 0, 647],
  'ipv4-6-8883': [0, 191, 2, 0, 193],
  'ethernet-0x888e': [0, 0, 0, 0, 0],
  'ethernet-0x0006': [0, 0, 0, 0, 0]},
 'lifxbulb': {'ipv4-17-123': [1117, 0, 0, 10, 1127],
  'ipv4-17-56700': [0, 6, 162, 0, 168],
  'ipv4-17-67': [2, 4, 0, 0, 6],
  'ipv4-17-53': [6292, 6292, 0, 0, 12584],
  'ipv4-6-56700': [0, 147, 10, 0, 157],
  'ipv6-58-*': [0, 0, 0, 3, 3],
  'ethernet-0x888e': [0, 0, 0, 0, 0],
  'ethernet-0x0006': [0, 0, 0, 0, 0]},
 'pixstarphotoframe': {'ipv4-17-67': [2, 4, 0, 0, 6],
  'ipv4-17-137': [14, 4, 0, 0, 18],
  'ipv4-6-80': [23, 0, 0, 0, 23],
  'ipv4-6-443': [1049, 2, 0, 0, 1051],
  'ipv4-17-138': [0, 8, 0, 0, 8],
  'ipv4-17-53': [4842, 18, 0, 0, 4860],
  'ipv6-0-*': [0, 0, 2, 0, 2],
  'ipv6-58-*': [0, 0, 0, 3, 3],
  'ethernet-0x888e': [0, 0, 0, 0, 0],
  'ethernet-0x0006': [0, 0, 0, 0, 0]},
 'ringdoorbell': {'ipv4-17-67': [2, 4, 0, 0, 6],
  'ipv4-17

#### Writing the data to a CSV file

In [291]:
# Write one CSV row per (device, MUDFlow) pair:
#   Device, MUDFlow, TP, FP, TN, FN, Total, Correct_FDCP
# Correct_FDCP is the '|'-joined list of expected FDCP names, or 'none' when the
# MUD flow has no entry in MUDFlowtoFDCP_df.
csvPath = '../../../../results/flowResults/V4/accuracy/'
fileName = 'noMetadata'
csvHeader = 'Device,MUDFlow,TP,FP,TN,FN,Total,Correct_FDCP\n'

with open(csvPath + fileName + '.csv', 'w') as f:
    f.write(csvHeader)
    for device, mudflow_counts in device_MUDFlow_data.items():
        for mudflow, counts in mudflow_counts.items():
            fdcp_cells = MUDFlowtoFDCP_df.loc[MUDFlowtoFDCP_df['MUDFlow'] == mudflow, 'FDCPs']
            if len(fdcp_cells):
                correctMatches = fdcp_cells.iloc[0].replace('\'', '').split(' | ')
                # Join the names directly rather than editing the repr of the
                # list, which would mangle any name containing a space, comma,
                # bracket or quote.
                fdcp_field = '|'.join(correctMatches)
            else:
                correctMatches = 'none'  # matching FDCP not available
                fdcp_field = correctMatches

            counts_field = ','.join(str(count) for count in counts)
            f.write("%s,%s,%s,%s\n" % (device, mudflow, counts_field, fdcp_field))